/usr/lib/python2.7/dist-packages/xapers/parser.py is in xapers 0.7.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
This file is part of xapers.
Xapers is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
Xapers is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with xapers. If not, see <http://www.gnu.org/licenses/>.
Copyright 2012-2017
Jameson Rollins <jrollins@finestructure.net>
"""
import os
##################################################
class ParseError(Exception):
    """Root exception type for errors raised by the Xapers parsers.

    The message given at construction is kept on the ``msg`` attribute
    and is what ``str()`` of the exception returns.
    """

    def __init__(self, msg):
        # Keep the message on the instance; __str__ reports it verbatim.
        self.msg = msg

    def __str__(self):
        message = self.msg
        return message
##################################################
class ParserBase(object):
    """Base class for Xapers document parsers.

    Stores the document path, with a leading ``~`` expanded, on
    ``self.path``.  ``extract`` is a placeholder here: it does nothing
    and returns None.
    """

    def __init__(self, path):
        # Expand a leading "~" / "~user" so self.path is directly usable.
        self.path = os.path.expanduser(path)

    def extract(self):
        """Placeholder extraction hook; the base version returns None."""
        pass
##################################################
def parse_data(data):
    """Extract text from document data using the PDF parser.

    ``data`` is passed unchanged to ``xapers.parsers.pdf.extract`` and
    that function's result is returned.  (NOTE(review): ``data`` is
    presumably the raw document contents -- confirm against callers.)

    Raises ParseError if the extractor raises any Exception.  An
    ImportError from loading the PDF parser module itself is not
    wrapped and propagates to the caller.
    """
    # FIXME: determine mime type (PDF is currently always assumed)
    from xapers.parsers.pdf import extract

    try:
        return extract(data)
    except Exception as e:
        # Present every extractor failure as a single ParseError type.
        raise ParseError("Could not parse file: %s" % e)
def parse_file(path):
    """Extract text from the document file at ``path``.

    The parser class is looked up as ``Parser`` in the module
    ``xapers.parsers.<mimetype>``; the mime type is currently hard-coded
    to ``'pdf'``.  A leading ``~`` in ``path`` is expanded before the
    file checks, matching what ``ParserBase`` does with its path.

    Returns whatever the parser's ``extract()`` method returns.

    Raises ParseError if the parser module cannot be imported, if the
    path does not exist or is not a regular file, or if the parser
    raises any Exception during extraction.
    """
    # FIXME: determine mime type
    mimetype = 'pdf'

    try:
        mod = __import__('xapers.parsers.' + mimetype, fromlist=['Parser'])
        pmod = getattr(mod, 'Parser')
    except ImportError:
        raise ParseError("Unknown parser '%s'." % mimetype)

    # Validate the same expanded path the parser will end up reading;
    # error messages keep the path exactly as the caller supplied it.
    fullpath = os.path.expanduser(path)

    if not os.path.exists(fullpath):
        raise ParseError("File '%s' not found." % path)

    if not os.path.isfile(fullpath):
        raise ParseError("File '%s' is not a regular file." % path)

    try:
        return pmod(fullpath).extract()
    except Exception as e:
        # Present every parser failure as a single ParseError type.
        raise ParseError("Could not parse file: %s" % e)
|