/usr/lib/python2.7/dist-packages/xapers/sources/cryptoeprint.py is in xapers 0.7.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | import urllib
from HTMLParser import HTMLParser
# Human-readable name of this source.
description = "Cryptology ePrint Archive"

# Base URL of the archive.
url = "https://eprint.iacr.org/"

# Template for an entry's landing page; '%s' is replaced by the entry id.
url_format = 'https://eprint.iacr.org/%s'

# Pattern recognising an entry URL; group 1 captures the id, which is at
# least four digits, a slash, then at least three digits.  Written as a raw
# string so '\d' is not treated as a string escape, and with the dots of the
# host name escaped so that they only match a literal '.'.
url_regex = r'https?://eprint\.iacr\.org/(\d{4,}/\d{3,})'

# don't know what a scan_regex looks like for IACR eprints.  i don't
# think there is one, because i think the submission process happens
# after the pdf is formalized.

# custom definitions for IACR eprints:

# Template for the page that serves an entry's BibTeX citation.
bibtex_url = 'https://eprint.iacr.org/eprint-bin/cite.pl?entry=%s'

# Template for an entry's PDF download.
pdf_url = 'https://eprint.iacr.org/%s.pdf'
# html parser override to override handler methods
class IACRParser(HTMLParser):
    """Collect the text found inside ``<pre>`` elements of an HTML page.

    After ``feed()`` the ``data`` attribute holds the complete text of the
    last ``<pre>`` element that contained any text, or ``None`` if no text
    was seen inside a ``<pre>`` element.

    The parser may deliver the text of one element in several pieces (for
    example around comments, nested tags, a bare ``&`` or character
    references), so the pieces are concatenated rather than letting each
    one replace the previous one.
    """

    # Named references whose replacement is plain ASCII and therefore safe
    # to splice into the text whatever the page's encoding is.  Any other
    # reference is kept verbatim instead of being dropped.
    _ASCII_ENTITIES = {
        'amp': '&',
        'lt': '<',
        'gt': '>',
        'quot': '"',
        'apos': "'",
    }

    def __init__(self):
        HTMLParser.__init__(self)
        self.pre = False          # True while inside a <pre> element
        self.data = None          # text collected so far, None if none yet
        self._new_block = False   # True until the current <pre> yields text

    def handle_starttag(self, tag, attrs):
        """Start collecting when a ``<pre>`` element opens."""
        if tag == 'pre':
            self.pre = True
            self._new_block = True

    def handle_endtag(self, tag):
        """Stop collecting when a ``<pre>`` element closes."""
        if tag == 'pre':
            self.pre = False

    def handle_data(self, data):
        """Record a piece of text if it lies inside a ``<pre>`` element."""
        self._append(data)

    def handle_entityref(self, name):
        """Record a named reference (``&name;``) reported by the parser."""
        self._append(self._ASCII_ENTITIES.get(name, '&%s;' % name))

    def handle_charref(self, name):
        """Record a numeric reference (``&#N;`` / ``&#xN;``) from the parser."""
        try:
            if name[:1] in ('x', 'X'):
                code = int(name[1:], 16)
            else:
                code = int(name)
        except ValueError:
            code = -1
        if 0 <= code < 128:
            self._append(chr(code))
        else:
            # Non-ASCII or malformed: keep the reference as written.
            self._append('&#%s;' % name)

    def _append(self, text):
        """Add *text* to ``data`` when inside a ``<pre>`` element."""
        if not self.pre:
            return
        if self._new_block or self.data is None:
            # First text of this <pre>: it supersedes any earlier element.
            self.data = text
            self._new_block = False
        else:
            self.data += text
def fetch_bibtex(id):
    """Fetch the BibTeX citation for the entry *id* as a unicode string.

    Downloads the page at ``bibtex_url % id``, extracts the ``<pre>`` text
    with ``IACRParser`` and decodes it using the charset announced in the
    response's Content-Type header, falling back to iso8859-1 when no
    charset is given or the announced one is unknown to Python.

    Raises ``TypeError`` (from ``unicode()``) if the parser captured no
    text, i.e. ``p.data`` is still ``None``.
    """
    f = urllib.urlopen(bibtex_url % id)
    try:
        html = f.read()
        # getparam() looks the parameter up case-insensitively, strips any
        # surrounding quotes and returns None when the header or the
        # parameter is absent.
        charset = f.headers.getparam('charset') or 'iso8859-1'
    finally:
        # Release the connection even if reading the response fails.
        f.close()

    p = IACRParser()
    p.feed(html)
    try:
        ret = unicode(p.data, charset)
    except LookupError:
        # if they send some super mangled charset we can try again
        # with the default:
        ret = unicode(p.data, 'iso8859-1')
    return ret
def fetch_file(id):
    """Download the PDF for the entry *id*.

    Returns a ``(filename, contents)`` tuple, where ``filename`` is the
    part of *id* after the last '/' with '.pdf' appended and ``contents``
    is the raw body read from ``pdf_url % id``.
    """
    f = urllib.urlopen(pdf_url % id)
    try:
        pdf = f.read()
    finally:
        # Release the connection even if reading the response fails.
        f.close()
    return (id.split('/').pop() + '.pdf', pdf)
|