|
@@ -29,12 +29,14 @@ |
|
|
|
|
|
from __future__ import print_function |
|
|
|
|
|
import os |
|
|
from os import write |
|
|
from os.path import splitext |
|
|
|
|
|
import re |
|
|
import unicodedata |
|
|
|
|
|
from six import u |
|
|
from six import ensure_text, u |
|
|
|
|
|
from .api import ( |
|
|
entity_to_authors, entity_to_classes, entity_to_doi, |
|
@@ -63,14 +65,14 @@ |
|
|
} |
|
|
|
|
|
STRING_TO_TEX_URL = { |
|
|
'{': r'\{', |
|
|
'}': r'\}', |
|
|
'#': r'\#', |
|
|
'&': r'\&', |
|
|
'^': r'\^{}', |
|
|
'%': r'\%', |
|
|
'$': r'\$', |
|
|
'_': r'\_', |
|
|
'{': u(r'\{'), |
|
|
'}': u(r'\}'), |
|
|
'#': u(r'\#'), |
|
|
'&': u(r'\&'), |
|
|
'^': u(r'\^{}'), |
|
|
'%': u(r'\%'), |
|
|
'$': u(r'\$'), |
|
|
'_': u(r'\_'), |
|
|
} |
|
|
|
|
|
COMBINING_DIACRITIC_TO_TEX = { |
|
@@ -92,15 +94,15 @@ |
|
|
} |
|
|
|
|
|
STRING_TO_TEX_PATTERN = re.compile( |
|
|
u'|'.join(re.escape(key) for key in STRING_TO_TEX), |
|
|
u('|').join(re.escape(key) for key in STRING_TO_TEX), |
|
|
flags=re.UNICODE) |
|
|
|
|
|
STRING_TO_TEX_URL_PATTERN = re.compile( |
|
|
u'|'.join(re.escape(key) for key in STRING_TO_TEX_URL), |
|
|
u('|').join(re.escape(key) for key in STRING_TO_TEX_URL), |
|
|
flags=re.UNICODE) |
|
|
|
|
|
COMBINING_DIACRITIC_TO_TEX_PATTERN = re.compile( |
|
|
u'(.)({})'.format( |
|
|
u('(.)({})').format( |
|
|
u'|'.join(re.escape(key)for key in COMBINING_DIACRITIC_TO_TEX)), |
|
|
flags=re.UNICODE) |
|
|
|
|
@@ -137,7 +139,10 @@ def escape_to_tex(string, escape_type='normal'): |
|
|
|
|
|
""" |
|
|
if string is None: |
|
|
return '' |
|
|
return u('') |
|
|
|
|
|
string = ensure_text(string) |
|
|
|
|
|
if escape_type == 'normal': |
|
|
escaped_string = STRING_TO_TEX_PATTERN.sub( |
|
|
lambda match: STRING_TO_TEX[match.group()], string) |
|
@@ -149,10 +154,10 @@ def escape_to_tex(string, escape_type='normal'): |
|
|
escape_type)) |
|
|
|
|
|
escaped_string = COMBINING_DIACRITIC_TO_TEX_PATTERN.sub( |
|
|
lambda match: '{{{} {}}}'.format( |
|
|
lambda match: u('{{{} {}}}').format( |
|
|
COMBINING_DIACRITIC_TO_TEX[match.group(2)], |
|
|
match.group(1)), |
|
|
unicodedata.normalize('NFD', u(escaped_string))) |
|
|
unicodedata.normalize('NFD', escaped_string)) |
|
|
return escaped_string |
|
|
|
|
|
|
|
@@ -279,15 +284,15 @@ def authors_to_bibtex_authors(authors): |
|
|
Returns |
|
|
------- |
|
|
entry : str |
|
|
Bibtex entry. |
|
|
Bibtex entry in Unicode string. |
|
|
|
|
|
""" |
|
|
bibtex_authors = [] |
|
|
for n, (author, humanness) in enumerate(authors): |
|
|
if humanness: |
|
|
bibtex_authors.append(escape_to_tex(author)) |
|
|
else: |
|
|
bibtex_authors.append('{' + escape_to_tex(author) + '}') |
|
|
bibtex_authors.append(u('{') + escape_to_tex(author) + '}') |
|
|
return bibtex_authors |
|
|
|
|
|
|
|
@@ -299,18 +304,18 @@ def entity_to_bibtex_entry(entity, key=None): |
|
|
entity : dict |
|
|
Wikidata entity as hierarchical structure. |
|
|
key : str |
|
|
Bibtex key |
|
|
Bibtex key. |
|
|
|
|
|
Returns |
|
|
------- |
|
|
entry : str |
|
|
Bibtex entry. |
|
|
Bibtex entry in Unicode string. |
|
|
|
|
|
""" |
|
|
if key is None: |
|
|
entry = "@Article{%s,\n" % entity['id'] |
|
|
entry = u("@Article{%s,\n") % entity['id'] |
|
|
else: |
|
|
entry = "@Article{%s,\n" % escape_to_tex(key) |
|
|
entry = u("@Article{%s,\n") % escape_to_tex(key) |
|
|
authors = authors_to_bibtex_authors( |
|
|
entity_to_authors(entity, return_humanness=True)) |
|
|
entry += " author = {%s},\n" % u" and ".join(authors) |
|
@@ -393,14 +398,16 @@ def main(): |
|
|
|
|
|
entities = wb_get_entities(qs) |
|
|
|
|
|
bib = "" |
|
|
bib = u("") |
|
|
for q, key in zip(qs, keys): |
|
|
entity = entities[q] |
|
|
bib += entity_to_bibtex_entry(entity, key=key) |
|
|
bib += '\n' |
|
|
|
|
|
with open(bib_filename, 'w') as f: |
|
|
f.write(bib.encode('utf-8')) |
|
|
# Write BibTeX-formatted string to file |
|
|
output_file = os.open(bib_filename, os.O_RDWR | os.O_CREAT) |
|
|
output_encoding = "utf-8" |
|
|
write(output_file, bib.encode(output_encoding)) |
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
0 comments on commit
44dc7b8