|
@@ -227,7 +227,7 @@ def get_field_value(data, preferences, base_fieldname): |
|
|
|
|
|
|
|
return json.loads(data)[me_fieldname] |
|
|
|
|
|
|
|
class MementoData2: |
|
|
|
class MementoData: |
|
|
|
|
|
|
|
def __init__(self, template_string, mementoembed_api): |
|
|
|
self.mementoembed_api = mementoembed_api |
|
@@ -461,242 +461,3 @@ def get_memento_data(self, urim, session=None): |
|
|
|
return self._mementodata[urim] |
|
|
|
|
|
|
|
|
|
|
|
class MementoData:
    """Gather the MementoEmbed data a story template needs for each URI-M.

    The template is scanned for surrogate fields (optionally carrying a
    ``|prefer name=value,...`` suffix).  Each field other than the ones in
    ``_FIELDS_WITHOUT_ENDPOINT`` is mapped to a MementoEmbed endpoint through
    the module-level ``fieldname_to_endpoint`` table.  URI-Ms are queued with
    ``add`` and their data is downloaded by ``fetch_all_memento_data``.

    Attributes:
        template: the template text handed to the constructor.
        mementoembed_api: base URI of the service, without one trailing '/'.
        fields_and_preferences: list of ``(fieldname, preference)`` tuples;
            ``preference`` is ``None`` when the field had no ``|prefer``.
        endpoint_list: dict mapping a full endpoint URI to the list of
            distinct preference strings requested for it.
        data: dict mapping a URI-M to the dict of values fetched for it.
        urimlist: URI-Ms queued for fetching, in insertion order.
    """

    # Fields that are skipped when the endpoint list is built.
    _FIELDS_WITHOUT_ENDPOINT = ('urim', 'creation_time', 'memento_datetime_14num')

    def __init__(self, template, mementoembed_api):
        """Parse ``template`` and prepare the endpoint list for ``mementoembed_api``."""
        self.template = template

        # Endpoint paths are concatenated directly onto this value, so a
        # single trailing slash is dropped here.
        if mementoembed_api.endswith('/'):
            self.mementoembed_api = mementoembed_api[:-1]
        else:
            self.mementoembed_api = mementoembed_api

        self.fields_and_preferences = self._get_field_names_and_preferences()
        module_logger.info("fields and preferences {}".format(self.fields_and_preferences))
        self.endpoint_list = self._get_endpoint_list()

        # TODO: consider other backends than RAM
        self.data = {}
        self.urimlist = []

    @staticmethod
    def _split_preference_items(preference_text):
        """Split 'a=b,c=d }}' into clean ['a=b', 'c=d'] items, dropping empties."""
        items = []
        for piece in preference_text.split(','):
            piece = piece.replace(' }}', '').strip()
            if piece:
                items.append(piece)
        return items

    @staticmethod
    def _partition_preferences(preferences, raintale_names):
        """Separate preference items meant for MementoEmbed from Raintale-only ones.

        Args:
            preferences: iterable of comma-separated preference strings.
            raintale_names: container of preference names handled locally.

        Returns:
            ``(me_items, rt_items)``: two lists of distinct ``name=value``
            items in first-seen order.
        """
        me_items = []
        rt_items = []
        for pref in preferences:
            for item in MementoData._split_preference_items(pref):
                prefname = item.split('=', 1)[0]
                target = rt_items if prefname in raintale_names else me_items
                if item not in target:
                    target.append(item)
        return me_items, rt_items

    @staticmethod
    def _ranked_field_name(base, preference_items):
        """Build the sanitized template field for ``base``.

        A ``rank=N`` item anywhere in ``preference_items`` yields
        ``base + '_rank__N }}'``; without one the result is ``base + ' }}'``.
        When several rank items are present the last one is used.
        """
        rank = None
        for item in preference_items:
            if item.startswith('rank='):
                rank = item.split('=', 1)[1].strip()
        if rank is None:
            return base + " }}"
        return base + "_rank__" + rank + ' }}'

    def _get_field_names_and_preferences(self):
        """Return ``(fieldname, preference)`` tuples for every surrogate field in the template."""
        fields_and_preferences = []

        for field in get_template_surrogate_fields(self.template):
            module_logger.info("examining template field {} for preferences...".format(field))

            if '|prefer ' in field:
                fieldname, preference = [i.strip() for i in field.split('|prefer ')]
                preference = preference.replace(' }}', '')
            else:
                fieldname = field
                preference = None

            # Strip the template markup so only the bare field name remains.
            fieldname = fieldname.replace('{{ element.surrogate.', '')
            fieldname = fieldname.replace(' }}', '')

            fields_and_preferences.append((fieldname, preference))

        return fields_and_preferences

    def _get_endpoint_list(self):
        """Return a dict of endpoint URI -> distinct preference strings for that endpoint."""
        endpoints = {}

        for fieldname, pref in self.fields_and_preferences:
            if fieldname in self._FIELDS_WITHOUT_ENDPOINT:
                continue

            endpoint = self.mementoembed_api + fieldname_to_endpoint[fieldname]
            preferences = endpoints.setdefault(endpoint, [])

            if pref is not None and pref not in preferences:
                preferences.append(pref)

        return endpoints

    def add(self, urim):
        """Queue ``urim`` so the next fetch requests its data."""
        self.urimlist.append(urim)

    def _record_service_result(self, memento_data, endpoint_uri, service_uri, result, rt_prefs):
        """Store one service response into ``memento_data`` according to its endpoint."""
        if endpoint_uri == '/services/product/thumbnail/':
            module_logger.info("result: {}".format(result))
            module_logger.info("content-length: {}".format(len(result.content)))
            memento_data['thumbnail'] = png_to_datauri(result.content)

        elif endpoint_uri == '/services/memento/imagedata/':
            try:
                jdata = result.json()
            except json.decoder.JSONDecodeError:
                module_logger.exception("Failed to process imagedata output from MementoEmbed endpoint for call to {}, quitting...".format(service_uri))
                raise

            for rt_pref in rt_prefs:
                if rt_pref.startswith('rank='):
                    rank = rt_pref.split('=', 1)[1].strip()
                    # Ranks in the template are 1-based; the list is 0-based.
                    irank = int(rank) - 1
                    memento_data["image_rank__{}".format(rank)] = jdata["ranked images"][irank]

        else:
            try:
                jdata = result.json()
            except json.decoder.JSONDecodeError:
                module_logger.exception("Failed to process general output from MementoEmbed endpoint for call to {}, quitting...".format(service_uri))
                raise

            for key in jdata:
                memento_data[key.replace('-', '_')] = jdata[key]

    def fetch_all_memento_data(self, session=None):
        """Request every endpoint for every queued URI-M and store the results in ``self.data``.

        Args:
            session: optional session object handed to ``FuturesSession``;
                when ``None`` a default ``FuturesSession`` is created.

        Raises:
            json.decoder.JSONDecodeError: a JSON endpoint returned a body
                that could not be decoded.
        """
        if session is not None:
            fs = FuturesSession(session=session)
        else:
            fs = FuturesSession()

        service_uri_futures = {}
        service_uri_to_endpoint = {}
        rt_preferences = {}
        api_prefix_length = len(self.mementoembed_api)

        # dict.fromkeys de-duplicates while keeping insertion order, so a
        # URI-M that was added twice is only requested once.
        for urim in dict.fromkeys(self.urimlist):
            module_logger.debug("working on URI-M {}".format(urim))

            for endpoint, preferences in self.endpoint_list.items():
                headers = {}
                service_uri = endpoint + urim

                if preferences:
                    for pref in preferences:
                        module_logger.info("examining preference {}".format(pref))

                    me_preferences, rt_items = self._partition_preferences(
                        preferences, raintale_specific_preferences
                    )

                    if rt_items:
                        rt_preferences[service_uri] = rt_items

                    # Only individual non-Raintale items go to the service;
                    # an empty Prefer header is not sent at all.
                    if me_preferences:
                        headers['Prefer'] = ','.join(me_preferences)

                module_logger.debug("issuing request for service URI {}".format(service_uri))

                service_uri_futures.setdefault(urim, {})
                service_uri_to_endpoint[service_uri] = endpoint[api_prefix_length:]
                service_uri_futures[urim][service_uri] = \
                    fs.get(service_uri, headers=headers)

        all_memento_data = {}

        module_logger.info("rt_preferences are {}".format(rt_preferences))
        module_logger.debug("extracting data from futures: {}".format(service_uri_futures))
        module_logger.info("extracting data from all services for all URI-Ms...")

        for urim, futures in service_uri_futures.items():
            for service_uri, future in futures.items():
                # result() waits for the request to finish, so no polling
                # loop is needed; all requests were already issued above.
                result = future.result()
                module_logger.info("service URI {} is ready".format(service_uri))

                memento_data = all_memento_data.setdefault(urim, {})

                endpoint_uri = service_uri_to_endpoint[service_uri]
                module_logger.info("corresponding endpoint uri is {}".format(endpoint_uri))

                self._record_service_result(
                    memento_data,
                    endpoint_uri,
                    service_uri,
                    result,
                    rt_preferences.get(service_uri, []),
                )

        module_logger.info("all memento data: {}".format(all_memento_data))
        module_logger.info("done extracting data from all services for all URI-Ms.")

        self.data = all_memento_data

    def get_memento_data(self, urim, session=None):
        """Return the data dict for ``urim``, queueing and fetching it first if needed.

        Raises:
            KeyError: no data was stored for ``urim`` even after fetching.
        """
        if urim not in self.urimlist:
            self.add(urim)

        if urim not in self.data:
            self.fetch_all_memento_data(session=session)

        return self.data[urim]

    def get_sanitized_template(self):
        """Return the template with every ``|prefer`` field replaced by a plain field name.

        A field whose preferences include ``rank=N`` becomes
        ``<field>_rank__N``, matching the ``image_rank__N`` keys stored by
        ``fetch_all_memento_data``; other preferences are simply removed.
        """
        replacement_list = []

        for field in get_template_surrogate_fields(self.template):
            if "|prefer " not in field:
                continue

            fielddata = [i.strip() for i in field.split('|prefer ')]
            preference_items = self._split_preference_items(fielddata[1])

            for preference in preference_items:
                module_logger.info("looking at preference {}".format(preference))

            replacement_list.append(
                (field, self._ranked_field_name(fielddata[0], preference_items))
            )

        sanitized_template = self.template

        module_logger.info("replacement list: {}".format(replacement_list))

        for original_field, sanitized_field in replacement_list:
            sanitized_template = sanitized_template.replace(original_field, sanitized_field)

        return sanitized_template
|
|
|
|
|
|
|
|
0 comments on commit
c54b9b1