Skip to content
Permalink
Browse files

I have removed pipenv from the setup process because it is failing in…

… the Docker build
  • Loading branch information...
shawnmjones committed May 4, 2019
1 parent 9ca7361 commit 4fc1bb94f6b1b3a681a0093c4366fef439879c50
Showing with 199 additions and 5 deletions.
  1. +2 −0 .dockerignore
  2. +4 −3 Dockerfile
  3. +2 −2 mementoembed/mementoresource.py
  4. +153 −0 mementoembed/seedresource.py
  5. +36 −0 mementoembed/services/memento.py
  6. +2 −0 setup.py
@@ -1,5 +1,7 @@
.git
.gitignore
Pipfile
Pipfile.lock
README.md
Dockerfile
LICENSE
@@ -23,9 +23,10 @@ RUN npm install
RUN npm install puppeteer

# for Python environment dependencies
RUN pip install pipenv
COPY Pipfile.lock Pipfile /app/
RUN pipenv install --system
# RUN pip install pipenv
# COPY Pipfile.lock Pipfile /app/
# COPY Pipfile /app/
# RUN pipenv install --system

# installing the MementoEmbed application
COPY . /app
@@ -110,7 +110,7 @@ def get_timegate_from_response(response):
urig = response.links['timegate']['url']
except KeyError as e:
raise NotAMementoError(
"link header coult not be parsed for timegate URI",
"link header could not be parsed for timegate URI",
response=response, original_exception=e)

return urig
@@ -125,7 +125,7 @@ def get_original_uri_from_response(response):
urir = response.links['original']['url']
except KeyError as e:
raise NotAMementoError(
"link header coult not be parsed for original URI",
"link header could not be parsed for original URI",
response=response, original_exception=e)
except aiu.timemap.MalformedLinkFormatTimeMap as e:
module_logger.exception("Failed to process link header for URI-R, link header: {}".format(response.headers['link']))
@@ -0,0 +1,153 @@
import re
import logging

import aiu
import requests

from requests.exceptions import Timeout, TooManyRedirects, \
ChunkedEncodingError, ContentDecodingError, StreamConsumedError, \
URLRequired, MissingSchema, InvalidSchema, InvalidURL, \
UnrewindableBodyError, ConnectionError, SSLError

from .archiveresource import archive_collection_patterns
from .mementoresource import get_original_uri_from_response, \
get_memento_datetime_from_response, get_memento, \
NotAMementoError

module_logger = logging.getLogger('mementoembed.seedresource')

def get_timemap_from_response(response):
    """Return the TimeMap URI (URI-T) advertised in *response*'s Link header.

    Args:
        response: object exposing a ``links`` mapping of parsed Link header
            relations, where each relation maps to a dict with a ``url`` key.

    Returns:
        The value stored at ``response.links['timemap']['url']``.

    Raises:
        NotAMementoError: if the ``timemap`` relation or its ``url`` entry is
            missing from ``response.links``.
    """
    try:
        return response.links['timemap']['url']
    except KeyError as e:
        raise NotAMementoError(
            "link header could not be parsed for TimeMap URI",
            response=response, original_exception=e
        )

class SeedResourceError(Exception):
    """Base class for errors raised while processing a memento's seed.

    Attributes:
        message: developer-oriented description supplied by the raiser.
        original_exception: the lower-level exception that triggered this
            error, or ``None`` when there is none.
    """

    # Text intended for display to end users; subclasses override it.
    # Wording fixed from "This is a problem ..." to agree with the sibling
    # messages ("There was a problem processing ...").
    user_facing_error = "There was a problem processing the seed for this memento."

    def __init__(self, message, original_exception=None):
        """Store *message* and the optional *original_exception*."""
        self.message = message
        self.original_exception = original_exception

class InvalidTimeMapURI(SeedResourceError):
    """The TimeMap URI could not be used to issue a request.

    Raised by ``SeedResource.fetch_timemap`` when the HTTP layer reports
    ``URLRequired``, ``MissingSchema``, ``InvalidSchema`` or ``InvalidURL``.
    """

    user_facing_error = "The URI of the memento list (TimeMap) for this memento is invalid."

class TimeMapTimeoutError(SeedResourceError):
    """The request for the TimeMap timed out.

    Raised by ``SeedResource.fetch_timemap`` when the HTTP layer reports
    ``Timeout``.
    """

    user_facing_error = "Could not download a memento list (TimeMap) for this memento."

class TimeMapSSLError(SeedResourceError):
    """An SSL error occurred while requesting the TimeMap.

    Raised by ``SeedResource.fetch_timemap`` when the HTTP layer reports
    ``SSLError``.
    """

    user_facing_error = "There was a problem processing the certificate for the TimeMap for this memento."

class TimeMapConnectionFailure(SeedResourceError):
    """The connection used to request the TimeMap failed.

    Raised by ``SeedResource.fetch_timemap`` when the HTTP layer reports
    ``UnrewindableBodyError`` or ``ConnectionError``.
    """

    user_facing_error = "Could not download a memento list (TimeMap) for this memento."


class SeedResource:
    """Seed-level (original resource) information about a memento.

    On construction the memento's URI-M is matched against
    ``archive_collection_patterns``. On a match an ``aiu.ArchiveItCollection``
    is created for the captured collection id; otherwise ``self.aic`` is
    ``None``. The memento is then requested through ``httpcache`` and its
    TimeMap URI (``urit``) and original URI (``urir``) are read from the
    response's Link header.

    The TimeMap is downloaded lazily by :meth:`fetch_timemap` and kept on
    ``self.timemap`` so that all accessors share one download and one parse.
    """

    def __init__(self, memento, httpcache):
        """Resolve the collection (if any), the TimeMap URI and the seed URI.

        Args:
            memento: object exposing the memento's URI-M as ``urim``.
            httpcache: session-like object providing ``get(uri)``; it is also
                handed to ``aiu.ArchiveItCollection`` as its ``session``.

        Raises:
            NotAMementoError: if the memento's Link header has no usable
                ``timemap`` or ``original`` relation.
        """
        self.httpcache = httpcache
        self.memento = memento
        self.logger = logging.getLogger('mementoembed.seedresource.SeedResource')

        # Populated on demand by fetch_timemap().
        self.timemap = None

        collection_id = None

        for pattern in archive_collection_patterns:

            self.logger.debug("attempting to match pattern {}".format(pattern))
            m = re.match(pattern, self.memento.urim)

            if m:
                self.logger.debug("matched pattern {}".format(m.group(1)))
                collection_id = m.group(1)
                break

        if collection_id is not None:
            self.aic = aiu.ArchiveItCollection(
                collection_id=collection_id,
                session=httpcache,
                logger=self.logger
            )
        else:
            self.aic = None

        response = get_memento(httpcache.get, memento.urim)
        self.urit = get_timemap_from_response(response)
        self.urir = get_original_uri_from_response(response)

    def fetch_timemap(self):
        """Download and parse the TimeMap at ``self.urit`` into ``self.timemap``.

        The work is done at most once per instance; later calls return
        immediately when ``self.timemap`` is already populated.

        Raises:
            InvalidTimeMapURI: the URI could not be used to issue a request.
            TimeMapTimeoutError: the request timed out.
            TimeMapSSLError: an SSL error occurred.
            TimeMapConnectionFailure: the connection failed.
        """
        if self.timemap is not None:
            return

        # Handler order matters: in requests, SSLError derives from
        # ConnectionError and connect timeouts derive from both Timeout and
        # ConnectionError, so the more specific handlers must come first.
        try:
            r = self.httpcache.get(self.urit)
            self.timemap = aiu.convert_LinkTimeMap_to_dict(r.text)

        except (URLRequired, MissingSchema, InvalidSchema, InvalidURL) as e:
            raise InvalidTimeMapURI(
                "TimeMap URI {} is not a valid URI".format(self.urit),
                original_exception=e) from e

        except Timeout as e:
            raise TimeMapTimeoutError(
                "timed out while requesting TimeMap at {}".format(self.urit),
                original_exception=e) from e

        except SSLError as e:
            raise TimeMapSSLError(
                "SSL error while requesting TimeMap at {}".format(self.urit),
                original_exception=e) from e

        except (UnrewindableBodyError, ConnectionError) as e:
            raise TimeMapConnectionFailure(
                "connection failure while requesting TimeMap at {}".format(self.urit),
                original_exception=e) from e

    def mementocount(self):
        """Return the number of entries in the TimeMap's memento list."""
        self.fetch_timemap()
        return len(self.timemap["mementos"]["list"])

    def first_mdt(self):
        """Return the ``datetime`` entry of the TimeMap's first memento."""
        self.fetch_timemap()
        return self.timemap["mementos"]["first"]["datetime"]

    def first_urim(self):
        """Return the ``uri`` entry of the TimeMap's first memento."""
        self.fetch_timemap()
        return self.timemap["mementos"]["first"]["uri"]

    def last_mdt(self):
        """Return the ``datetime`` entry of the TimeMap's last memento."""
        self.fetch_timemap()
        return self.timemap["mementos"]["last"]["datetime"]

    def last_urim(self):
        """Return the ``uri`` entry of the TimeMap's last memento."""
        self.fetch_timemap()
        return self.timemap["mementos"]["last"]["uri"]

    def seed_metadata(self):
        """Return the collection's metadata for this seed.

        Returns:
            ``{}`` when the memento matched no collection pattern; otherwise
            the ``'collection_web_pages'`` entry of
            ``self.aic.get_seed_metadata(self.urir)``.
        """
        if self.aic is None:
            return {}

        return self.aic.get_seed_metadata(self.urir)['collection_web_pages']
@@ -13,6 +13,7 @@
from mementoembed.textprocessing import extract_text_snippet, extract_title
from mementoembed.cachesession import CacheSession
from mementoembed.archiveresource import ArchiveResource
from mementoembed.seedresource import SeedResource
from mementoembed.imageselection import get_best_image, convert_imageuri_to_pngdata_uri
from mementoembed.version import __useragent__

@@ -172,10 +173,37 @@ def archivedata(urim, preferences):

return response, 200

def seeddata(urim, preferences):
    """Build the JSON seed-data response for the memento at *urim*.

    Args:
        urim: URI-M of the memento whose seed information is requested.
        preferences: accepted for parity with the other service handlers;
            not used here.

    Returns:
        A ``(response, 200)`` tuple whose body is a JSON object with the keys
        ``urim``, ``seeduri``, ``mementocount``, ``first-mdt``, ``last-mdt``
        and ``metadata``.
    """
    httpcache = CacheSession(
        timeout=current_app.config['REQUEST_TIMEOUT_FLOAT'],
        user_agent=__useragent__,
        starting_uri=urim
    )

    memento = memento_resource_factory(urim, httpcache)

    sr = SeedResource(memento, httpcache)

    # The SeedResource accessors are plain methods, so they must be called;
    # storing the bound methods themselves makes json.dumps raise TypeError.
    # NOTE(review): if the TimeMap parser yields datetime objects for the
    # first/last memento-datetimes they will need formatting before
    # serialization - confirm against aiu.
    output = {
        'urim': urim,
        'seeduri': sr.urir,
        'mementocount': sr.mementocount(),
        'first-mdt': sr.first_mdt(),
        'last-mdt': sr.last_mdt(),
        'metadata': sr.seed_metadata(),
    }

    response = make_response(json.dumps(output, indent=4))
    response.headers['Content-Type'] = 'application/json'

    return response, 200

@bp.route('/services/memento/contentdata/')
@bp.route('/services/memento/archivedata/')
@bp.route('/services/memento/originalresourcedata/')
@bp.route('/services/memento/bestimage/')
@bp.route('/services/memento/seeddata/')
def no_urim():
path = request.url_rule.rule
return """WARNING: no URI-M submitted, please append a URI-M to {}
@@ -246,3 +274,11 @@ def originaldata_endpoint(subpath):
prefs[key] = value.lower()

return handle_errors(originaldata, urim, prefs)

@bp.route('/services/memento/seeddata/<path:subpath>')
def aitdata_endpoint(subpath):
    """Serve seed data for the URI-M appended to the seeddata service path.

    Args:
        subpath: path remainder captured by the route. The URI-M is taken
            from ``request.full_path`` instead, so this value is not read.

    Returns:
        Whatever ``handle_errors`` returns for ``seeddata``.
    """
    # The prefix must be the path this route is registered under; it was
    # previously '/services/memento/aitdata/', which no route serves.
    # NOTE(review): presumably extract_urim_from_request_path strips this
    # prefix from the full path - confirm against its definition.
    urim = extract_urim_from_request_path(request.full_path, '/services/memento/seeddata/')

    prefs = {}

    return handle_errors(seeddata, urim, prefs)
@@ -14,6 +14,7 @@
include_package_data=True,
install_requires=[
'aiu',
'Brotli',
'bs4',
'cairosvg',
'dicttoxml',
@@ -28,6 +29,7 @@
'redis',
'requests',
'requests_cache',
'sphinx',
'tldextract'
],
scripts=['bin/fetch_surrogate_data'],

0 comments on commit 4fc1bb9

Please sign in to comment.
You can’t perform that action at this time.