Skip to content
Permalink
Browse files

requests_cache CachedSession is now back in use

  • Loading branch information...
shawnmjones committed Jun 12, 2019
1 parent 2e439f1 commit 7ca463caf8056d31e57c5a0df158154fb27ec8a1
@@ -1,8 +1,5 @@
CACHEENGINE = 'Redis'
CACHE_DBNUMBER = 0
CACHE_DBHOST = "localhost"
CACHE_DBPORT = 6379
CACHE_DBPASSWORD = ""
CACHEENGINE = 'SQLite'
CACHEDBFILE = 'mementoembed'
URICACHE_EXPIRATION = 604800
APPLICATION_LOGLEVEL = "DEBUG"
REQUEST_TIMEOUT = 15
@@ -6,10 +6,12 @@

import redis
import requests
import requests_cache

from time import strftime

from redis import RedisError
from redis_namespace import StrictRedis
from flask import Flask, request, render_template, make_response, current_app

from .memstock.uricache import RedisCache, NoCache
@@ -49,20 +51,39 @@ def test_file_access(filename):

def getURICache():
    """Build the caching HTTP session used to retrieve URIs.

    The storage backend is selected by the ``CACHEENGINE`` application
    setting:

    * ``'Redis'`` -- responses are stored in the Redis database described by
      ``CACHE_DBHOST``, ``CACHE_DBPORT``, ``CACHE_DBPASSWORD`` and
      ``CACHE_DBNUMBER``.
    * any other value -- responses are stored in a SQLite file whose name is
      taken from ``CACHEDBFILE``.

    Cached responses expire after ``URICACHE_EXPIRATION`` seconds for both
    backends; if that setting is absent the entries never expire.

    Reads ``current_app.config``, so it must be called while a Flask
    application context is active.

    Returns:
        requests_cache.CachedSession: a session that behaves like
        ``requests.Session`` but serves repeated requests from the cache.
    """

    # Local import keeps this block self-contained.
    import os

    config = current_app.config

    # None means "never expire" to requests_cache.
    expiration = config.get('URICACHE_EXPIRATION')

    if config['CACHEENGINE'] == 'Redis':

        conn = StrictRedis(
            host=config["CACHE_DBHOST"],
            port=config["CACHE_DBPORT"],
            password=config["CACHE_DBPASSWORD"],
            db=config["CACHE_DBNUMBER"],
            namespace='uricache:'
        )

        return requests_cache.CachedSession(
            cache_name="uricache",
            backend="redis",
            expire_after=expiration,
            # Serve a stale cached copy rather than fail if the request errors.
            old_data_on_error=True,
            connection=conn,
            namespace='uricache'
        )

    # SQLite as default

    # splitext keeps the leading dot on the extension, which the SQLite
    # backend expects because it concatenates name and extension verbatim.
    # It also ignores dots that appear only in directory components.
    cachename, ext = os.path.splitext(config['CACHEDBFILE'])

    if not ext:
        ext = '.sqlite'

    return requests_cache.CachedSession(
        cache_name=cachename,
        extension=ext,
        # Honour the configured expiration here too, as the Redis branch does.
        expire_after=expiration
    )

def get_requests_timeout(config):

@@ -211,17 +232,29 @@ def create_app():
if app.config['ENABLE_THUMBNAILS'].lower() == "yes":
if not os.path.exists( app.config['THUMBNAIL_WORKING_FOLDER'] ):
application_logger.info("creating thumbnail folder at {}".format(app.config['THUMBNAIL_WORKING_FOLDER']))
os.makedirs( app.config['THUMBNAIL_WORKING_FOLDER'] )

try:
os.makedirs( app.config['THUMBNAIL_WORKING_FOLDER'] )
except FileExistsError:
pass # TODO: a race condition exists in Flask sometimes

if app.config['ENABLE_IMAGEREEL'].lower() == "yes":
if not os.path.exists( app.config['IMAGEREEL_WORKING_FOLDER'] ):
application_logger.info("creating imagereel folder at {}".format(app.config['IMAGEREEL_WORKING_FOLDER']))
os.makedirs( app.config['IMAGEREEL_WORKING_FOLDER'] )

try:
os.makedirs( app.config['IMAGEREEL_WORKING_FOLDER'] )
except FileExistsError:
pass # TODO: a race condition exists in Flask sometimes

if app.config['ENABLE_DOCREEL'].lower() == "yes":
if not os.path.exists( app.config['DOCREEL_WORKING_FOLDER'] ):
application_logger.info("creating imagereel folder at {}".format(app.config['DOCREEL_WORKING_FOLDER']))
os.makedirs( app.config['DOCREEL_WORKING_FOLDER'] )

try:
os.makedirs( app.config['DOCREEL_WORKING_FOLDER'] )
except FileExistsError:
pass # TODO: a race condition exists in Flask sometimes

application_logger.info("MementoEmbed is now initialized and ready to receive requests")

@@ -13,7 +13,7 @@
from requests.exceptions import Timeout, TooManyRedirects, \
ChunkedEncodingError, ContentDecodingError, StreamConsumedError, \
URLRequired, MissingSchema, InvalidSchema, InvalidURL, \
UnrewindableBodyError, ConnectionError, SSLError
UnrewindableBodyError, ConnectionError, SSLError, ReadTimeout

wayback_pattern = re.compile('(/[0-9]{14})/')

@@ -140,7 +140,7 @@ def get_memento(http_cache, urim):
except (URLRequired, MissingSchema, InvalidSchema, InvalidURL) as e:
raise MementoInvalidURI("", original_exception=e)

except Timeout as e:
except (Timeout, ReadTimeout) as e:
raise MementoTimeoutError("", original_exception=e)

except SSLError as e:
@@ -1,4 +1,5 @@
import logging
from requests.exceptions import ReadTimeout

from urllib.parse import urljoin, urlparse

@@ -72,6 +73,9 @@ def favicon(self):
self.http_cache
)

except ReadTimeout:
module_logger.exception("Failed to download favicon due to timeout error, searching for favicon using a different method...")

self.logger.debug("original link favicon is now {}".format(self.original_link_favicon_uri))

# 2. try to construct the favicon URI and look for it in the archive
@@ -20,7 +20,7 @@ def attempt_cache_deletion(urim):
baduris = ["", None]

if urim not in baduris:
getURICache().purgeuri(urim)
getURICache().cache.delete_url(urim)

def handle_errors(function_name, urim, preferences):

@@ -38,7 +38,8 @@ def handle_errors(function_name, urim, preferences):
'make_your_own_memento.html',
urim = urim
),
"error details": repr(traceback.format_exc())
"error details": repr(
traceback.format_exc())
}, indent=4))
response.headers['Content-Type'] = 'application/json'
return response, 404
@@ -3,10 +3,10 @@ https://www.webarchive.org.uk/wayback/archive/20090522221251/http://blasttheory.
https://web.archive.org/web/20180515130056/http://www.cs.odu.edu/~mln/,Michael L. Nelson,"About Me... I joined the Computer Science department at Old Dominion University in 2002. I worked at NASA Langley Research Center from 1991-2002. Through a NASA fellowship, I spent the 2000-2001 ac",2018-05-15T13:00:56Z,http://www.cs.odu.edu/~mln/images/mln-ad-100x130.jpg,https://archive.org,ARCHIVE.ORG,https://web.archive.org/_static/images/archive.ico,,,,http://www.cs.odu.edu/~mln/,www.cs.odu.edu,https://web.archive.org/web/http://www.cs.odu.edu/favicon.ico,Live
https://webarchive.nrscotland.gov.uk/20170805210615/http://livingonwater.co.uk/index.php/homepage/show/home/home,Living on Water - Home,"Thanks to the Living on Water initiative a choice of new, high-quality residential and commercial moorings are being developed at key places along Scotland\u2019s beautiful canal network. This means tha",2017-08-05T21:06:21Z,https://webarchive.nrscotland.gov.uk/20170805210621im_/http://www.livingonwater.co.uk/system/uploads/images/homeslider/1.jpg,https://webarchive.nrscotland.gov.uk,NRSCOTLAND.GOV.UK,https://webarchive.nrscotland.gov.uk/wb-static/imr/images/favicon.png,,,,http://www.livingonwater.co.uk/index.php/homepage/show/home/home,www.livingonwater.co.uk,https://www.google.com/s2/favicons?domain=livingonwater.co.uk,Live
http://arquivo.pt/wayback/19980205082901/http://www.caleida.pt/saramago/,José Saramago - Home Page,Este Site foi distinguido com a insignia de Top 5% Portugal \u00a9 1996: Caleida Comunica\u00e7\u00e3o Global Lda Page designed for Netscape 1.2 - 256c 800x600,1998-02-05T08:29:01Z,http://arquivo.pt/wayback/19980205082901im_/http://www.caleida.pt/saramago/imagens/foto_saramago.gif,http://arquivo.pt,ARQUIVO.PT,http://arquivo.pt/img/logo-16.png,,,,http://www.caleida.pt/saramago/,www.caleida.pt,http://www.caleida.pt/wp-content/uploads/2019/01/cropped-caleida-32x32.png,Live
http://wayback.archive-it.org/2950/20120508033201/http://www.salon.com/2012/05/02/did_may_day_succeed/singleton/,Did May Day succeed?,"Yesterday’s Occupy reboot mobilized a diverse group of people, but reverted to familiar tactics in the end",2012-05-08T03:32:01Z,http://wayback.archive-it.org/2950/20120508033201im_/http://media.salon.com/2012/05/occupy_la-460x307.jpg,https://archive-it.org,ARCHIVE-IT.ORG,http://wayback.archive-it.org/favicon.ico,2950,Occupy Movement 2011/2012,https://archive-it.org/collections/2950,http://www.salon.com/2012/05/02/did_may_day_succeed/singleton/,www.salon.com,http://wayback.archive-it.org/2950/20120508033201im_/http://www.salon.com/favicon.ico,Rotten
http://wayback.archive-it.org/2950/20120508033201/http://www.salon.com/2012/05/02/did_may_day_succeed/singleton/,Did May Day succeed?,"Yesterday’s Occupy reboot mobilized a diverse group of people, but reverted to familiar tactics in the end",2012-05-08T03:32:01Z,http://wayback.archive-it.org/2950/20120508033201im_/http://media.salon.com/2012/05/occupy_la-460x307.jpg,https://archive-it.org,ARCHIVE-IT.ORG,https://www.archive-it.org/favicon.ico,2950,Occupy Movement 2011/2012,https://archive-it.org/collections/2950,http://www.salon.com/2012/05/02/did_may_day_succeed/singleton/,www.salon.com,http://wayback.archive-it.org/2950/20120508033201im_/http://www.salon.com/favicon.ico,Rotten
http://webarchive.nationalarchives.gov.uk/20081208222543/http://www.nacell.org.uk/,NACELL - National Advisory Centre on Early Language Learning,"ELL Forum Email forum for news exchange, discussion and peer support. More",2008-12-08T22:25:43Z,http://webarchive.nationalarchives.gov.uk/20081208222543im_/http:/www.nacell.org.uk/images/yellow_top_bar.gif,http://webarchive.nationalarchives.gov.uk,NATIONALARCHIVES.GOV.UK,http://webarchive.nationalarchives.gov.uk/search/img/favicon.ico,,,,http://www.nacell.org.uk/,www.nacell.org.uk,https://www.google.com/s2/favicons?domain=nacell.org.uk,Live
http://webarchive.parliament.uk/20100426094738/http://www.publications.parliament.uk/pa/cm199899/cmselect/cmagric/141/9020402.htm,House of Commons - Agriculture - Minutes of Evidence,"Examination of witnesses (Questions 373 - 379) THURSDAY 4 FEBRUARY 1999 MR ROBERT GEORGE, MR MICHAEL HOSKING, MR MERVYN MOUNTJOY, MR DONALD TURTLE Chairman 373. Gentlemen, welcome to this first s",2010-04-26T09:47:38Z,http://webarchive.parliament.uk/20100426094738im_/http://www.publications.parliament.uk/server-side/images/parliament_logo.gif,http://webarchive.parliament.uk,PARLIAMENT.UK,https://www.google.com/s2/favicons?domain=parliament.uk,,,,http://www.publications.parliament.uk/pa/cm199899/cmselect/cmagric/141/9020402.htm,www.publications.parliament.uk,http://webarchive.parliament.uk/20100426094738oe_/http://www.publications.parliament.uk/favicon.ico,Live
http://archive.is/20130508132946/http://flexispy.com/,FlexiSPY - The worlds most powerful spyphone,"EnglishEspaсolРусскийRevealing Secrets Since 2005Need help ordering ?USA: (1) 858-227-4400Can I help you?Our agents are ready to assist you. Click ""Chat Now"" to be connected to one instantly.Chat N",2013-05-08T13:29:46Z,https://archive.is/pSSpa/c667e53d34bcbf4d70c034fd96ccbdab453e957a.jpg,http://archive.is,ARCHIVE.IS,http://archive.is/favicon.ico,,,,http://flexispy.com/,flexispy.com,http://flexispy.com/favicon.ico,Live
http://webarchive.loc.gov/all/20160830014245/https://www.whitehouse.gov/,The White House,"See the President's daily schedule, explore behind-the-scenes photos from inside the White House, and find out all the ways you can engage with the most interactive administration in our country's",2016-08-30T01:42:45Z,http://webarchive.loc.gov/all/20160830014245im_/https://www.whitehouse.gov/sites/whitehouse.gov/files/hero-backgrounds/fish.jpg,http://webarchive.loc.gov,LOC.GOV,https://www.google.com/s2/favicons?domain=loc.gov,,,,https://www.whitehouse.gov/,www.whitehouse.gov,http://webarchive.loc.gov/all/20160830014245im_/https://www.whitehouse.gov/profiles/forall/themes/custom/fortyfour/favicon.ico,Live
http://archive.is/MIq1y,Virginia Commonwealth University - School of Engineering,"CS Seminar – Michael L. Nelson, Ph.D. February 24, 201711:00 a.m. – 12:00 p.m. Engineering West Hall, Room 106 The Department of Computer Science is pleased to present Dr. Michael L. Nelson, Profes",2017-02-14T22:18:32Z,https://archive.is/MIq1y/6287ab3e834f85e94198b71febff904c6be45ad3.png,http://archive.is,ARCHIVE.IS,http://archive.is/favicon.ico,,,,http://www.egr.vcu.edu/news-events/events/cs-seminar-michael-l-nelson-phd.html,www.egr.vcu.edu,https://www.google.com/s2/favicons?domain=vcu.edu,Rotten
http://wayback.archive-it.org/all/20160209000335/https://twitter.com/TEN_GOP/status/689216708695994368,Tennessee GOP on Twitter,"“If #MLK was alive today, he'd lead ""All Lives Matter"" March in Washington DC #MLKDay #MLKDay2016 #AllLivesMatter”",2016-02-09T00:03:35Z,http://wayback.archive-it.org/all/20160209000335im_/https://pbs.twimg.com/profile_banners/4224729994/1449504876/1500x500,https://archive-it.org,ARCHIVE-IT.ORG,http://wayback.archive-it.org/favicon.ico,,,,https://twitter.com/TEN_GOP/status/689216708695994368,twitter.com,http://wayback.archive-it.org/all/20160209000335im_/https://abs.twimg.com/favicons/favicon.ico,Live
http://wayback.archive-it.org/all/20160209000335/https://twitter.com/TEN_GOP/status/689216708695994368,Tennessee GOP on Twitter,"“If #MLK was alive today, he'd lead ""All Lives Matter"" March in Washington DC #MLKDay #MLKDay2016 #AllLivesMatter”",2016-02-09T00:03:35Z,http://wayback.archive-it.org/all/20160209000335im_/https://pbs.twimg.com/profile_banners/4224729994/1449504876/1500x500,https://archive-it.org,ARCHIVE-IT.ORG,https://www.archive-it.org/favicon.ico,,,,https://twitter.com/TEN_GOP/status/689216708695994368,twitter.com,http://wayback.archive-it.org/all/20160209000335im_/https://abs.twimg.com/favicons/favicon.ico,Live
@@ -25,22 +25,27 @@ def test_service(endpoint, datarow):

r = requests.get("{}{}".format(endpoint, urim))

self.assertEqual(r.status_code, 200)
self.assertEqual(r.status_code, 200, "status code was not 200 for URI-M {} at endpoint {}".format(urim, endpoint))

data = r.json()

self.assertEqual(data['urim'], urim)
self.assertIn("generation-time", data)
self.assertEqual(data['urim'], urim, msg="failed to match given URI-M of {} at endpoint {}".format(urim, endpoint))
self.assertIn("generation-time", data, msg="generation-time field is not present for URI-M {} at endpoint {}".format(urim, endpoint))

for field in r.json():

if field not in ['urim', 'generation-time', 'snippet']:

if datarow[field] == '':
self.assertEqual(data[field], None, msg="failed for field {}".format(field))
else:
self.assertEqual(data[field], datarow[field], msg="failed for field {}".format(field))
try:

if datarow[field] == '':
self.assertEqual(data[field], None, msg="failed for field {}".format(field))
else:
self.assertEqual(data[field], datarow[field], msg="failed for field {}".format(field))

except AssertionError as e:
print("Failed with URI-M {} for field {} at endpoint {}".format(urim, field, endpoint))
print("exception: {}".format(e))

with open(batteryfilename) as f:
reader = csv.DictReader(f)

0 comments on commit 7ca463c

Please sign in to comment.
You can’t perform that action at this time.