Skip to content
Permalink
Browse files

original resource favicon algorithm changes, passes unit tests

  • Loading branch information...
shawnmjones committed May 21, 2019
1 parent 76d5905 commit 2f6967a6fe1606441a59f4f1d160a5be92636682
Showing with 32 additions and 15 deletions.
  1. +23 −15 mementoembed/originalresource.py
  2. +9 −0 tests/test_originalresource.py
@@ -8,6 +8,8 @@
find_conventional_favicon_on_live_web, query_timegate_for_favicon, \
get_favicon_from_resource_content, construct_conventional_favicon_uri

from .mementoresource import get_memento_datetime_from_response, NotAMementoError

module_logger = logging.getLogger('mementoembed.originalresource')

class OriginalResource:
@@ -52,21 +54,30 @@ def favicon(self):

if candidate_favicon is not None:

# self.original_link_favicon_uri = query_timegate_for_favicon(
# self.memento.timegate[0:self.memento.timegate.find(self.uri)],
# candidate_favicon,
# self.memento.memento_datetime,
# self.http_cache
# )
self.original_link_favicon_uri = candidate_favicon
# make sure the candidate favicon is itself a memento before using it

self.logger.debug("original link favicon is now {}".format(self.original_link_favicon_uri))
try:
r = self.http_cache.get(candidate_favicon)
get_memento_datetime_from_response(r)

self.logger.debug("failed to find favicon in HTML for URI {}".format(self.uri))
# no NotAMementoError was raised, so the candidate is a memento; just use it
self.original_link_favicon_uri = candidate_favicon

except NotAMementoError:
# the candidate is not a memento, so try datetime negotiation via the TimeGate
self.original_link_favicon_uri = query_timegate_for_favicon(
self.memento.timegate[0:self.memento.timegate.find(self.uri)],
candidate_favicon,
self.memento.memento_datetime,
self.http_cache
)

self.logger.debug("original link favicon is now {}".format(self.original_link_favicon_uri))

# 2. try to construct the favicon URI and look for it in the archive
if self.original_link_favicon_uri is None:

self.logger.debug("failed to find favicon in HTML for URI {}".format(self.uri))
self.logger.debug("querying web archive for original favicon at conventional URI")

self.original_link_favicon_uri = query_timegate_for_favicon(
@@ -76,32 +87,29 @@ def favicon(self):
self.http_cache
)

self.logger.debug("failed to find favicon in archive for URI {}".format(self.uri))

# 3. request the home page of the site on the live web and look for favicon in its HTML
if self.original_link_favicon_uri is None:

self.logger.debug("failed to find favicon in archive for URI {}".format(self.uri))
self.logger.debug("interrogating HTML of live web home page for favicon URI")

self.original_link_favicon_uri = get_favicon_from_resource_content(
"{}://{}".format(original_scheme, self.domain), self.http_cache)

self.logger.debug("failed to find favicon in HTML of live page for URI {}".format(self.uri))

# 4. try to construct the favicon URI and look for it on the live web
if self.original_link_favicon_uri is None:

self.logger.debug("failed to find favicon in HTML of live page for URI {}".format(self.uri))
self.logger.debug("requesting the live web home page of the resource and searching "
"for the favicon in its content")

self.original_link_favicon_uri = find_conventional_favicon_on_live_web(
original_scheme, self.domain, self.http_cache)

self.logger.debug("failed to find favicon on live web for URI {}".format(self.uri))

# 5. if all else fails, fall back to the Google favicon service
if self.original_link_favicon_uri is None:

self.logger.debug("failed to find favicon on live web for URI {}".format(self.uri))
self.logger.debug("attempting to query the google favicon service for the archive favicon URI")

self.original_link_favicon_uri = get_favicon_from_google_service(
@@ -269,6 +269,15 @@ def test_favicon_from_html(self):
}
}
),
original_favicon:
mock_response(
headers = {
'content-type': 'image/',
},
text = expected_content,
status=200,
url = expected_favicon
),
expected_original_uri:
mock_response(
headers = {

0 comments on commit 2f6967a

Please sign in to comment.
You can’t perform that action at this time.