Permalink
Browse files

A workaround for Brotli encoding that is not supported by the requests…

… library
  • Loading branch information...
shawnmjones committed Oct 11, 2018
1 parent 4e4559a commit c08d0d7455e67f23bd39fd3ab28a9bdd8739e56e
Showing with 110 additions and 25 deletions.
  1. +1 −0 Pipfile
  2. +54 −24 Pipfile.lock
  3. +55 −1 mementoembed/cachesession.py
View
@@ -18,6 +18,7 @@ jusText = "==2.2.0"
Pillow = "==5.1.0"
sphinx = "*"
sphinx-rtd-theme = "*"
Brotli = "==1.0.4"
[dev-packages]
View
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "90094a94d0a810ee291662cc4ef32084f09ff5245b2ea038bcc0828b1786cfaf"
"sha256": "a9b514083104b9493f74f8bdea013d57197e86b2b28150a8d63594c9e482363a"
},
"pipfile-spec": 6,
"requires": {
@@ -25,10 +25,10 @@
},
"alabaster": {
"hashes": [
"sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
"sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
"sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
"sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
],
"version": "==0.7.11"
"version": "==0.7.12"
},
"babel": {
"hashes": [
@@ -45,6 +45,35 @@
],
"version": "==4.6.3"
},
"brotli": {
"hashes": [
"sha256:0a915b7bfcbbb6450535e4aee742293a4bd1232d6d1c596c4683c15bf8d49876",
"sha256:145d248421790c5e77ef6bc31a9fe51b6c456794c79dbff234eaf9794ddf553b",
"sha256:22a6057c12651894455a2a047f314c08b3aecbbf88ea664acdb5fce00f6cc65d",
"sha256:24559371040598278fe728df6d23cd92ba970dfb86376ea2a58395088b1fddbd",
"sha256:39ee2de7907210d35669b80a3cf5234a8d1a89e0d82424b6603b2b8fea1c6877",
"sha256:3ac100faf3435bc681c23dac7105b0cebfbb0f50f8093c5d5f0900ec301b2784",
"sha256:7b933f559f5d412c45c18d7286836833fe9be55b5afb9b520849dec349e9600e",
"sha256:7ea9227565051c28968b1f49c8d691da9ecfc47cd86f8d88970abd4ec096ae5f",
"sha256:801ed45374e79643a6fa22df3fa15f9b1b511ac0b7ec4bd3889a2893aeb75110",
"sha256:81e5559a329ddbf275526fe857ac0088531fbc832fc397ac6649251fdacd5e3f",
"sha256:859289e7f313ea48530fd5f0f9de671b9ed8618e965e864445a82f7f78d83f29",
"sha256:8cd6ca60708a4cb3eacb6af372f08df9eb4d8a98ebb3e6c9f84db430ff38152a",
"sha256:9e4d3bd1e698fec5b9551674a3f54f4a1f6bcf26dff0872ce7526f6861d018f0",
"sha256:a9910a161785a420c5c7a54fac789b18c475b79a3c061354add03c0b209e8396",
"sha256:c87db9ca5bf9378fe5a0e4aae8abbabb8d345edfa90632fa3253007e4bedf0ef",
"sha256:d5c4e9efbdf4fcdc72d7d70fdcef531470fb273c81b9f4ebf1b64d4c9b5f9cf6",
"sha256:dfbc67f719008ad5c70894f7ed5930746c476c179f943215aae8fce1b110bd5c",
"sha256:e6068677014258c7161f03f9033247e236fe5362a4e97e9d62582a1400815c4f",
"sha256:ea80498d48a11aeaf2dda058e1d0082176bab5351fc17e3718c5320094f9bfe5",
"sha256:ec5680c8b5c45de0346a2111b73f7d3edc9e3c9c43e5ee8b2787836ccc3d5d32",
"sha256:f7e8d43a533572a85ddeca0a747b2726e15482bd81b4a1e84ef04e138ff58b5e",
"sha256:faec94d0ced372b114280b532b0f74f71fcdde2493a4e85da2fdca67f6ebef9e",
"sha256:fe6fe495741d276f7d3cdffe8bd71daa06be0112a5ffbe05ee4f545224f83f21"
],
"index": "pypi",
"version": "==1.0.4"
},
"bs4": {
"hashes": [
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
@@ -67,10 +96,11 @@
},
"click": {
"hashes": [
"sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d",
"sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b"
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
"sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
],
"version": "==6.7"
"markers": "python_version != '3.0.*' and python_version >= '2.7' and python_version != '3.1.*' and python_version != '3.3.*' and python_version != '3.2.*'",
"version": "==7.0"
},
"cssselect": {
"hashes": [
@@ -197,11 +227,11 @@
},
"packaging": {
"hashes": [
"sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
"sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
"sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
"sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
],
"markers": "python_version != '3.1.*' and python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*'",
"version": "==17.1"
"markers": "python_version != '3.0.*' and python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.1.*'",
"version": "==18.0"
},
"pillow": {
"hashes": [
@@ -246,11 +276,11 @@
},
"pyparsing": {
"hashes": [
"sha256:905d8090c335314568b5faee0025b1829f27bb974604a5762a6cdef3a7dfc3b7",
"sha256:f493ee323be1e94929416b3585eefcc04943115cecbaaa35a8c86d1a2368af19"
"sha256:bc6c7146b91af3f567cf6daeaec360bc07d45ffec4cf5353f4d7a208ce7ca30a",
"sha256:d29593d8ebe7b57d6967b62494f8c72b03ac0262b1eed63826c6f788b3606401"
],
"markers": "python_version != '3.1.*' and python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*'",
"version": "==2.2.1"
"markers": "python_version != '3.0.*' and python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.1.*'",
"version": "==2.2.2"
},
"pytz": {
"hashes": [
@@ -320,19 +350,19 @@
},
"sphinx": {
"hashes": [
"sha256:95acd6648902333647a0e0564abdb28a74b0a76d2333148aa35e5ed1f56d3c4b",
"sha256:c091dbdd5cc5aac6eb95d591a819fd18bccec90ffb048ec465b165a48b839b45"
"sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
"sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
],
"index": "pypi",
"version": "==1.8.0"
"version": "==1.8.1"
},
"sphinx-rtd-theme": {
"hashes": [
"sha256:3b49758a64f8a1ebd8a33cb6cc9093c3935a908b716edfaa5772fd86aac27ef6",
"sha256:80e01ec0eb711abacb1fa507f3eae8b805ae8fa3e8b057abfdf497e3f644c82c"
"sha256:02f02a676d6baabb758a20c7a479d58648e0f64f13e07d1b388e9bb2afe86a09",
"sha256:d0f6bc70f98961145c5b0e26a992829363a197321ba571b31b24ea91879e0c96"
],
"index": "pypi",
"version": "==0.4.1"
"version": "==0.4.2"
},
"sphinxcontrib-websupport": {
"hashes": [
@@ -359,10 +389,10 @@
},
"warcio": {
"hashes": [
"sha256:4f5cf5e0f3cb4cbdf06430d9a89c018fd51a53f327958c4a4260e350713701c8",
"sha256:956021f73c26d2bfd5be5ff5acfd244ac041688540d121d06197d86c243aa1a3"
"sha256:3990e1800622b68d2264714b20204bbfb5a049f867b65d8779f6475b75d8d942",
"sha256:85c678cb2b382e039c04f5dcd275a4d881bf138d7e09d1900f3778e7d135c1b5"
],
"version": "==1.5.3"
"version": "==1.6.1"
},
"webencodings": {
"hashes": [
@@ -1,9 +1,56 @@
import logging
import requests
import brotli
import requests_cache
module_logger = logging.getLogger('mementoembed.cachesession')
module_logger = logging.getLogger('mementoembed.cachesession')
class BrotliResponse:
    """
    Wrapper around a response object whose body is Brotli-compressed.

    This class exists because requests had not yet implemented the
    Brotli content-encoding ('br').

    The wrapper copies ``apparent_encoding``, ``links``, ``headers``,
    ``status_code`` and ``url`` from the wrapped response at construction
    time, and provides its own ``content`` and ``text`` properties that
    decompress the body when the ``content-encoding`` header names Brotli.
    The wrapped object stays available as ``self.response``.
    """

    def __init__(self, response):
        """Store *response* and mirror the attributes listed on the class."""
        self.response = response

        # ``False`` (not ``None``) is the "not loaded yet" sentinel, so any
        # value taken from the wrapped response, including an empty body,
        # counts as loaded.
        self._content = False

        self.apparent_encoding = self.response.apparent_encoding
        self.links = self.response.links
        self.headers = self.response.headers
        self.status_code = self.response.status_code
        self.url = self.response.url

    @property
    def content(self):
        """
        Return the response body as bytes, Brotli-decompressed if needed.

        The result is computed on first access and cached. Any exception
        raised by ``brotli.decompress`` propagates to the caller, and in
        that case nothing is cached.
        """
        if self._content is False:
            body = self.response.content
            coding = self.response.headers.get('content-encoding')

            # Content-coding tokens are case-insensitive, so normalise the
            # header value before comparing it.
            if coding is not None and coding.strip().lower() == 'br':
                body = brotli.decompress(body)

            # Assign only after decoding succeeded: a failed decompression
            # must not leave the still-compressed bytes cached as if they
            # were the decoded body.
            self._content = body

        return self._content

    @property
    def text(self):
        """
        Return the (decompressed) body decoded to ``str``.

        The wrapped response's ``encoding`` is used when it is not None,
        otherwise ``apparent_encoding``. Undecodable bytes are replaced
        rather than raising. If the chosen encoding is unknown or is not a
        string, the body is decoded with ``str``'s default encoding.
        """
        encoding = self.response.encoding

        if encoding is None:
            encoding = self.apparent_encoding

        try:
            return str(self.content, encoding, errors='replace')
        except (LookupError, TypeError):
            # LookupError: unknown codec name. TypeError: encoding is not a
            # string (for example, both candidate encodings were None).
            return str(self.content, errors='replace')
class CacheSession:
@@ -20,7 +67,10 @@ def get(self, uri, headers={}, use_referrer=True):
req_headers = {}
req_headers['accept-encoding'] = "gzip, deflate"
for key in headers:
# Note that this will allow the caller to overwrite the accept-encoding
req_headers[key] = headers[key]
if use_referrer:
@@ -33,4 +83,8 @@ def get(self, uri, headers={}, use_referrer=True):
module_logger.debug("response status: {}".format(response.status_code))
module_logger.debug("response headers: {}".format(response.headers))
if 'content-encoding' in response.headers:
if response.headers['content-encoding'] == 'br':
response = BrotliResponse(response)
return response

0 comments on commit c08d0d7

Please sign in to comment.