Skip to content
Please note that GitHub no longer supports your web browser.

We recommend upgrading to the latest Google Chrome or Firefox.

Learn more
Permalink
Browse files

fix behavior in python3

  • Loading branch information...
tdurieux committed Oct 2, 2018
1 parent 051d115 commit 12bc55b5f65c07359950274e9a9eb83851dbaa1a
Showing with 36 additions and 14 deletions.
  1. +26 −14 server.py
  2. +10 −0 test.py
@@ -15,7 +15,7 @@

# non standards, in requirements.txt
from flask import Flask, request, Markup, render_template, redirect, url_for, send_from_directory
import github
import github


def clean_github_repository(repo):
@@ -40,15 +40,22 @@ def clean_github_repository(repo):

TEXT_CHARACTERS = ''.join([chr(code) for code in range(32,127)] + list('\b\f\n\r\t'))
# Maps the ordinal of every "text" character to None so that str.translate()
# deletes it; built once instead of on every istext() call.
_TEXT_DELETE_TABLE = dict.fromkeys(map(ord, TEXT_CHARACTERS))


def istext(s, threshold=0.30):
    """
    Heuristically decide whether a piece of content is text rather than binary
    :param s: the content to inspect, either a str or an object with a
        ``decode`` method (e.g. bytes)
    :param threshold: maximum accepted ratio of non-text characters
    :return: True if the content looks like text, False otherwise
    """
    if not isinstance(s, str):
        # Binary payloads are frequently not valid UTF-8; substitute the
        # undecodable bytes (they then count as non-text characters) instead
        # of letting UnicodeDecodeError escape from a classification helper.
        s = s.decode('utf8', 'replace')
    # if s contains any null, it's not text:
    if '\x00' in s:
        return False
    # an "empty" string is "text" (arbitrary but reasonable choice):
    if not s:
        return True
    # Count what is left of s once every text character has been removed
    binary_length = len(s.translate(_TEXT_DELETE_TABLE))
    # s is 'text' if at most `threshold` (30% by default) of its characters
    # are non-text ones:
    return binary_length / float(len(s)) <= threshold

@@ -122,7 +129,7 @@ def file_render(file, repository_configuration):
return Markup("The file %s is too big to be anonymized (beyond 1MB, Github limit)" % (file.name))
if ".md" in file.name or file.name == file.name.upper() or "changelog" == file.name.lower():
return Markup("<div class='markdown-body'>%s</div>" % remove_terms(
self.github.render_markdown(file.decoded_content.decode('utf-8')).decode('utf-8'),
self.github.render_markdown(file.decoded_content.decode('utf-8')),
repository_configuration))
if ".jpg" in file.name or ".png" in file.name or ".png" in file.name or ".gif" in file.name:
return Markup("<img src='%s' alt='%s'>" % (file.url, file.name))
@@ -185,7 +192,7 @@ def is_up_to_date(repository_config, g_commit):
commit_date = datetime.strptime(g_commit.last_modified, "%a, %d %b %Y %H:%M:%S %Z")
return 'pushed_at' in repository_config and commit_date.strftime("%s") == repository_config["pushed_at"]

def get_type_content(file_name, path, repository_configuration, g_repo):
def get_type_content(file_name, path, repository_configuration, g_repo, is_website):
"""
Get the content type of a file from its extension
:param file_name: the filename
@@ -194,11 +201,11 @@ def get_type_content(file_name, path, repository_configuration, g_repo):
:param g_repo: the Github repository
:return: the content type
"""
if is_website(path, repository_configuration, g_repo):
if is_website:
content_type = 'text/plain; charset=utf-8'
if ".html" in file_name:
content_type = 'text/html; charset=utf-8'
if ".md" in file_name or file.name == file.name.upper():
if ".md" in file_name or file_name == file_name.upper():
content_type = 'text/html; charset=utf-8'
if ".jpg" in file_name \
or ".png" in file_name \
@@ -249,16 +256,17 @@ def get_content(current_file, files, path, repository_config, g_repo):
cached_file_path = os.path.join(cache_path, file_path)
if os.path.exists(cached_file_path):
return send_from_directory(os.path.dirname(cached_file_path), os.path.basename(cached_file_path),
mimetype=get_type_content(path, path, repository_config, g_repo).replace("; charset=utf-8", ""))
mimetype=get_type_content(path, path, repository_config, g_repo, False).replace("; charset=utf-8", ""))
content = ''
if is_website(path, repository_config, g_repo):
if current_file.type != 'dir' and is_website(path, repository_config, g_repo):
if current_file.size > 1000000:
blob = g_repo.get_git_blob(current_file.sha)
if blob.encoding == 'base64':
content = base64.b64decode(blob.content).decode('utf-8')
else:
content = blob.content.decode('utf-8')
else:
print(current_file.type)
content = current_file.decoded_content.decode('utf-8')
if ".html" in current_file.name \
or ".txt" in current_file.name \
@@ -270,7 +278,7 @@ def get_content(current_file, files, path, repository_config, g_repo):
or ".js" in current_file.name:
content = remove_terms(content, repository_config)
if ".md" in current_file.name:
content = remove_terms(self.github.render_markdown(content).decode('utf-8'), repository_config)
content = remove_terms(self.github.render_markdown(content), repository_config)
else:
content = render_template('repo.html',
repository=repository_config,
@@ -285,7 +293,10 @@ def get_content(current_file, files, path, repository_config, g_repo):
if not os.path.exists(os.path.dirname(content_cache_path)):
os.makedirs(os.path.dirname(content_cache_path))
with open(content_cache_path, 'w') as f:
f.write(content.encode('utf8'))
if type(content) == str:
f.write(content)
else:
f.write(content.encode('utf8'))
return content

def is_website(path, repository_config, g_repo):
@@ -370,9 +381,10 @@ def repository(id, path):

cache_path = os.path.join(self.config_dir, id, "cache")
if os.path.isfile(os.path.join(cache_path, path)):
print("here", path)
return send_from_directory(os.path.dirname(os.path.join(cache_path, path)),
os.path.basename(os.path.join(cache_path, path)),
mimetype=get_type_content(path, path, repository_configuration, g_repo).replace("; charset=utf-8", "")),
mimetype=get_type_content(path, path, repository_configuration, g_repo, is_website(path, repository_configuration, g_repo)).replace("; charset=utf-8", ""))
elif os.path.exists(os.path.join(cache_path, path, "index.html")):
return send_from_directory(os.path.join(cache_path, path), "index.html", mimetype='text/html')
elif os.path.exists(os.path.join(cache_path, path, "README.md")):
@@ -391,7 +403,7 @@ def repository(id, path):
files, current_file = get_current_folder_files(clean_path, current_file, repository_configuration, g_repo, g_commit)

content = get_content(current_file, files, clean_path, repository_configuration, g_repo)
content_type = get_type_content(current_file.name, clean_path, repository_configuration, g_repo)
content_type = get_type_content(current_file.name, clean_path, repository_configuration, g_repo, False)
return content, {'Content-Type': content_type}

@application.route('/', methods=['GET'])
10 test.py
@@ -39,6 +39,16 @@ def test_create_repository(self):
rv = self.app.get("/repository/%s/" % anonymous_id)
assert b"Anonymous XXX" in rv.data

def test_open_pomxml(self):
    """Request pom.xml of an anonymized repository and check the expected anonymized text is served."""
    repo_id = self.create_repository("https://github.com/SpoonLabs/astor", "astor")
    pom_url = "/repository/%s/pom.xml" % repo_id
    response = self.app.get(pom_url)
    body = response.data
    assert b"XXX: AST transformation for Repairs" in body

def test_web_site_repository(self):
    """Request a Markdown page under docs/ of an anonymized repository and check the expected anonymized text is served."""
    repo_id = self.create_repository("https://github.com/SpoonLabs/astor", "astor")
    page_url = "/repository/%s/docs/getting-starting.md" % repo_id
    response = self.app.get(page_url)
    body = response.data
    assert b"Getting started XXX" in body


# Run the unittest test runner when this module is executed as a script.
if __name__ == '__main__':
unittest.main()

0 comments on commit 12bc55b

Please sign in to comment.
You can’t perform that action at this time.