Skip to content
Permalink
Browse files

API endpoint for exporting entities and documents from search results as a zip archive
  • Loading branch information...
sunu committed Mar 6, 2019
1 parent f238a3d commit 96a9fc14dd3c5b087fc7dab5779a78f7c493ab06
Showing with 108 additions and 1 deletion.
  1. +17 −1 aleph/views/entities_api.py
  2. +89 −0 aleph/views/export.py
  3. +2 −0 requirements-generic.txt
@@ -1,5 +1,5 @@
import logging
from flask import Blueprint, request
from flask import Blueprint, request, Response
from werkzeug.exceptions import BadRequest
from followthemoney import model
from followthemoney.types import registry
@@ -20,6 +20,8 @@
from aleph.views.serializers import EntitySerializer
from aleph.views.forms import EntityCreateSchema, EntityUpdateSchema

from aleph.views.export import export_entities

log = logging.getLogger(__name__)
blueprint = Blueprint('entities_api', __name__)

@@ -33,6 +35,20 @@ def index():
return EntitySerializer.jsonify_result(result)


@blueprint.route('/api/2/search/export/<any(csv, excel):format>', methods=['GET']) # noqa
@blueprint.route('/api/2/entities/export/<any(csv, excel):format>', methods=['GET']) # noqa
def export(format):
    """Return the entities matching the current query as a zip download.

    The query is built from the request arguments and authorization, the
    serialized results are turned into entity proxies, and the generator
    returned by ``export_entities`` is used as the response body.

    :param format: export flavour taken from the URL, ``csv`` or ``excel``.
    :return: a ``Response`` with mimetype ``application/zip`` served as
        the attachment ``export.zip``.
    """
    query_parser = SearchQueryParser(request.args, request.authz)
    query_result = EntitiesQuery.handle(request, parser=query_parser)
    serialized = query_result.to_dict(serializer=EntitySerializer)
    proxies = list(map(model.get_proxy, serialized['results']))

    archive = export_entities(proxies, format)
    disposition = 'attachment; filename={}'.format('export.zip')
    response = Response(archive, mimetype='application/zip')
    response.headers['Content-Disposition'] = disposition
    return response


@blueprint.route('/api/2/match', methods=['POST'])
def match():
entity = parse_request(EntityUpdateSchema)
@@ -0,0 +1,89 @@
import io
import logging
import os
import mimetypes

import requests
import zipstream

from followthemoney.export.csv import (
write_entity as write_entity_csv, write_headers
)
from followthemoney.export.excel import (
get_workbook, get_sheet, write_entity as write_entity_excel,
get_workbook_content,
)


FORMAT_CSV = 'csv'
FORMAT_EXCEL = 'excel'


log = logging.getLogger(__name__)


def write_document(zip_archive, entity):
    """Add the source file of a document entity to the zip archive.

    The archive member is named ``<collection label>/<name><ext>``, where
    the extension is guessed from the first entry of the entity's
    ``mimetypes`` and only appended when the name does not already end
    with it. The file body is fetched over HTTP in streaming mode and its
    chunk iterator is handed to ``zip_archive.write_iter``.

    Nothing is written when the entity is a folder, when it has no
    ``file`` link, or when the download does not return a success status.

    :param zip_archive: archive object exposing ``write_iter(path, iterable)``.
    :param entity: entity proxy whose ``context`` dict carries the
        ``collection``, ``name``, ``mimetypes`` and ``links`` entries.
    :return: ``None``.
    """
    context = entity.context
    filetypes = context.get('mimetypes') or []
    # Folders carry no file content of their own.
    if 'inode/directory' in filetypes:
        return

    # Without a download link there is nothing to archive, so bail out
    # before touching any of the optional naming metadata.
    file_url = (context.get('links') or {}).get('file')
    if not file_url:
        return

    parent = (context.get('collection') or {}).get('label') or ''
    name = context.get('name') or 'document'
    ext = None
    if filetypes:
        ext = mimetypes.guess_extension(filetypes[0], strict=True)
    # Do not turn "report.pdf" into "report.pdf.pdf".
    if ext and not name.lower().endswith(ext):
        name = name + ext
    path = os.path.join(parent, name)

    stream = requests.get(file_url, stream=True)
    if not stream.ok:
        # Archiving the body of an error page under the document's name
        # would silently produce a corrupt file; skip it instead.
        log.warning("Skipping %s: download failed with HTTP %s",
                    path, stream.status_code)
        stream.close()
        return
    # iter_content() defaults to one byte per chunk; use a real buffer size.
    zip_archive.write_iter(path, stream.iter_content(chunk_size=64 * 1024))


def export_entity_csv(handlers, entity):
    """Write one entity as a row into the CSV buffer of its schema.

    ``handlers`` maps a schema's plural name to an in-memory text buffer.
    The buffer is created, registered and given a header row (with an
    extra ``url`` column) the first time a schema is seen.

    :param handlers: dict of plural schema name -> ``io.StringIO``;
        updated in place.
    :param entity: entity proxy; its ``context['links']`` provides the
        ``file`` link, or the ``ui`` link when there is no ``file`` one.
    """
    plural = entity.schema.plural
    buffer = handlers.get(plural)
    if buffer is None:
        buffer = io.StringIO()
        handlers[plural] = buffer
        write_headers(buffer, entity.schema, extra_headers=['url'])

    links = entity.context['links']
    # Prefer the direct file link; fall back to the UI page.
    url = links['file'] if 'file' in links else links['ui']
    write_entity_csv(buffer, entity, extra_fields={'url': url})


def export_entity_excel(workbook, entity):
    """Write one entity as a row into the workbook sheet of its schema.

    The sheet is obtained through ``get_sheet`` with an extra ``url``
    column, which is filled with the entity's ``file`` link, or with its
    ``ui`` link when there is no ``file`` one.

    :param workbook: workbook object as returned by ``get_workbook``.
    :param entity: entity proxy with a ``links`` dict in its ``context``.
    """
    target_sheet = get_sheet(entity.schema, workbook, extra_headers=['url'])
    links = entity.context['links']
    # Prefer the direct file link; fall back to the UI page.
    link_key = 'file' if 'file' in links else 'ui'
    write_entity_excel(
        target_sheet, entity, extra_fields={'url': links[link_key]}
    )


def export_entities(entities, format):
    """Generate a zip archive with a tabular export and document files.

    Every entity is written to the tabular export: one ``export.xls``
    workbook for the Excel format, or one ``<schema plural>.csv`` file
    per schema for the CSV format. Entities whose schema is a
    ``Document`` additionally get their file added via ``write_document``.

    This is a generator: no work, including the format check, happens
    until the first chunk is requested.

    :param entities: iterable of entity proxies to export.
    :param format: ``FORMAT_CSV`` or ``FORMAT_EXCEL``.
    :return: generator yielding the chunks of the zip archive.
    :raises ValueError: if ``format`` is not a supported export format.
    """
    # An explicit check instead of ``assert``: assertions are stripped
    # under ``python -O``, which would let an unknown format fall through
    # and produce an empty archive.
    if format not in (FORMAT_CSV, FORMAT_EXCEL):
        raise ValueError('Unsupported export format: %r' % (format,))

    zip_archive = zipstream.ZipFile()
    as_excel = format == FORMAT_EXCEL
    workbook = get_workbook() if as_excel else None
    handlers = {}

    for entity in entities:
        if as_excel:
            export_entity_excel(workbook, entity)
        else:
            export_entity_csv(handlers, entity)
        if entity.schema.is_a('Document'):
            write_document(zip_archive, entity)

    if as_excel:
        content = io.BytesIO(get_workbook_content(workbook))
        zip_archive.write_iter('export.xls', content)
    else:
        # One CSV file per schema, named after the schema's plural label.
        for key, buffer in handlers.items():
            content = io.BytesIO(buffer.getvalue().encode())
            zip_archive.write_iter(key + '.csv', content)

    yield from zip_archive
@@ -1,4 +1,5 @@
Flask==1.0.2
requests-oauthlib<1.2.0
Flask-OAuthlib==0.9.5
Flask-SQLAlchemy==2.3.2
Flask-Script==2.0.6
@@ -35,6 +36,7 @@ google-cloud-vision==0.35.2
google-cloud-storage==1.13.2
networkx==2.2
msgpack==0.6.0
zipstream==1.1.4

# Testing dependencies
factory-boy==2.10.0

0 comments on commit 96a9fc1

Please sign in to comment.
You can’t perform that action at this time.