|
@@ -2,14 +2,14 @@ |
|
|
|
"translatorID": "94a8328a-ec87-4ba0-82b6-cf3000ea1dee", |
|
|
|
"label": "PyPI", |
|
|
|
"creator": "Philipp Zumstein", |
|
|
|
"target": "^https?://pypi\\.python\\.org/pypi", |
|
|
|
"target": "^https?://pypi\\.org/(project|search)", |
|
|
|
"minVersion": "3.0", |
|
|
|
"maxVersion": "", |
|
|
|
"priority": 100, |
|
|
|
"inRepository": true, |
|
|
|
"translatorType": 4, |
|
|
|
"browserSupport": "gcsibv", |
|
|
|
"lastUpdated": "2017-05-13 13:11:51" |
|
|
|
"lastUpdated": "2018-11-03 08:21:27" |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
@@ -37,9 +37,9 @@ |
|
|
|
|
|
|
|
|
|
|
|
function detectWeb(doc, url) { |
|
|
|
if (ZU.xpathText(doc, '//head/link[@rel="meta" and @title="DOAP"]/@href')) { |
|
|
|
if (url.includes('/project')) { |
|
|
|
return "computerProgram"; |
|
|
|
} else if ((url.indexOf('action=search')>-1 || url.indexOf('action=browse')>-1) && getSearchResults(doc, true)) { |
|
|
|
} else if (getSearchResults(doc, true)) { |
|
|
|
return "multiple"; |
|
|
|
} |
|
|
|
} |
|
@@ -48,7 +48,7 @@ function detectWeb(doc, url) { |
|
|
|
function getSearchResults(doc, checkOnly) { |
|
|
|
var items = {}; |
|
|
|
var found = false; |
|
|
|
var rows = ZU.xpath(doc, '//tr/td/a[contains(@href, "/pypi/")]'); |
|
|
|
var rows = ZU.xpath(doc, '//a[contains(@class, "package-snippet") and contains(@href, "/project/")]'); |
|
|
|
for (var i=0; i<rows.length; i++) { |
|
|
|
var href = rows[i].href; |
|
|
|
var title = ZU.trimInternal(rows[i].textContent); |
|
@@ -82,51 +82,58 @@ function doWeb(doc, url) { |
|
|
|
function scrape(doc, url) { |
|
|
|
var item = new Z.Item("computerProgram"); |
|
|
|
|
|
|
|
//info from h1 and following line |
|
|
|
item.title = ZU.xpathText(doc, '//h1[not(@id="logoheader")]'); |
|
|
|
var found = item.title.match(/\sv?([\d.]+)$/); |
|
|
|
// info from h1 and meta tag |
|
|
|
item.title = ZU.xpathText(doc, '//h1[contains(@class, "package-header__name")]'); |
|
|
|
var found = item.title.match(/\sv?([\d.]+)\s*$/); |
|
|
|
if (found) { |
|
|
|
item.title = item.title.slice(0,found.index); |
|
|
|
item.version = found[1]; |
|
|
|
} |
|
|
|
var subtitle = ZU.xpathText(doc, '//h1[not(@id="logoheader")]/following::p[1]'); |
|
|
|
var subtitle = ZU.xpathText(doc, '//meta[@property="og:description"]/@content'); |
|
|
|
if (subtitle) { |
|
|
|
item.title += ": " + subtitle; |
|
|
|
} |
|
|
|
|
|
|
|
//info from top level li |
|
|
|
var author = ZU.xpathText(doc, '//div[contains(@class, "section")]/ul/li[strong[contains(., "Author")]]/span'); |
|
|
|
var author = ZU.xpathText(doc, '//div[contains(@class, "vertical-tabs__tabs")]//p[strong[contains(., "Author")]]/a'); |
|
|
|
if (author) { |
|
|
|
item.creators.push(ZU.cleanAuthor(author, "author")); |
|
|
|
} |
|
|
|
item.url = ZU.xpathText(doc, '//div[contains(@class, "section")]/ul/li[strong[contains(., "Home Page")]]/a'); |
|
|
|
item.rights = ZU.xpathText(doc, '//div[contains(@class, "section")]/ul/li[strong[contains(., "License")]]/span'); |
|
|
|
item.system = ZU.xpathText(doc, '//div[contains(@class, "section")]/ul/li[strong[contains(., "Platform")]]/span'); |
|
|
|
item.url = ZU.xpathText(doc, '//div[contains(@class, "vertical-tabs__tabs")]//a[contains(., "Homepage")]/@href'); |
|
|
|
item.rights = ZU.xpathText(doc, '//div[contains(@class, "vertical-tabs__tabs")]//p[strong[contains(., "License")]]/text()'); |
|
|
|
var keywords = ZU.xpath(doc, '//div[contains(@class, "vertical-tabs__tabs")]//span[contains(@class, "package-keyword")]'); |
|
|
|
for (let keyword of keywords) { |
|
|
|
item.tags.push(keyword.textContent.trim()); |
|
|
|
} |
|
|
|
|
|
|
|
//info from linked categories |
|
|
|
var programmingLanguage = ZU.xpath(doc, '//li/a[contains(., "Programming Language ::")]'); |
|
|
|
for (var i=0; i<programmingLanguage.length; i++) { |
|
|
|
var split = programmingLanguage[i].textContent.split("::"); |
|
|
|
// info from linked categories |
|
|
|
var programmingLanguage = ZU.xpath(doc, '//div[contains(@class, "vertical-tabs__tabs")]//a[contains(@href, "/search/?c=Programming+Language+")]'); |
|
|
|
for (let i=0; i<programmingLanguage.length; i++) { |
|
|
|
let split = programmingLanguage[i].textContent.split("::"); |
|
|
|
let value = split[0].trim(); |
|
|
|
if (item.programmingLanguage) { |
|
|
|
if (item.programmingLanguage.indexOf(split[1].trim())==-1) { |
|
|
|
item.programmingLanguage += ", " + split[1].trim(); |
|
|
|
if (!item.programmingLanguage.includes(value)) { |
|
|
|
item.programmingLanguage += ", " + value; |
|
|
|
} |
|
|
|
} else { |
|
|
|
item.programmingLanguage = split[1].trim(); |
|
|
|
item.programmingLanguage = value; |
|
|
|
} |
|
|
|
} |
|
|
|
var topics = ZU.xpath(doc, '//li/a[contains(., "Topic ::")]'); |
|
|
|
for (var i=0; i<topics.length; i++) { |
|
|
|
var split = topics[i].textContent.split(" :: "); |
|
|
|
item.tags.push(split.slice(1).join(" - ")); |
|
|
|
var topics = ZU.xpath(doc, '//div[contains(@class, "vertical-tabs__tabs")]//a[contains(@href, "/search/?c=Topic+")]'); |
|
|
|
for (let topic of topics) { |
|
|
|
let split = topic.textContent.trim().split(" :: "); |
|
|
|
item.tags.push(split.join(" - ")); |
|
|
|
} |
|
|
|
var license = ZU.xpathText(doc, '//li/a[contains(., "License ::")]'); |
|
|
|
var license = ZU.xpathText(doc, '//div[contains(@class, "vertical-tabs__tabs")]//a[contains(@href, "/search/?c=License+")]'); |
|
|
|
if (license && !item.rights) { |
|
|
|
item.rights = license.split(" :: ")[2]; |
|
|
|
item.rights = license; |
|
|
|
} |
|
|
|
if (item.rights) { |
|
|
|
item.rights = item.rights.replace(/\s?\([^\)]+\)/g, ''); |
|
|
|
} |
|
|
|
var os = ZU.xpathText(doc, '//li/a[contains(., "Operating System ::")]'); |
|
|
|
if (os && !item.system) { |
|
|
|
item.system = os.split(" :: ")[1]; |
|
|
|
var osList = ZU.xpath(doc, '//div[contains(@class, "vertical-tabs__tabs")]//a[contains(@href, "/search/?c=Operating+System+")]'); |
|
|
|
if (osList && !item.system) { |
|
|
|
osList = osList.map(os => os.textContent.trim()); |
|
|
|
item.system = osList.join(', '); |
|
|
|
} |
|
|
|
|
|
|
|
item.attachments.push({ |
|
@@ -141,12 +148,12 @@ function scrape(doc, url) { |
|
|
|
var testCases = [ |
|
|
|
{ |
|
|
|
"type": "web", |
|
|
|
"url": "https://pypi.python.org/pypi?%3Aaction=search&term=zotero&submit=search", |
|
|
|
"url": "https://pypi.org/search/?q=zotero", |
|
|
|
"items": "multiple" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"type": "web", |
|
|
|
"url": "https://pypi.python.org/pypi/simplejson/3.10.0", |
|
|
|
"url": "https://pypi.org/project/simplejson/3.10.0/", |
|
|
|
"items": [ |
|
|
|
{ |
|
|
|
"itemType": "computerProgram", |
|
@@ -160,18 +167,18 @@ var testCases = [ |
|
|
|
], |
|
|
|
"libraryCatalog": "PyPI", |
|
|
|
"programmingLanguage": "Python", |
|
|
|
"rights": "MIT License", |
|
|
|
"rights": "Academic Free License, MIT License", |
|
|
|
"shortTitle": "simplejson", |
|
|
|
"system": "any", |
|
|
|
"url": "http://github.com/simplejson/simplejson", |
|
|
|
"version": "3.10.0", |
|
|
|
"attachments": [ |
|
|
|
{ |
|
|
|
"title": "Snapshot" |
|
|
|
} |
|
|
|
], |
|
|
|
"tags": [ |
|
|
|
"Software Development - Libraries - Python Modules" |
|
|
|
{ |
|
|
|
"tag": "Software Development - Libraries - Python Modules" |
|
|
|
} |
|
|
|
], |
|
|
|
"notes": [], |
|
|
|
"seeAlso": [] |
|
@@ -180,12 +187,12 @@ var testCases = [ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"type": "web", |
|
|
|
"url": "https://pypi.python.org/pypi?:action=browse&show=all&c=385&c=393", |
|
|
|
"url": "https://pypi.org/search/?c=Topic+%3A%3A+Scientific%2FEngineering+%3A%3A+Image+Recognition", |
|
|
|
"items": "multiple" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"type": "web", |
|
|
|
"url": "https://pypi.python.org/pypi/lxml/3.7.3", |
|
|
|
"url": "https://pypi.org/project/lxml/3.7.3/", |
|
|
|
"items": [ |
|
|
|
{ |
|
|
|
"itemType": "computerProgram", |
|
@@ -203,16 +210,74 @@ var testCases = [ |
|
|
|
"shortTitle": "lxml", |
|
|
|
"system": "OS Independent", |
|
|
|
"url": "http://lxml.de/", |
|
|
|
"version": "3.7.3", |
|
|
|
"attachments": [ |
|
|
|
{ |
|
|
|
"title": "Snapshot" |
|
|
|
} |
|
|
|
], |
|
|
|
"tags": [ |
|
|
|
"Software Development - Libraries - Python Modules", |
|
|
|
"Text Processing - Markup - HTML", |
|
|
|
"Text Processing - Markup - XML" |
|
|
|
{ |
|
|
|
"tag": "Software Development - Libraries - Python Modules" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "Text Processing - Markup - HTML" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "Text Processing - Markup - XML" |
|
|
|
} |
|
|
|
], |
|
|
|
"notes": [], |
|
|
|
"seeAlso": [] |
|
|
|
} |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"type": "web", |
|
|
|
"url": "https://pypi.org/project/papis-zotero/", |
|
|
|
"items": [ |
|
|
|
{ |
|
|
|
"itemType": "computerProgram", |
|
|
|
"title": "papis-zotero: Interact with zotero using papis", |
|
|
|
"creators": [ |
|
|
|
{ |
|
|
|
"firstName": "Alejandro", |
|
|
|
"lastName": "Gallo", |
|
|
|
"creatorType": "author" |
|
|
|
} |
|
|
|
], |
|
|
|
"libraryCatalog": "PyPI", |
|
|
|
"programmingLanguage": "Python", |
|
|
|
"rights": "GNU General Public License v3", |
|
|
|
"shortTitle": "papis-zotero", |
|
|
|
"system": "MacOS, POSIX, Unix", |
|
|
|
"url": "https://github.com/papis/papis-zotero", |
|
|
|
"attachments": [ |
|
|
|
{ |
|
|
|
"title": "Snapshot" |
|
|
|
} |
|
|
|
], |
|
|
|
"tags": [ |
|
|
|
{ |
|
|
|
"tag": "Utilities" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "bibtex," |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "biliography" |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "cli," |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "management," |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "papis," |
|
|
|
}, |
|
|
|
{ |
|
|
|
"tag": "zotero," |
|
|
|
} |
|
|
|
], |
|
|
|
"notes": [], |
|
|
|
"seeAlso": [] |
|
|
0 comments on commit
313362a