"translatorID": "c159dcfe-8a53-4301-a499-30f6549c340d",
"label": "DOI",
"creator": "Simon Kornblith",
"target": "",
"minVersion": "3.0",
"maxVersion": "",
"priority": 400,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsv",
"lastUpdated": "2016-11-05 10:57:01"
// Shared state for the DOI lookup. Both objects are module-level because
// they are filled across successive retrieveDOIs calls (the first call and
// any follow-up call for DOIs that are still unresolved).
//
// selectArray: DOI -> label offered to the user in the selection dialog
// items:       DOI -> item returned by the search translator for that DOI
var selectArray = {};
var items = {};
/**
 * Builds a list of the distinct DOIs found in the text of a page.
 *
 * Text nodes containing "10." are selected with XPath (text whose parent is
 * a <script> or <style> element is skipped), every node is scanned with a
 * DOI regular expression, and a trailing ")" or "}" is trimmed from a match
 * that contains no matching opening bracket.
 *
 * @param {Document} doc - document to scan; only doc.evaluate() is used
 * @return {String[]} unique DOIs, in order of first appearance
 */
function getDOIs(doc) {
	// TODO Detect DOIs more correctly.
	// The actual rules for DOIs are very lax-- but we're more strict.
	// Specifically, we should allow space characters, and all Unicode
	// characters except for control characters. Here, we're cheating
	// by not allowing ampersands, to fix an issue with getting DOIs
	// out of URLs.
	// Additionally, all content inside <noscript> is picked up as text()
	// by the xpath, which we don't necessarily want to exclude, but
	// that means that we can get DOIs inside node attributes and we should
	// exclude quotes in this case.
	// DOI should never end with a period or a comma (we hope)
	const DOIre = /\b10\.[0-9]{4,}\/[^\s&"']*[^\s&"'.,]/g;
	const DOIXPath = "//text()[contains(., '10.')]"
		+ "[not(parent::script or parent::style)]";
	// [closing, opening] pairs: a match ending in the closing character is
	// shortened by one when the opening character does not occur in it,
	// e.g. a DOI that was written inside parentheses.
	const bracketPairs = [[")", "("], ["}", "{"]];

	var dois = [];
	var node, m, DOI;
	var results = doc.evaluate(DOIXPath, doc, null, XPathResult.ANY_TYPE, null);
	while ((node = results.iterateNext())) {
		// The regex is global, so exec() continues from lastIndex; make sure
		// every text node is scanned from its beginning.
		DOIre.lastIndex = 0;
		while ((m = DOIre.exec(node.nodeValue))) {
			DOI = m[0];
			for (var i = 0; i < bracketPairs.length; i++) {
				if (DOI.slice(-1) === bracketPairs[i][0]
						&& DOI.indexOf(bracketPairs[i][1]) === -1) {
					DOI = DOI.slice(0, -1);
				}
			}
			// only add new DOIs
			if (dois.indexOf(DOI) === -1) {
				dois.push(DOI);
			}
		}
	}
	return dois;
}
/**
 * Decides whether this translator applies to the given page.
 *
 * @param {Document} doc - page document, handed to getDOIs()
 * @param {String} url - page URL, checked against the blacklist
 * @return {String|Boolean} "multiple" when the URL is not blacklisted and
 *     getDOIs() finds at least one DOI; false otherwise
 */
function detectWeb(doc, url) {
	// Blacklist the advertising iframe in ScienceDirect guest mode:
	// This can be removed from blacklist when 5c324134c636a3a3e0432f1d2f277a6bc2717c2a hits all clients (Z 3.0+)
	// The pattern also rejects any URL whose host part contains "google.com".
	const blacklistRe = /^https?:\/\/[^/]*(?:google\.com|sciencedirect\.com\/science\/advertisement\/)/i;
	if (blacklistRe.test(url)) {
		return false;
	}
	return getDOIs(doc).length ? "multiple" : false;
}
/**
 * Called once all DOI lookups have finished: lets the user pick among the
 * DOIs that were resolved and saves the picked items.
 *
 * Reads the globals selectArray (DOI -> label shown in the picker) and
 * items (DOI -> item that retrieveDOIs stored instead of saving).
 *
 * @param {Document} doc - page being translated (not used by this function)
 * @throws {String} "DOI Translator: could not find DOI" when no DOI was
 *     resolved, i.e. selectArray is empty
 */
function completeDOIs(doc) {
	// all DOIs retrieved now
	if (Object.keys(selectArray).length === 0) {
		throw "DOI Translator: could not find DOI";
	}

	Zotero.selectItems(selectArray, function(selectedDOIs) {
		// nothing was selected
		if (!selectedDOIs) return true;

		// save exactly the items whose DOI was picked
		for (var DOI in selectedDOIs) {
			items[DOI].complete();
		}
	});
}
/**
 * Looks up every DOI in `dois` with the next provider in `providers`.
 *
 * For each DOI a "search" translator is run with the provider's translator
 * ID. Items that come back are not saved; they are stored in the globals
 * `items` and `selectArray` (keyed by DOI). When all lookups of this round
 * have finished, the DOIs that produced no item are retried with the next
 * provider; once no provider or no unresolved DOI is left, completeDOIs()
 * is called.
 *
 * @param {String[]} dois - DOIs to look up in this round
 * @param {Document} doc - page being translated, passed on to completeDOIs()
 * @param {Object[]} providers - objects with `id` (translator ID) and `name`;
 *     the first entry is removed from this array and used for this round
 */
function retrieveDOIs(dois, doc, providers) {
	// number of lookups of this round that have not reported "done" yet
	var numDois = dois.length;
	var provider = providers.shift();

	var remainingDOIs = dois.slice();//copy array but not by reference

	for (var i=0, n=dois.length; i<n; i++) {
		(function(doc, DOI) {
			var translate = Zotero.loadTranslator("search");
			translate.setTranslator(provider.id);

			var item = {"itemType":"journalArticle", "DOI":DOI};
			translate.setSearch(item);

			// don't save when item is done
			translate.setHandler("itemDone", function(translate, item) {
				selectArray[item.DOI] = item.title;
				if (!item.title) {
					Zotero.debug("No title available for " + item.DOI);
					item.title = "[No Title]";
					selectArray[item.DOI] = "[" + item.DOI + "]";
				}
				items[item.DOI] = item;

				// done means not remaining anymore
				var position = remainingDOIs.indexOf(item.DOI);
				if (position > -1) {
					remainingDOIs.splice(position, 1);
				} else {
					Z.debug(item.DOI + " not anymore in the list of remainingDOIs = " + remainingDOIs);
				}
			});

			translate.setHandler("done", function(translate) {
				numDois--;
				// only the last lookup of the round decides how to go on
				if (numDois <= 0) {
					Z.debug("Done with " + provider.name + ". Remaining DOIs: " + remainingDOIs);
					if (providers.length > 0 && remainingDOIs.length > 0) {
						retrieveDOIs(remainingDOIs, doc, providers);
					} else {
						completeDOIs(doc);
					}
				}
			});

			// Don't throw on error
			translate.setHandler("error", function() {});

			translate.translate();
		})(doc, dois[i]);
	}
}
/**
 * Save entry point: collects the DOIs on the page and starts looking them
 * up with the configured providers.
 *
 * @param {Document} doc - page document to scan for DOIs
 * @param {String} url - page URL (not used by this function)
 */
function doWeb(doc, url) {
	var dois = getDOIs(doc);
	// Search translators to query, given as translator ID plus a name used
	// in debug output. retrieveDOIs() consumes this list front to back.
	var providers = [
		{
			id : "11645bd1-0420-45c1-badb-53fb41eeb753",
			name : "CrossRef"
		},
		{
			id : "9f1fb86b-92c8-4db7-b8ee-0b481d456428",
			name : "DataCite"
		}
	];
	retrieveDOIs(dois, doc, providers);
}
var testCases = [
	{
		"type": "web",
		"url": "",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "",
		"items": "multiple"
	},
	{
		"type": "web",
		"url": "",
		"items": "multiple"
	}
]
