Permalink
Join GitHub today
GitHub is home to over 40 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
Cannot retrieve contributors at this time
<?PHP | |
require_once ( __DIR__ . '/lib/initialize.php' ) ; | |
$action = get_request ( 'action' , '' ) ; | |
$author_qids_string = get_request( 'id', '' ) ; | |
$article_limit = get_request ( 'limit', '' ) ; | |
if ($article_limit == '' ) $article_limit = 500 ; | |
$limit_options = [10, 50, 200, 500] ; | |
$author_qids = preg_split("/[^[:alnum:]]+/",$author_qids_string); | |
$name_strings = get_request ( 'name_strings' , array() ) ; | |
$input_names = array(); | |
foreach ($name_strings AS $q => $name_list) { | |
$input_names[$q] = preg_split('/[\r\n]+/', $name_list); | |
} | |
print get_common_header ( '' , 'Author Disambiguator' ) ; | |
print "<h3>Multi-author match</h3>"; | |
if ( $action == 'match' ) { | |
print "<form method='post' class='form' action='https://tools.wmflabs.org/quickstatements/api.php'>" ; | |
print "<input type='hidden' name='action' value='import' />" ; | |
print "<input type='hidden' name='temporary' value='1' />" ; | |
print "<input type='hidden' name='openpage' value='1' />" ; | |
$papers = get_request ( 'papers' , array() ) ; | |
$commands = match_authors_qs_commands ( $papers ) ; | |
print "Quickstatements V1 commands for replacing author name strings with author item:" ; | |
print "<textarea name='data' rows=20>" . implode("\n",$commands) . "</textarea>" ; | |
print "<input type='submit' class='btn btn-primary' name='qs' value='Send to Quickstatements' />" ; | |
print "</form>" ; | |
exit ( 0 ) ; | |
} | |
print "<form method='get' class='form'> | |
<div style='display:table'> | |
<div style='display:table-row'> | |
<div style='display:table-cell'>Author Wikidata IDs (space-separated):</div> | |
<div style='display:table-cell'> | |
<input name='id' value='" . escape_attribute(implode(" ", $author_qids)) . "' type='text' placeholder='Qxxxxx Qyyyyy' size=65 /> </div></div></div>" ; | |
$wil = new WikidataItemList ; | |
$wil->loadItems ( $author_qids ) ; | |
$names = array(); | |
foreach ($author_qids AS $author_qid) { | |
if (isset($input_names[$author_qid]) && count($input_names[$author_qid]) > 0) continue; | |
$author_item = $wil->getItem($author_qid); | |
if ( !isset($author_item) ) { | |
print "<h2>Warning: $author_qid not found!</h2>" ; | |
print_footer() ; | |
exit ( 0 ) ; | |
} | |
$auth_name = $author_item->getLabel() ; | |
$nm = new NameModel($auth_name); | |
$names[$author_qid] = $nm->default_search_strings(); | |
} | |
$author_qids_for_sparql = 'wd:' . implode ( ' wd:' , $author_qids) ; | |
$sparql = "SELECT DISTINCT ?author_qid ?name { VALUES ?author_qid { $author_qids_for_sparql } . | |
?q p:P50 ?auth_statement . | |
?auth_statement ps:P50 ?author_qid ; | |
pq:P1932 ?name . | |
}" ; | |
$query_result = getSPARQL( $sparql ) ; | |
$bindings = $query_result->results->bindings ; | |
foreach ( $bindings AS $binding ) { | |
$author_qid = item_id_from_uri($binding->author_qid->value) ; | |
if (isset($input_names[$author_qid]) && count($input_names[$author_qid]) > 0) continue; | |
$names[$author_qid][] = $binding->name->value ; | |
} | |
print "<div style='display:table'>Name strings for matching (one per line)</div>"; | |
foreach ($author_qids as $author_qid) { | |
if (isset($input_names[$author_qid]) && count($input_names[$author_qid]) > 0) { | |
$names[$author_qid] = $input_names[$author_qid] ; | |
} | |
$names[$author_qid] = array_unique($names[$author_qid]) ; | |
$author_item = $wil->getItem($author_qid); | |
print "<div style='display:table-cell'>"; | |
print wikidata_link($author_qid, $author_item->getLabel(), 'blue'); | |
print ": <textarea name='name_strings[$author_qid]' rows=5>" . implode("\n",$names[$author_qid]) . "</textarea></div>" ; | |
} | |
print "</div><div> | |
Limit: <select name='limit'>" ; | |
foreach ($limit_options AS $limit_option) { | |
print "<option value='$limit_option'" ; | |
if ($article_limit == $limit_option) print ' selected' ; | |
print ">$limit_option</option>" ; | |
} | |
print "</select>"; | |
print "<input type='submit' class='btn btn-primary' name='doit' value='Find papers by these authors' /></div>" ; | |
print "</form>" ; | |
$name_hash = array(); | |
foreach ($author_qids as $author_qid) { | |
foreach ($names[$author_qid] as $name) { | |
if ( isset($name_hash[$name]) ) { | |
$match_q = $name_hash[$name]; | |
if ($match_q != $author_qid) { | |
print "WARNING: $author_qid and $match_q both map to the name '$name'"; | |
} | |
} else { | |
$name_hash[$name] = $author_qid ; | |
} | |
} | |
} | |
// Want to find all papers with all of these author names, or with one | |
// of the authors as P50 (author) and the other(s) as names | |
$values_choices = array(); | |
$name_selectors = array(); | |
foreach ($author_qids as $num => $author_qid) { | |
$label = "?name$num" ; | |
$values_choices[$num] = "VALUES $label { " . '"' . implode('" "', $names[$author_qid]) . '" } .' ; | |
$name_selectors[$num] = "wdt:P2093 $label" ; | |
} | |
$all_names_query = "{ " . implode(" ", $values_choices) . " ?q " . implode(" ; ", $name_selectors) . " . }"; | |
$part_names_queries = [] ; | |
foreach ($author_qids as $num => $author_qid) { | |
$name_selectors_copy = $name_selectors; | |
unset($name_selectors_copy[$num]); | |
$part_names_queries[] = "{ " . implode(" ", $values_choices) . " ?q wdt:P50 wd:$author_qid ; " . implode(" ; ", $name_selectors_copy) . " . }"; | |
} | |
$sparql = "SELECT ?q WHERE { $all_names_query UNION " . implode(" UNION ", $part_names_queries) . " . } LIMIT $article_limit" ; | |
$items_papers = getSPARQLitems ( $sparql ) ; | |
$limit_reached = (count($items_papers) == $article_limit) ; | |
$items_papers = array_unique( $items_papers ); | |
$article_items = generate_article_entries( $items_papers ); | |
$to_load = array() ; | |
foreach ( $article_items AS $article ) { | |
foreach ( $article->authors AS $auth ) $to_load[] = $auth ; | |
foreach ( $article->published_in AS $pub ) $to_load[] = $pub ; | |
foreach ( $article->topics AS $topic ) $to_load[] = $topic ; | |
} | |
$to_load = array_unique( $to_load ); | |
$wil->loadItems ( $to_load ) ; | |
usort( $article_items, 'WikidataArticleEntry::dateCompare' ) ; | |
print "<form method='post' class='form' target='_blank' action='?'> | |
<input type='hidden' name='action' value='match' /> | |
<input type='hidden' name='id' value='" . escape_attribute(implode(" ", $author_qids)) . "' />" ; | |
// Publications | |
$name_counter = array() ; | |
$author_qid_counter = array() ; | |
print "<h2>Listed Publications</h2>" ; | |
print "<p>" . count($article_items) . " publications found</p>" ; | |
print "<div class='group'>" ; | |
?> | |
<div> | |
<a href='#' onclick='$($(this).parents("div.group")).find("input[type=checkbox]").prop("checked",true);return false'>Check all</a> | | |
<a href='#' onclick='$($(this).parents("div.group")).find("input[type=checkbox]").prop("checked",false);return false'>Uncheck all</a> | |
</div> | |
<?PHP | |
print "<table class='table table-striped table-condensed'>" ; | |
print "<tbody>" ; | |
print "<tr><th>Title</th>" ; | |
print "<th>Authors (<span style='color:green'>identified</span>)</th>" ; | |
print "<th>Published In</th><th>Identifier(s)</th>" ; | |
print "<th>Topic</th><th>Published Date</th></tr>" ; | |
foreach ( $article_items AS $article ) { | |
$q = $article->q ; | |
$formatted_authors = array(); | |
$highlighted_authors = array(); | |
foreach ( $article->author_names AS $num => $a ) { | |
if ( isset($name_hash[$a]) ) { | |
$match_q = $name_hash[$a] ; | |
$highlighted_authors[] = $num ; | |
$formatted_authors[$num] = "[$num]<b>$a</b>" . | |
"<input type='checkbox' name='papers[$q:$num:$match_q]' value='$q:$num:$match_q' />=$match_q</b>" ; | |
} else { | |
$formatted_authors[$num] = "[$num]<a href='index.php?name=" . urlencode($a) . "'>$a</a>" ; | |
} | |
$name_counter[$a] = isset($name_counter[$a]) ? $name_counter[$a]+1 : 1 ; | |
} | |
foreach ( $article->authors AS $num => $qt ) { | |
$i2 = $wil->getItem ( $qt ) ; | |
$label = $i2->getLabel() ; | |
$display_num = $num ; | |
if (isset($formatted_authors[$num])) { | |
$display_num = "$num-$qt"; | |
} | |
$formatted_authors[$display_num] = "[$display_num]<a href='author_item.php?id=" . $i2->getQ() . "' style='color:green'>$label</a>" ; | |
if ( in_array($qt, $author_qids) ) { | |
$highlighted_authors[] = $display_num ; | |
} else { | |
$author_qid_counter[$qt] = isset($author_qid_counter[$qt]) ? $author_qid_counter[$qt]+1 : 1 ; | |
} | |
} | |
ksort($formatted_authors); | |
$authors_list = implode ( ', ' , compress_display_list($formatted_authors, $highlighted_authors, 20, 10, 2)) ; | |
$published_in = array() ; | |
foreach ( $article->published_in AS $qt ) { | |
$i2 = $wil->getItem ( $qt ) ; | |
if ( isset($i2) ) $published_in[] = wikidata_link($i2->getQ(), $i2->getLabel(), 'black') . " [<a href='https://tools.wmflabs.org/scholia/venue/" . $i2->getQ() . "/missing' target='_blank'>missing</a>]" ; | |
} | |
$published_in_list = implode ( ', ', $published_in ) ; | |
print "<tr>" ; | |
print "<td style='width:20%;font-size:10pt'>" . wikidata_link($q, $article->title, '') . " <a href='work_item.php?id=" . $q . "'>[work]</a></td>" ; | |
print "<td style='width:50%;font-size:9pt'>$authors_list</td>" ; | |
print "<td style='font-size:9pt'>$published_in_list</td>" ; | |
print "<td style='font-size:9pt'>" ; | |
if ( $article->doi != '' ) { | |
print "DOI: <a target='_blank' href='https://doi.org/$article->doi'>$article->doi</a><br/>" ; | |
} | |
if ( $article->pmid != '' ) { | |
print "PubMed: <a target='_blank' href='https://www.ncbi.nlm.nih.gov/pubmed/?term=$article->pmid'>$article->pmid</a>" ; | |
} | |
print "</td>" ; | |
print "<td style='font-size:9pt'>" ; | |
if ( count($article->topics) > 0 ) { | |
$topics = [] ; | |
foreach ( $article->topics AS $qt ) { | |
$i2 = $wil->getItem($qt) ; | |
if ( !isset($i2) ) continue ; | |
$topics[] = wikidata_link($i2->getQ(), $i2->getLabel(), 'brown') . " [<a href='https://tools.wmflabs.org/scholia/topic/" . $i2->getQ() . "/missing' target='_blank'>missing</a>]" ; | |
} | |
print implode ( '; ' , $topics ) ; | |
} | |
print "</td>" ; | |
print "<td style='font-size:9pt'>" ; | |
print $article->formattedPublicationDate () ; | |
print "</td>" ; | |
print "</tr>" ; | |
} | |
print "</tbody></table></div>" ; | |
print "<div style='margin:20px'><input type='submit' name='doit' value='Quickstatements to add selected authorships for these authors' class='btn btn-primary' /></div>" ; | |
print "</form>"; | |
arsort ( $author_qid_counter, SORT_NUMERIC ) ; | |
print "</div><h2>Common author items in these papers</h2>" ; | |
print "<ul>" ; | |
foreach ( $author_qid_counter AS $qt => $cnt ) { | |
if ( $cnt == 1 ) break ; | |
$i2 = $wil->getItem($qt) ; | |
$label = $i2->getLabel() ; | |
print "<li><a href='author_item.php?limit=50&id=$qt' style='color:green'>$label</a> ($cnt×) - <a href='match_multi_authors.php?limit=50&id=$author_qids_string+$qt'>Add to search</a></li>" ; | |
} | |
print "</ul>" ; | |
arsort ( $name_counter , SORT_NUMERIC ) ; | |
print "<h2>Common names in these papers</h2>" ; | |
print "<ul>" ; | |
foreach ( $name_counter AS $a => $cnt ) { | |
if ( $cnt == 1 ) break ; | |
print "<li><a href='index.php?limit=50&name=" . urlencode($a) . "'>$a</a> ($cnt×)</li>" ; | |
} | |
print "</ul><div>" ; | |
print_footer() ; | |
?> |