Skip to content
Please note that GitHub no longer supports your web browser.

We recommend upgrading to the latest Google Chrome or Firefox.

Learn more
Permalink
Browse files

Finalize converting NER Classifier to WANE Format (#378).

- Fully resolves #297 
- Overrides NER Classifier output to PERSON -> persons, LOCATION -> locations, ORGANIZATION -> organizations
  • Loading branch information...
SinghGursimran authored and ruebot committed Nov 14, 2019
1 parent c353dae commit f9ce82698995cda48b46eb4e012560c400c43643
Showing with 13 additions and 1 deletion.
  1. +13 −1 src/main/scala/io/archivesunleashed/app/ExtractEntities.scala
@@ -48,6 +48,18 @@ object ExtractEntities {
extractAndOutput(iNerClassifierFile, rdd, outputFile)
}

/** Converts output of the NER classifier to WANE format.
  *
  * Every occurrence of the classifier's entity-type keys is renamed:
  * `PERSON":` becomes `persons":`, `ORGANIZATION":` becomes
  * `organizations":` and `LOCATION":` becomes `locations":`. All other
  * text is passed through unchanged.
  *
  * @param input output of the NER classifier, as a string
  * @return the same string with the entity-type keys renamed to WANE format
  */
def waneFormat(input: String): String = {
  // The keys are plain literals, so use literal `replace` rather than the
  // regex-based `replaceAll`; the closing quote and colon are part of each
  // pattern, so only text that ends like a key is rewritten.
  input
    .replace("PERSON\":", "persons\":")
    .replace("ORGANIZATION\":", "organizations\":")
    .replace("LOCATION\":", "locations\":")
}

/** Saves the NER output to file from a given RDD.
*
* @param iNerClassifierFile path of classifier file
@@ -61,7 +73,7 @@ object ExtractEntities {
val r = rdd.mapPartitions(iter => {
NERClassifier.apply(iNerClassifierFile)
iter.map(r => (("{" + r._1), r._2,
("\"named_entities\":" + NERClassifier.classify(r._3)), (r._4 + "}")))
("\"named_entities\":" + waneFormat(NERClassifier.classify(r._3).toString)), (r._4 + "}")))
})
r.map(r => r._1 + "," + r._2 + "," + r._3 + "," + r._4)
.saveAsTextFile(outputFile)

0 comments on commit f9ce826

Please sign in to comment.
You can’t perform that action at this time.