Skip to content
Permalink
Browse files

Makes ArchiveRecordImpl serializable by removing non-serializable ARCRecord and WARCRecord variables. Also removes unused headerResponseFormat variable. (#316)
  • Loading branch information...
jrwiebe authored and ruebot committed Apr 22, 2019
1 parent 8504190 commit 5cb05f72ed18852364f084adf3417eced2cb8e91
Showing with 23 additions and 31 deletions.
  1. +23 −31 src/main/scala/io/archivesunleashed/ArchiveRecord.scala
@@ -69,54 +69,47 @@ trait ArchiveRecord extends Serializable {
* @param r the serialized record
*/
class ArchiveRecordImpl(r: SerializableWritable[ArchiveRecordWritable]) extends ArchiveRecord {
// Option<t> would require refactor of methods. Ignore.
// scalastyle:off null
var arcRecord: ARCRecord = null
var warcRecord: WARCRecord = null
// scalastyle:on null
var headerResponseFormat: String = "US-ASCII"

if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
arcRecord = r.t.getRecord.asInstanceOf[ARCRecord]
} else if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.WARC) {
warcRecord = r.t.getRecord.asInstanceOf[WARCRecord]
}
val recordFormat = r.t.getFormat
val ISO8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX")

val getArchiveFilename: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC){
arcRecord.getMetaData.getReaderIdentifier()
if (recordFormat == ArchiveRecordWritable.ArchiveFormat.ARC){
r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getReaderIdentifier()
} else {
warcRecord.getHeader.getReaderIdentifier()
r.t.getRecord.asInstanceOf[WARCRecord].getHeader.getReaderIdentifier()
}
}

val getCrawlDate: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC){
ExtractDate(arcRecord.getMetaData.getDate, ExtractDate.DateComponent.YYYYMMDD)
if (recordFormat == ArchiveRecordWritable.ArchiveFormat.ARC){
ExtractDate(r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getDate,
ExtractDate.DateComponent.YYYYMMDD)
} else {
ExtractDate(
ArchiveUtils.get14DigitDate(
ISO8601.parse(warcRecord.getHeader.getDate)), ExtractDate.DateComponent.YYYYMMDD)
ISO8601.parse(r.t.getRecord.asInstanceOf[WARCRecord].getHeader.getDate)),
ExtractDate.DateComponent.YYYYMMDD)
}
}

val getCrawlMonth: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
ExtractDate(arcRecord.getMetaData.getDate, ExtractDate.DateComponent.YYYYMM)
if (recordFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
ExtractDate(r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getDate,
ExtractDate.DateComponent.YYYYMM)
} else {
ExtractDate(
ArchiveUtils.get14DigitDate(
ISO8601.parse(warcRecord.getHeader.getDate)), ExtractDate.DateComponent.YYYYMM)
ISO8601.parse(r.t.getRecord.asInstanceOf[WARCRecord].getHeader.getDate)),
ExtractDate.DateComponent.YYYYMM)
}
}

val getContentBytes: Array[Byte] = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC)
if (recordFormat == ArchiveRecordWritable.ArchiveFormat.ARC)
{
ArcRecordUtils.getBodyContent(arcRecord)
ArcRecordUtils.getBodyContent(r.t.getRecord.asInstanceOf[ARCRecord])
} else {
WarcRecordUtils.getContent(warcRecord)
WarcRecordUtils.getContent(r.t.getRecord.asInstanceOf[WARCRecord])
}
}

@@ -125,25 +118,24 @@ class ArchiveRecordImpl(r: SerializableWritable[ArchiveRecordWritable]) extends
}

val getMimeType: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
Option(arcRecord.getMetaData.getMimetype).getOrElse("unknown")
if (recordFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
Option(r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getMimetype).getOrElse("unknown")
} else {
Option(WarcRecordUtils.getWarcResponseMimeType(getContentBytes))
.getOrElse("unknown")
Option(WarcRecordUtils.getWarcResponseMimeType(getContentBytes)).getOrElse("unknown")
}
}

val getUrl: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
arcRecord.getMetaData.getUrl
r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getUrl
} else {
warcRecord.getHeader.getUrl
r.t.getRecord.asInstanceOf[WARCRecord].getHeader.getUrl
}
}

val getHttpStatus: String = {
if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) {
Option(arcRecord.getMetaData.getStatusCode).getOrElse("000")
Option(r.t.getRecord.asInstanceOf[ARCRecord].getMetaData.getStatusCode).getOrElse("000")
} else {
Try(new StatusLine(new String(HttpParser.readRawLine
(new ByteArrayInputStream(getContentBytes))))

0 comments on commit 5cb05f7

Please sign in to comment.
You can’t perform that action at this time.