Skip to content
Permalink
Browse files

Log closing of ARC and WARC files, resolves #156 (#301)

* Log opening and closing of archive files as per #156
* Remove redundant log message. Spark already logs the file that is to be read when an executor computes an RDD.
  • Loading branch information...
jrwiebe authored and ruebot committed Jan 31, 2019
1 parent 1e69040 commit fc0178d7588871681da75a478195c7b6724f3487
Showing with 15 additions and 1 deletion.
  1. +15 −1 src/main/java/io/archivesunleashed/data/ArchiveRecordInputFormat.java
@@ -18,6 +18,7 @@
package io.archivesunleashed.data;

import io.archivesunleashed.data.ArchiveRecordWritable.ArchiveFormat;
import org.apache.log4j.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -47,6 +48,12 @@
*/
public class ArchiveRecordInputFormat extends FileInputFormat<LongWritable,
ArchiveRecordWritable> {
/**
* Class-level log4j logger for this input format. The record reader
* uses it to report when an archive file has been closed.
*/
private static final Logger LOG =
Logger.getLogger(ArchiveRecordInputFormat.class);

@Override
public final RecordReader<LongWritable,
ArchiveRecordWritable> createRecordReader(final InputSplit split,
@@ -102,6 +109,11 @@ protected final boolean isSplitable(final JobContext context,
*/
private ArchiveRecordWritable value = null;

/**
* Archive file path, stored as the string form of the input split's
* path when the reader is initialized. Kept so that close() can name
* the file in its log message.
*/
private String fileName;

/**
* Seekable file position.
*/
@@ -124,8 +136,9 @@ public final void initialize(final InputSplit archiveRecordSplit,

FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(split.getPath());
fileName = split.getPath().toString();

reader = ArchiveReaderFactory.get(split.getPath().toString(),
reader = ArchiveReaderFactory.get(fileName,
new BufferedInputStream(fileIn), true);

if (reader instanceof ARCReader) {
@@ -223,6 +236,7 @@ public final float getProgress() throws IOException {
/**
 * Closes the underlying archive reader, then logs the name of the
 * archive file that was being read.
 *
 * @throws IOException if closing the underlying reader fails; in that
 *     case no "closed" message is logged
 */
@Override
public final synchronized void close() throws IOException {
  // Close first so the message is only emitted after a successful close.
  reader.close();

  final String closedMessage = "Closed archive file " + fileName;
  LOG.info(closedMessage);
}
}
}

0 comments on commit fc0178d

Please sign in to comment.
You can’t perform that action at this time.