Runbook for ECIR 2019 axiomatic semantic term matching paper (#535)

castorini · Jan 12, 2019 · 7b41ae38dc3a1a429cff0142530ea63d49937367 · 7b41ae3
1 parent 7c0eee7
commit 7b41ae38dc3a1a429cff0142530ea63d49937367
diff --git a/.gitignore b/.gitignore
@@ -17,4 +17,11 @@ log.*
 *.log
 out.*
 runs.regression/
-  
-runs.jdiq2018/
+  
+runs.jdiq2018/
+  
+# automatically generated by ECIR2019_axiomatic scripts
+  
+src/main/resources/topics-and-qrels/qrels.cw09.all.txt
+  
+src/main/resources/topics-and-qrels/qrels.cw12.all.txt
+  
+src/main/resources/topics-and-qrels/qrels.disk12.all.txt
+  
+src/main/resources/topics-and-qrels/qrels.gov2.all.txt
+  
+src/main/resources/topics-and-qrels/qrels.mb11.all.txt
+  
+src/main/resources/topics-and-qrels/qrels.mb13.all.txt
diff --git a/src/main/python/ecir2019_axiomatic/README.md b/src/main/python/ecir2019_axiomatic/README.md
@@ -0,0 +1,23 @@
+  
+### Requirements
+  
+
+  
+Python>=2.6 or Python>=3.5
+  
+`pip install -r src/main/python/requirements.txt`
+  
+
+  
+### Run the Parameter Sensitivity Experiments (Figs. 1, 2, and 3 in the paper)
+  
+
+  
+**Note:** Users will need to change the index path in `src/main/resources/fine_tuning/collections.yaml`
+  
+(The program goes through the `index_roots` and concatenates each one with the collection's `index_path`; the first match is used as the index path.)
+  
+
+  
+
+  
+```
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection disk12 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection robust04 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection robust05 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection core17 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection wt10g --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection gov2 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection cw09b --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection cw12b13 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection mb11 --models bm25 ql f2exp --n 32 --run --plot
+  
+python src/main/python/ecir2019_axiomatic/run_batch.py --collection mb13 --models bm25 ql f2exp --n 32 --run --plot
+  
+```
diff --git a/src/main/python/ecir2019_axiomatic/__init__.py b/src/main/python/ecir2019_axiomatic/__init__.py
@@ -0,0 +1 @@
+  
+
diff --git a/src/main/python/ecir2019_axiomatic/effectiveness.py b/src/main/python/ecir2019_axiomatic/effectiveness.py
@@ -0,0 +1,94 @@
+  
+# -*- coding: utf-8 -*-
+  
+#
+  
+# Anserini: A toolkit for reproducible information retrieval research built on Lucene
+  
+#
+  
+# Licensed under the Apache License, Version 2.0 (the "License");
+  
+# you may not use this file except in compliance with the License.
+  
+# You may obtain a copy of the License at
+  
+#
+  
+# http://www.apache.org/licenses/LICENSE-2.0
+  
+#
+  
+# Unless required by applicable law or agreed to in writing, software
+  
+# distributed under the License is distributed on an "AS IS" BASIS,
+  
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  
+# See the License for the specific language governing permissions and
+  
+# limitations under the License.
+  
+
+  
+import os
+  
+import json
+  
+import ast
+  
+from operator import itemgetter
+  
+from inspect import currentframe, getframeinfo
+  
+import logging
+  
+
+  
+logging.basicConfig()
+  
+
+  
class Effectiveness(object):
    """Aggregate evaluation files into per-metric parameter-sensitivity CSVs.

    For example, get all the effectiveness of one method (has multiple
    parameters).  Evaluation files are read from
    ``<output_root>/eval_files/<metric_dir>/`` and the CSV files are written to
    ``<output_root>/effectiveness_files/``.

    When constructing, pass the index path.
    """

    def __init__(self, index_path):
        """Validate ``index_path`` and set the working sub-directory names.

        Raises:
            SystemExit: with status 1 when ``index_path`` does not exist.
        """
        self.logger = logging.getLogger('effectiveness.Effectiveness')
        self.index_path = os.path.abspath(index_path)
        if not os.path.exists(self.index_path):
            frameinfo = getframeinfo(currentframe())
            # Logger methods format ``msg % args``; without placeholders the
            # call reports a formatting error instead of the location.
            self.logger.error('%s:%s', frameinfo.filename, frameinfo.lineno)
            self.logger.error('[Effectiveness Constructor]:Please provide a valid index path - ' + self.index_path)
            raise SystemExit(1)

        self.run_files_root = 'run_files'
        self.eval_files_root = 'eval_files'
        self.effectiveness_root = 'effectiveness_files'

    def output_effectiveness(self, output_root):
        """Write one ``axiom_paras_sensitivity_<metric>.csv`` per metric.

        Every file below ``<output_root>/eval_files/<metric_dir>/`` is expected
        to be named ``<basemodel>_<model>`` or ``<basemodel>_<model>_<params>``.
        Files with any other name shape are skipped with a warning.  Each CSV
        row is ``basemodel,beta,score``, taken from the ``all`` summary row of
        the evaluation file and sorted by beta within a base model.
        """
        eval_root = os.path.join(output_root, self.eval_files_root)
        effectiveness_dir = os.path.join(output_root, self.effectiveness_root)
        if not os.path.exists(effectiveness_dir):
            os.makedirs(effectiveness_dir)

        all_results = {}
        # Sorted listings keep the row order of the CSV files reproducible.
        for metric_dir in sorted(os.listdir(eval_root)):
            metric_path = os.path.join(eval_root, metric_dir)
            for fn in sorted(os.listdir(metric_path)):
                name_parts = fn.split('_')
                if len(name_parts) not in (2, 3):
                    # Without this guard the base model would be undefined
                    # (or silently reused from the previous file).
                    self.logger.warning('Skipping eval file with unexpected name: %s', fn)
                    continue
                basemodel = name_parts[0]
                eval_res = self.read_eval_file(os.path.join(metric_path, fn))
                for metric, per_query in eval_res.items():
                    if 'all' not in per_query:
                        # No summary row was stored for this metric.
                        continue
                    by_model = all_results.setdefault(metric, {})
                    by_model.setdefault(basemodel, []).append(per_query['all'])

        for metric in all_results:
            csv_path = os.path.join(effectiveness_dir, 'axiom_paras_sensitivity_%s.csv' % metric)
            with open(csv_path, 'w') as f:
                for basemodel in all_results[metric]:
                    all_results[metric][basemodel].sort(key=itemgetter(0))
                    for beta, score in all_results[metric][basemodel]:
                        f.write('%s,%.1f,%.4f\n' % (basemodel, beta, score))

    def read_eval_file(self, fn):
        """Return ``{metric: {qid: (beta, value)}}`` parsed from one eval file.

        ``beta`` comes from the ``axiom.beta:<float>`` entry of the
        comma-separated parameter part of the file name (the third
        ``_``-separated field).  It is ``-1`` when the second field of the
        file name is ``baseline``.  Lines whose third column is not a Python
        literal, or that have fewer than three columns, are ignored.  When the
        file name carries neither a beta nor the baseline marker, the metric
        keys are present but map to empty dicts.
        """
        split_fn = os.path.basename(fn).split('_')
        params = split_fn[-1] if len(split_fn) == 3 else ''

        tag = None
        for param in params.split(','):
            if 'axiom.beta' in param:
                tag = float(param.split(':')[1])
        # The baseline marker takes precedence over any beta in the name.
        if len(split_fn) > 1 and split_fn[1] == 'baseline':
            tag = -1

        res = {}
        with open(fn) as _in:
            for line in _in:
                row = line.split()
                if len(row) < 3:
                    continue
                metric, qid = row[0], row[1]
                try:
                    value = ast.literal_eval(row[2])
                except (ValueError, SyntaxError, TypeError):
                    # Non-literal columns such as a run id are not scores.
                    continue
                per_query = res.setdefault(metric, {})
                if tag is not None:
                    per_query[qid] = (tag, value)
        return res
diff --git a/src/main/python/ecir2019_axiomatic/evaluation.py b/src/main/python/ecir2019_axiomatic/evaluation.py
@@ -0,0 +1,80 @@
+  
+# -*- coding: utf-8 -*-
+  
+#
+  
+# Anserini: A toolkit for reproducible information retrieval research built on Lucene
+  
+#
+  
+# Licensed under the Apache License, Version 2.0 (the "License");
+  
+# you may not use this file except in compliance with the License.
+  
+# You may obtain a copy of the License at
+  
+#
+  
+# http://www.apache.org/licenses/LICENSE-2.0
+  
+#
+  
+# Unless required by applicable law or agreed to in writing, software
+  
+# distributed under the License is distributed on an "AS IS" BASIS,
+  
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  
+# See the License for the specific language governing permissions and
+  
+# limitations under the License.
+  
+
+  
+import os
+  
+from inspect import currentframe, getframeinfo
+  
+from subprocess import Popen, PIPE
+  
+import logging
+  
+
+  
+logging.basicConfig()
+  
class Evaluation(object):
    """Get the evaluation of a corpus for a result."""

    def __init__(self, index_path):
        """Validate ``index_path`` and set the working sub-directory names.

        Raises:
            SystemExit: with status 1 when ``index_path`` does not exist.
        """
        self.logger = logging.getLogger('evaluation.Evaluation')
        self.index_path = os.path.abspath(index_path)
        if not os.path.exists(self.index_path):
            frameinfo = getframeinfo(currentframe())
            # Logger methods format ``msg % args``; placeholders are required
            # for the file name and line number to be rendered.
            self.logger.error('%s:%s', frameinfo.filename, frameinfo.lineno)
            self.logger.error('[Evaluation Constructor]:Please provide a valid index path - ' + self.index_path)
            raise SystemExit(1)

        self.run_files_root = 'run_files'
        self.eval_files_root = 'eval_files'

    def gen_batch_eval_params(self, output_root, metric):
        """Return ``(run_file, eval_file)`` path pairs that still need evaluating.

        Creates ``<output_root>/eval_files/<metric>/`` if necessary.  A run
        file is included only when no evaluation file of the same name exists
        there yet.
        """
        run_dir = os.path.join(output_root, self.run_files_root)
        eval_dir = os.path.join(output_root, self.eval_files_root, metric)
        if not os.path.exists(eval_dir):
            os.makedirs(eval_dir)
        return [
            (os.path.join(run_dir, fn), os.path.join(eval_dir, fn))
            for fn in os.listdir(run_dir)
            if not os.path.exists(os.path.join(eval_dir, fn))
        ]

    @classmethod
    def output_all_evaluations(cls, qrel_programs, qrel_file_path, result_file_path, output_path):
        """Run every evaluation program and collect its output in ``output_path``.

        Each program is invoked as ``<program> <qrel_file_path>
        <result_file_path>``.  The first program (index 0) opens the output
        file for writing; the others append to it.  Output of a program whose
        command contains ``trec_eval`` is copied verbatim.  Output of one
        containing ``gdeval`` is treated as CSV (header line skipped) and
        rewritten as ``ndcg20`` / ``err20`` rows, with the ``amean`` row
        labelled ``all``.  A program that exits with a non-zero status is
        logged and skipped.  Nothing is returned.
        """
        logger = logging.getLogger('evaluation.Evaluation')
        for i, qrel_program in enumerate(qrel_programs):
            # NOTE(review): the command is passed to a shell as one string
            # because ``qrel_program`` may carry its own arguments; the paths
            # must therefore come from a trusted source.
            process = Popen(
                ' '.join([qrel_program, qrel_file_path, result_file_path]),
                shell=True,
                stdout=PIPE,
                universal_newlines=True,  # text output on both Python 2 and 3
            )
            stdout, _ = process.communicate()
            if process.returncode != 0:
                logger.error('ERROR when running the evaluation for:' + result_file_path)
                continue

            with open(output_path, 'w' if i == 0 else 'a') as o:
                if 'trec_eval' in qrel_program:
                    o.write(stdout)
                elif 'gdeval' in qrel_program:
                    # Drop the header line and the piece after the last newline.
                    for line in stdout.split('\n')[1:-1]:
                        line = line.strip()
                        if not line:
                            continue
                        row = line.split(',')
                        qid = row[-3] if row[-3] != 'amean' else 'all'
                        o.write('ndcg20\t%s\t%s\n' % (qid, row[-2]))
                        o.write('err20\t%s\t%s\n' % (qid, row[-1]))
diff --git a/src/main/python/ecir2019_axiomatic/plot_para_sensitivity.py b/src/main/python/ecir2019_axiomatic/plot_para_sensitivity.py
@@ -0,0 +1,91 @@
+  
+# -*- coding: utf-8 -*-
+  
+#
+  
+# Anserini: A toolkit for reproducible information retrieval research built on Lucene
+  
+#
+  
+# Licensed under the Apache License, Version 2.0 (the "License");
+  
+# you may not use this file except in compliance with the License.
+  
+# You may obtain a copy of the License at
+  
+#
+  
+# http://www.apache.org/licenses/LICENSE-2.0
+  
+#
+  
+# Unless required by applicable law or agreed to in writing, software
+  
+# distributed under the License is distributed on an "AS IS" BASIS,
+  
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  
+# See the License for the specific language governing permissions and
+  
+# limitations under the License.
+  
+
+  
+from __future__ import print_function
+  
+import os, sys
+  
+import csv
+  
+import logging
+  
+from operator import itemgetter
+  
+import matplotlib
+  
+
+  
+# https://stackoverflow.com/questions/37604289/tkinter-tclerror-no-display-name-and-no-display-environment-variable
+  
+if os.environ.get('DISPLAY','') == '':
+  
+    print('no display found. Using non-interactive Agg backend')
+  
+    matplotlib.use('Agg')
+  
+
+  
+import matplotlib.pyplot as plt
+  
+
+  
+plt.style.use('ggplot')
+  
+
+  
+logging.basicConfig()
+  
+
+  
class Plots(object):
    """Draw parameter-sensitivity plots from the effectiveness CSV files."""

    def __init__(self):
        """Set the logger and the names of the working sub-directories."""
        self.logger = logging.getLogger('plot.Plots')
        self.run_files_root = 'run_files'
        self.eval_files_root = 'eval_files'
        self.effectiveness_root = 'effectiveness_files'
        self.plots_root = 'plots'

    def read_data(self, fn):
        """Return ``{model: [(beta, score), ...]}`` read from a CSV file.

        Every non-empty row must have exactly three columns:
        ``model,beta,score``.  Blank rows are skipped.
        """
        all_results = {}
        with open(fn) as f:
            for row in csv.reader(f):
                if not row:
                    # csv.reader yields an empty list for a blank line.
                    continue
                model, beta, score = row
                all_results.setdefault(model, []).append((float(beta), float(score)))
        return all_results

    def plot_params_sensitivity(self, collection, output_root):
        """Plot score against beta for every file in the effectiveness directory.

        For each model, the rows with a positive beta form a line, and a
        single row with a negative beta is drawn as a horizontal baseline.
        The figure is saved as
        ``<output_root>/plots/params_sensitivity_<collection>.eps``.
        """
        plots_dir = os.path.join(output_root, self.plots_root)
        effectiveness_dir = os.path.join(output_root, self.effectiveness_root)
        if not os.path.exists(plots_dir):
            os.makedirs(plots_dir)
        title_mappings = {
            'disk12': 'Disk 1 & 2',
            'robust04': 'Disks 4 & 5',
            'robust05': 'AQUAINT',
            'core17': 'New York Times',
            'core18': 'Washington Post',
            'wt10g': 'WT10g',
            'gov2': 'Gov2',
            'cw09b': 'ClueWeb09b',
            'cw12b13': 'ClueWeb12-B13',
            'cw12': 'ClueWeb12',
            'mb11': 'Tweets 2011',
            'mb13': 'Tweets 2013'
        }
        ls = ['-', '--', ':']
        colors = ['r', 'g', 'b']
        # NOTE(review): the output name depends only on the collection, so
        # when several effectiveness files exist each plot overwrites the
        # previous one - confirm that a single metric file is expected.
        output_fn = os.path.join(plots_dir, 'params_sensitivity_{}.eps'.format(collection))

        for fn in os.listdir(effectiveness_dir):
            all_results = self.read_data(os.path.join(effectiveness_dir, fn))
            fig, ax = plt.subplots(1, 1, figsize=(6, 4))
            try:
                # zip() limits the plot to the first three models (sorted by name).
                for (model, linestyle, color) in zip(sorted(all_results), ls, colors):
                    points = sorted(all_results[model], key=itemgetter(0))
                    x = [float(beta) for beta, _ in points if beta > 0]
                    y = [float(score) for beta, score in points if beta > 0]
                    ax.plot(x, y, linestyle=linestyle, marker='o', ms=5, label=model.upper()+'+Ax', color=color)
                    baseline = [float(score) for beta, score in points if beta < 0]
                    if len(baseline) == 1:
                        ax.axhline(baseline[0], linestyle=linestyle, color=color, label=model.upper())
                if all_results:
                    # Decorate once, after all lines exist, rather than
                    # repeating the same calls for every model.
                    ax.grid(True)
                    ax.set_title(title_mappings.get(collection, collection))
                    ax.set_xlabel(r'$\beta$')
                    ax.set_ylabel('MAP' if not 'cw' in collection else 'NDCG@20')
                    ax.legend(loc=4)
                fig.savefig(output_fn, bbox_inches='tight', format='eps')
            finally:
                # pyplot keeps every figure alive until it is closed.
                plt.close(fig)