Permalink
Browse files

Replace bash script with more flexible Python script (#3)

See additional discussion in #2
  • Loading branch information...
r-clancy authored and lintool committed Oct 15, 2018
1 parent c7c608c commit 7321535a1185060b6ad58feedb76b419e59f4a16
Showing with 132 additions and 39 deletions.
  1. +0 −11 .docker/load.sh
  2. +0 −17 .docker/run.sh
  3. +2 −0 .gitignore
  4. +1 −4 Dockerfile
  5. +6 −7 README.md
  6. +16 −0 config.json
  7. 0 {.docker → }/configsets/core17/conf/lang/contractions_ca.txt
  8. 0 {.docker → }/configsets/core17/conf/lang/contractions_fr.txt
  9. 0 {.docker → }/configsets/core17/conf/lang/contractions_ga.txt
  10. 0 {.docker → }/configsets/core17/conf/lang/contractions_it.txt
  11. 0 {.docker → }/configsets/core17/conf/lang/hyphenations_ga.txt
  12. 0 {.docker → }/configsets/core17/conf/lang/stemdict_nl.txt
  13. 0 {.docker → }/configsets/core17/conf/lang/stoptags_ja.txt
  14. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ar.txt
  15. 0 {.docker → }/configsets/core17/conf/lang/stopwords_bg.txt
  16. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ca.txt
  17. 0 {.docker → }/configsets/core17/conf/lang/stopwords_cz.txt
  18. 0 {.docker → }/configsets/core17/conf/lang/stopwords_da.txt
  19. 0 {.docker → }/configsets/core17/conf/lang/stopwords_de.txt
  20. 0 {.docker → }/configsets/core17/conf/lang/stopwords_el.txt
  21. 0 {.docker → }/configsets/core17/conf/lang/stopwords_en.txt
  22. 0 {.docker → }/configsets/core17/conf/lang/stopwords_es.txt
  23. 0 {.docker → }/configsets/core17/conf/lang/stopwords_eu.txt
  24. 0 {.docker → }/configsets/core17/conf/lang/stopwords_fa.txt
  25. 0 {.docker → }/configsets/core17/conf/lang/stopwords_fi.txt
  26. 0 {.docker → }/configsets/core17/conf/lang/stopwords_fr.txt
  27. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ga.txt
  28. 0 {.docker → }/configsets/core17/conf/lang/stopwords_gl.txt
  29. 0 {.docker → }/configsets/core17/conf/lang/stopwords_hi.txt
  30. 0 {.docker → }/configsets/core17/conf/lang/stopwords_hu.txt
  31. 0 {.docker → }/configsets/core17/conf/lang/stopwords_hy.txt
  32. 0 {.docker → }/configsets/core17/conf/lang/stopwords_id.txt
  33. 0 {.docker → }/configsets/core17/conf/lang/stopwords_it.txt
  34. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ja.txt
  35. 0 {.docker → }/configsets/core17/conf/lang/stopwords_lv.txt
  36. 0 {.docker → }/configsets/core17/conf/lang/stopwords_nl.txt
  37. 0 {.docker → }/configsets/core17/conf/lang/stopwords_no.txt
  38. 0 {.docker → }/configsets/core17/conf/lang/stopwords_pt.txt
  39. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ro.txt
  40. 0 {.docker → }/configsets/core17/conf/lang/stopwords_ru.txt
  41. 0 {.docker → }/configsets/core17/conf/lang/stopwords_sv.txt
  42. 0 {.docker → }/configsets/core17/conf/lang/stopwords_th.txt
  43. 0 {.docker → }/configsets/core17/conf/lang/stopwords_tr.txt
  44. 0 {.docker → }/configsets/core17/conf/lang/userdict_ja.txt
  45. 0 {.docker → }/configsets/core17/conf/managed-schema
  46. 0 {.docker → }/configsets/core17/conf/params.json
  47. 0 {.docker → }/configsets/core17/conf/protwords.txt
  48. 0 {.docker → }/configsets/core17/conf/solrconfig.xml
  49. 0 {.docker → }/configsets/core17/conf/stopwords.txt
  50. 0 {.docker → }/configsets/core17/conf/synonyms.txt
  51. 0 {.docker → }/configsets/mb11/conf/lang/contractions_ca.txt
  52. 0 {.docker → }/configsets/mb11/conf/lang/contractions_fr.txt
  53. 0 {.docker → }/configsets/mb11/conf/lang/contractions_ga.txt
  54. 0 {.docker → }/configsets/mb11/conf/lang/contractions_it.txt
  55. 0 {.docker → }/configsets/mb11/conf/lang/hyphenations_ga.txt
  56. 0 {.docker → }/configsets/mb11/conf/lang/stemdict_nl.txt
  57. 0 {.docker → }/configsets/mb11/conf/lang/stoptags_ja.txt
  58. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ar.txt
  59. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_bg.txt
  60. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ca.txt
  61. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_cz.txt
  62. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_da.txt
  63. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_de.txt
  64. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_el.txt
  65. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_en.txt
  66. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_es.txt
  67. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_eu.txt
  68. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_fa.txt
  69. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_fi.txt
  70. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_fr.txt
  71. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ga.txt
  72. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_gl.txt
  73. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_hi.txt
  74. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_hu.txt
  75. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_hy.txt
  76. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_id.txt
  77. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_it.txt
  78. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ja.txt
  79. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_lv.txt
  80. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_nl.txt
  81. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_no.txt
  82. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_pt.txt
  83. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ro.txt
  84. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_ru.txt
  85. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_sv.txt
  86. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_th.txt
  87. 0 {.docker → }/configsets/mb11/conf/lang/stopwords_tr.txt
  88. 0 {.docker → }/configsets/mb11/conf/lang/userdict_ja.txt
  89. 0 {.docker → }/configsets/mb11/conf/managed-schema
  90. 0 {.docker → }/configsets/mb11/conf/params.json
  91. 0 {.docker → }/configsets/mb11/conf/protwords.txt
  92. 0 {.docker → }/configsets/mb11/conf/solrconfig.xml
  93. 0 {.docker → }/configsets/mb11/conf/stopwords.txt
  94. 0 {.docker → }/configsets/mb11/conf/synonyms.txt
  95. +107 −0 run.py

This file was deleted.

Oops, something went wrong.

This file was deleted.

Oops, something went wrong.
@@ -0,0 +1,2 @@
anserini.jar
.idea/
@@ -16,10 +16,7 @@ WORKDIR /opt/solr/
COPY --chown=solr anserini.jar lib/

# Copy the configsets.
COPY --chown=solr .docker/configsets/. server/solr/configsets

# Copy the script to symlink the index directories.
COPY --chown=solr .docker/load.sh .
COPY --chown=solr configsets/. server/solr/configsets

# Create cores
RUN precreate-core core17 server/solr/configsets/core17
@@ -7,6 +7,8 @@ Docker

In order to integrate Anserini and Solr, we'll be using [Docker](https://www.docker.com/) - make sure it is set up on your machine before continuing.

Additionally, ensure that the Docker SDK for Python is installed via `pip install docker`.

Overview
========

@@ -24,10 +26,7 @@ Instructions

Build Anserini and copy the fatjar (important) artifact into the root directory of the SolrAnserini repo, changing the name to `anserini.jar`.

1. Build the Docker image for anserini-solr
- `docker build -t anserini-solr .`
2. Edit the `.docker/run.sh` file to point at the directory where your Anserini generated Lucene indices are.
3. Execute the `.docker/run.sh` file.
4. Execute the `load.sh` script within the Docker container.
- `docker exec solr ./load.sh`
5. Wait about 10 seconds and reload each core from the admin UI (`http://localhost:8983`).
1. Edit the `config.json` file to point to the index and config locations on the host machine.
2. Run the Python script to build the Docker image and start the container with the index and config volumes mounted.
- `python run.py` (optionally specifying `--config <config_location>`)
3. Wait about 20 seconds and reload each core from the admin UI (`http://localhost:8983`).
@@ -0,0 +1,16 @@
{
"image_name": "solr-anserini",
"index_mount": "/",
"indexes": [
{
"name": "core17",
"index_path": "../index/lucene-index.core17.pos+docvectors+rawdocs",
"config_path": "configsets/core17"
},
{
"name": "mb11",
"index_path": "../index/lucene-index.mb11.pos+docvectors+rawdocs",
"config_path": "configsets/mb11"
}
]
}
107 run.py
@@ -0,0 +1,107 @@
import argparse
import json
import os
import time

import docker
from docker.errors import NotFound


def run(config_path):
    """Build and start the Solr container, then link each core to its index.

    Steps, in order: load the JSON config, remove any existing container with
    the configured name, build the image, start the container, wait for Solr
    to start, and finally, for every entry in ``config["indexes"]``, replace
    the core's ``data/index`` directory with a symlink to the mounted index.

    Args:
        config_path: Path to the JSON config file. It must provide
            ``image_name``, ``index_mount`` and ``indexes`` (each entry with a
            ``name``).
    """
    # Paths inside the container are always POSIX-style, whatever the host
    # OS is; os.path.join would produce backslashes on a Windows host.
    import posixpath

    # Load config file
    with open(config_path) as config_file:
        config = json.load(config_file)

    # Docker API client
    client = docker.from_env()

    # Remove any existing container
    remove_existing(client, config)

    # Build the image
    build_container(client, config)

    # Run the container
    container = run_container(client, config)

    print("Sleeping for 10 seconds while Solr starts...")
    time.sleep(10)

    for index in config["indexes"]:
        # Where the index volume is mounted inside the container.
        mounted_index = posixpath.join(config["index_mount"], index["name"])
        # Where the Solr core looks for its index data.
        core_index = posixpath.join("/opt/solr/server/solr/mycores", index["name"], "data/index")

        # Commands are passed as argument lists so that a path containing
        # whitespace is not split into several arguments.

        # Remove the lock file, if exists.
        container.exec_run(["rm", "-f", posixpath.join(mounted_index, "write.lock")], user='solr')

        # Remove Solr generated index data.
        container.exec_run(["rm", "-rf", core_index], user='solr')

        # Create link to data volume
        container.exec_run(["ln", "-s", mounted_index, core_index], user='solr')


# Stop and delete a previously started container, if there is one
def remove_existing(client, config):
    """Stop and remove the container named ``config["image_name"]``.

    A missing container is not an error: ``NotFound`` is swallowed so the
    function is a no-op in that case.

    Args:
        client: Docker client whose ``containers.get`` is used for the lookup.
        config: Loaded config dict; only ``image_name`` is read.
    """
    container_name = config["image_name"]
    try:
        stale = client.containers.get(container_name)
        stale.stop()
        stale.remove()
    except NotFound:
        # Nothing to clean up.
        return


# Build the image the container is started from
def build_container(client, config):
    """Build the Docker image from the current directory and echo the output.

    The build context is ``"."`` and the image is tagged with
    ``config["image_name"]``. Every item yielded by the build call is printed
    as-is.

    Args:
        client: Docker client; its low-level ``api.build`` is used.
        config: Loaded config dict; only ``image_name`` is read.
    """
    build_output = client.api.build(path=".", tag=config["image_name"])
    for chunk in build_output:
        print(chunk)


def get_volumes(config):
    """Build the volume bindings for every index listed in the config.

    For each entry in ``config["indexes"]`` two bindings are produced:

    * the index directory, bound read-write at
      ``<index_mount>/<name>`` inside the container;
    * the config directory, bound read-only at
      ``/opt/solr/server/solr/configsets/<name>`` inside the container.

    Host paths are resolved against the current working directory.

    Args:
        config: Loaded config dict providing ``index_mount`` and ``indexes``
            (each entry with ``name``, ``index_path`` and ``config_path``).

    Returns:
        A dict mapping each host path to ``{"bind": <container path>,
        "mode": "rw" | "ro"}``.
    """
    # Container-side paths must be POSIX-style regardless of the host OS;
    # os.path.join would produce backslashes on a Windows host.
    import posixpath

    host_root = os.getcwd()
    volumes = {}

    for index in config["indexes"]:
        name = index["name"]

        # Paths on the host for the index and its configs
        index_path_host = os.path.join(host_root, index["index_path"])
        config_path_host = os.path.join(host_root, index["config_path"])

        # Add the binding for index paths
        volumes[index_path_host] = {
            "bind": posixpath.join(config["index_mount"], name),
            "mode": "rw",
        }

        # Add the binding for config paths
        volumes[config_path_host] = {
            "bind": posixpath.join("/opt/solr/server/solr/configsets", name),
            "mode": "ro",
        }

    return volumes


def run_container(client, config):
    """Start the container in detached mode and return it.

    The container is created from the image ``config["image_name"]`` and given
    the same name. Port 8983 is published, the process runs as user ``solr``,
    and the bindings from ``get_volumes`` are mounted.

    Args:
        client: Docker client whose ``containers.run`` starts the container.
        config: Loaded config dict.

    Returns:
        Whatever ``client.containers.run`` returns for a detached run.
    """
    bindings = get_volumes(config)
    image = config["image_name"]
    run_options = {
        "detach": True,
        "name": image,
        "ports": {"8983": "8983"},
        "user": "solr",
        "volumes": bindings,
    }
    return client.containers.run(image, **run_options)


if __name__ == '__main__':
    # Command-line entry point: the only option is the config file location,
    # which defaults to config.json in the current directory.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--config",
        type=str,
        default="config.json",
        help="The config file location.",
    )

    run(cli.parse_args().config)

0 comments on commit 7321535

Please sign in to comment.