Permalink
Browse files

Add mb13, robust04, and wash18 collections. (#7)

  • Loading branch information...
r-clancy committed Nov 5, 2018
1 parent 7935981 commit 34abd17d0567fa81ed8726ae656f8c5f8831bfbe
Showing with 25,412 additions and 3 deletions.
  1. +3 −0 Dockerfile
  2. +18 −3 config.json
  3. +8 −0 configsets/mb13/conf/lang/contractions_ca.txt
  4. +15 −0 configsets/mb13/conf/lang/contractions_fr.txt
  5. +5 −0 configsets/mb13/conf/lang/contractions_ga.txt
  6. +23 −0 configsets/mb13/conf/lang/contractions_it.txt
  7. +5 −0 configsets/mb13/conf/lang/hyphenations_ga.txt
  8. +6 −0 configsets/mb13/conf/lang/stemdict_nl.txt
  9. +420 −0 configsets/mb13/conf/lang/stoptags_ja.txt
  10. +125 −0 configsets/mb13/conf/lang/stopwords_ar.txt
  11. +193 −0 configsets/mb13/conf/lang/stopwords_bg.txt
  12. +220 −0 configsets/mb13/conf/lang/stopwords_ca.txt
  13. +172 −0 configsets/mb13/conf/lang/stopwords_cz.txt
  14. +110 −0 configsets/mb13/conf/lang/stopwords_da.txt
  15. +294 −0 configsets/mb13/conf/lang/stopwords_de.txt
  16. +78 −0 configsets/mb13/conf/lang/stopwords_el.txt
  17. +54 −0 configsets/mb13/conf/lang/stopwords_en.txt
  18. +356 −0 configsets/mb13/conf/lang/stopwords_es.txt
  19. +99 −0 configsets/mb13/conf/lang/stopwords_eu.txt
  20. +313 −0 configsets/mb13/conf/lang/stopwords_fa.txt
  21. +97 −0 configsets/mb13/conf/lang/stopwords_fi.txt
  22. +186 −0 configsets/mb13/conf/lang/stopwords_fr.txt
  23. +110 −0 configsets/mb13/conf/lang/stopwords_ga.txt
  24. +161 −0 configsets/mb13/conf/lang/stopwords_gl.txt
  25. +235 −0 configsets/mb13/conf/lang/stopwords_hi.txt
  26. +211 −0 configsets/mb13/conf/lang/stopwords_hu.txt
  27. +46 −0 configsets/mb13/conf/lang/stopwords_hy.txt
  28. +359 −0 configsets/mb13/conf/lang/stopwords_id.txt
  29. +303 −0 configsets/mb13/conf/lang/stopwords_it.txt
  30. +127 −0 configsets/mb13/conf/lang/stopwords_ja.txt
  31. +172 −0 configsets/mb13/conf/lang/stopwords_lv.txt
  32. +119 −0 configsets/mb13/conf/lang/stopwords_nl.txt
  33. +194 −0 configsets/mb13/conf/lang/stopwords_no.txt
  34. +253 −0 configsets/mb13/conf/lang/stopwords_pt.txt
  35. +233 −0 configsets/mb13/conf/lang/stopwords_ro.txt
  36. +243 −0 configsets/mb13/conf/lang/stopwords_ru.txt
  37. +133 −0 configsets/mb13/conf/lang/stopwords_sv.txt
  38. +119 −0 configsets/mb13/conf/lang/stopwords_th.txt
  39. +212 −0 configsets/mb13/conf/lang/stopwords_tr.txt
  40. +29 −0 configsets/mb13/conf/lang/userdict_ja.txt
  41. +986 −0 configsets/mb13/conf/managed-schema
  42. +20 −0 configsets/mb13/conf/params.json
  43. +21 −0 configsets/mb13/conf/protwords.txt
  44. +1,367 −0 configsets/mb13/conf/solrconfig.xml
  45. +14 −0 configsets/mb13/conf/stopwords.txt
  46. +29 −0 configsets/mb13/conf/synonyms.txt
  47. +8 −0 configsets/robust04/conf/lang/contractions_ca.txt
  48. +15 −0 configsets/robust04/conf/lang/contractions_fr.txt
  49. +5 −0 configsets/robust04/conf/lang/contractions_ga.txt
  50. +23 −0 configsets/robust04/conf/lang/contractions_it.txt
  51. +5 −0 configsets/robust04/conf/lang/hyphenations_ga.txt
  52. +6 −0 configsets/robust04/conf/lang/stemdict_nl.txt
  53. +420 −0 configsets/robust04/conf/lang/stoptags_ja.txt
  54. +125 −0 configsets/robust04/conf/lang/stopwords_ar.txt
  55. +193 −0 configsets/robust04/conf/lang/stopwords_bg.txt
  56. +220 −0 configsets/robust04/conf/lang/stopwords_ca.txt
  57. +172 −0 configsets/robust04/conf/lang/stopwords_cz.txt
  58. +110 −0 configsets/robust04/conf/lang/stopwords_da.txt
  59. +294 −0 configsets/robust04/conf/lang/stopwords_de.txt
  60. +78 −0 configsets/robust04/conf/lang/stopwords_el.txt
  61. +54 −0 configsets/robust04/conf/lang/stopwords_en.txt
  62. +356 −0 configsets/robust04/conf/lang/stopwords_es.txt
  63. +99 −0 configsets/robust04/conf/lang/stopwords_eu.txt
  64. +313 −0 configsets/robust04/conf/lang/stopwords_fa.txt
  65. +97 −0 configsets/robust04/conf/lang/stopwords_fi.txt
  66. +186 −0 configsets/robust04/conf/lang/stopwords_fr.txt
  67. +110 −0 configsets/robust04/conf/lang/stopwords_ga.txt
  68. +161 −0 configsets/robust04/conf/lang/stopwords_gl.txt
  69. +235 −0 configsets/robust04/conf/lang/stopwords_hi.txt
  70. +211 −0 configsets/robust04/conf/lang/stopwords_hu.txt
  71. +46 −0 configsets/robust04/conf/lang/stopwords_hy.txt
  72. +359 −0 configsets/robust04/conf/lang/stopwords_id.txt
  73. +303 −0 configsets/robust04/conf/lang/stopwords_it.txt
  74. +127 −0 configsets/robust04/conf/lang/stopwords_ja.txt
  75. +172 −0 configsets/robust04/conf/lang/stopwords_lv.txt
  76. +119 −0 configsets/robust04/conf/lang/stopwords_nl.txt
  77. +194 −0 configsets/robust04/conf/lang/stopwords_no.txt
  78. +253 −0 configsets/robust04/conf/lang/stopwords_pt.txt
  79. +233 −0 configsets/robust04/conf/lang/stopwords_ro.txt
  80. +243 −0 configsets/robust04/conf/lang/stopwords_ru.txt
  81. +133 −0 configsets/robust04/conf/lang/stopwords_sv.txt
  82. +119 −0 configsets/robust04/conf/lang/stopwords_th.txt
  83. +212 −0 configsets/robust04/conf/lang/stopwords_tr.txt
  84. +29 −0 configsets/robust04/conf/lang/userdict_ja.txt
  85. +972 −0 configsets/robust04/conf/managed-schema
  86. +20 −0 configsets/robust04/conf/params.json
  87. +21 −0 configsets/robust04/conf/protwords.txt
  88. +1,364 −0 configsets/robust04/conf/solrconfig.xml
  89. +14 −0 configsets/robust04/conf/stopwords.txt
  90. +29 −0 configsets/robust04/conf/synonyms.txt
  91. +8 −0 configsets/wash18/conf/lang/contractions_ca.txt
  92. +15 −0 configsets/wash18/conf/lang/contractions_fr.txt
  93. +5 −0 configsets/wash18/conf/lang/contractions_ga.txt
  94. +23 −0 configsets/wash18/conf/lang/contractions_it.txt
  95. +5 −0 configsets/wash18/conf/lang/hyphenations_ga.txt
  96. +6 −0 configsets/wash18/conf/lang/stemdict_nl.txt
  97. +420 −0 configsets/wash18/conf/lang/stoptags_ja.txt
  98. +125 −0 configsets/wash18/conf/lang/stopwords_ar.txt
  99. +193 −0 configsets/wash18/conf/lang/stopwords_bg.txt
  100. +220 −0 configsets/wash18/conf/lang/stopwords_ca.txt
  101. +172 −0 configsets/wash18/conf/lang/stopwords_cz.txt
  102. +110 −0 configsets/wash18/conf/lang/stopwords_da.txt
  103. +294 −0 configsets/wash18/conf/lang/stopwords_de.txt
  104. +78 −0 configsets/wash18/conf/lang/stopwords_el.txt
  105. +54 −0 configsets/wash18/conf/lang/stopwords_en.txt
  106. +356 −0 configsets/wash18/conf/lang/stopwords_es.txt
  107. +99 −0 configsets/wash18/conf/lang/stopwords_eu.txt
  108. +313 −0 configsets/wash18/conf/lang/stopwords_fa.txt
  109. +97 −0 configsets/wash18/conf/lang/stopwords_fi.txt
  110. +186 −0 configsets/wash18/conf/lang/stopwords_fr.txt
  111. +110 −0 configsets/wash18/conf/lang/stopwords_ga.txt
  112. +161 −0 configsets/wash18/conf/lang/stopwords_gl.txt
  113. +235 −0 configsets/wash18/conf/lang/stopwords_hi.txt
  114. +211 −0 configsets/wash18/conf/lang/stopwords_hu.txt
  115. +46 −0 configsets/wash18/conf/lang/stopwords_hy.txt
  116. +359 −0 configsets/wash18/conf/lang/stopwords_id.txt
  117. +303 −0 configsets/wash18/conf/lang/stopwords_it.txt
  118. +127 −0 configsets/wash18/conf/lang/stopwords_ja.txt
  119. +172 −0 configsets/wash18/conf/lang/stopwords_lv.txt
  120. +119 −0 configsets/wash18/conf/lang/stopwords_nl.txt
  121. +194 −0 configsets/wash18/conf/lang/stopwords_no.txt
  122. +253 −0 configsets/wash18/conf/lang/stopwords_pt.txt
  123. +233 −0 configsets/wash18/conf/lang/stopwords_ro.txt
  124. +243 −0 configsets/wash18/conf/lang/stopwords_ru.txt
  125. +133 −0 configsets/wash18/conf/lang/stopwords_sv.txt
  126. +119 −0 configsets/wash18/conf/lang/stopwords_th.txt
  127. +212 −0 configsets/wash18/conf/lang/stopwords_tr.txt
  128. +29 −0 configsets/wash18/conf/lang/userdict_ja.txt
  129. +972 −0 configsets/wash18/conf/managed-schema
  130. +20 −0 configsets/wash18/conf/params.json
  131. +21 −0 configsets/wash18/conf/protwords.txt
  132. +1,364 −0 configsets/wash18/conf/solrconfig.xml
  133. +14 −0 configsets/wash18/conf/stopwords.txt
  134. +29 −0 configsets/wash18/conf/synonyms.txt
@@ -21,6 +21,9 @@ COPY --chown=solr configsets/. server/solr/configsets
# Create cores
RUN precreate-core core17 server/solr/configsets/core17
RUN precreate-core mb11 server/solr/configsets/mb11
RUN precreate-core mb13 server/solr/configsets/mb13
RUN precreate-core robust04 server/solr/configsets/robust04
RUN precreate-core wash18 server/solr/configsets/wash18

# Start the server.
CMD solr-foreground
@@ -4,13 +4,28 @@
"indexes": [
{
"name": "core17",
"index_path": "../index/lucene-index.core17.pos+docvectors+rawdocs",
"index_path": "/tuna1/indexes/lucene-index.core17.pos+docvectors+rawdocs",
"config_path": "configsets/core17"
},
{
"name": "mb11",
"index_path": "../index/lucene-index.mb11.pos+docvectors+rawdocs",
"index_path": "/tuna1/indexes/lucene-index.mb11.pos+docvectors+rawdocs",
"config_path": "configsets/mb11"
},
{
"name": "mb13",
"index_path": "/tuna1/indexes/lucene-index.mb13.pos+docvectors+rawdocs",
"config_path": "configsets/mb13"
},
{
"name": "robust04",
"index_path": "/tuna1/indexes/lucene-index.robust04.pos+docvectors+rawdocs",
"config_path": "configsets/robust04"
},
{
"name": "wash18",
"index_path": "/tuna1/indexes/lucene-index.wash18.pos+docvectors+rawdocs",
"config_path": "configsets/wash18"
}
]
}
}
@@ -0,0 +1,8 @@
# Set of Catalan contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
d
l
m
n
s
t
@@ -0,0 +1,15 @@
# Set of French contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
l
m
t
qu
n
s
j
d
c
jusqu
quoiqu
lorsqu
puisqu
@@ -0,0 +1,5 @@
# Set of Irish contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
d
m
b
@@ -0,0 +1,23 @@
# Set of Italian contractions for ElisionFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
c
l
all
dall
dell
nell
sull
coll
pell
gl
agl
dagl
degl
negl
sugl
un
m
t
s
v
d
@@ -0,0 +1,5 @@
# Set of Irish hyphenations for StopFilter
# TODO: load this as a resource from the analyzer and sync it in build.xml
h
n
t
@@ -0,0 +1,6 @@
# Set of overrides for the Dutch stemmer
# TODO: load this as a resource from the analyzer and sync it in build.xml
fiets fiets
bromfiets bromfiets
ei eier
kind kinder
Oops, something went wrong.

0 comments on commit 34abd17

Please sign in to comment.