Spaces:
Runtime error
Runtime error
Update config-docker.yml
Browse files- config-docker.yml +19 -8
config-docker.yml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
version: "0.8.
|
| 2 |
|
| 3 |
corpusPath: "./resources/dataset/dataseer/corpus"
|
| 4 |
templatePath: "./resources/dataset/dataseer/crfpp-templates/dataseer.template"
|
|
@@ -8,18 +8,18 @@ tmpPath: "/opt/grobid/grobid-home/tmp/"
|
|
| 8 |
# path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
|
| 9 |
pub2teiPath: "/opt/Pub2TEI/"
|
| 10 |
|
| 11 |
-
gluttonHost:
|
| 12 |
gluttonPort:
|
| 13 |
|
| 14 |
# entity-fishing server information for performing entity disambiguation
|
| 15 |
# for https, indicate 443 as port
|
| 16 |
-
entityFishingHost:
|
| 17 |
entityFishingPort: 443
|
| 18 |
#entityFishingHost: localhost
|
| 19 |
#entityFishingPort: 8090
|
| 20 |
|
| 21 |
# if true we use binary classifiers for the contexts, otherwise use a single multi-label classifier
|
| 22 |
-
# binary classifiers perform better, but
|
| 23 |
useBinaryContextClassifiers: false
|
| 24 |
|
| 25 |
# sequence labeling model (identify data-related sections)
|
|
@@ -35,7 +35,7 @@ models:
|
|
| 35 |
window: 20
|
| 36 |
nbMaxIterations: 2000
|
| 37 |
|
| 38 |
-
# classifier model, dataset binary (
|
| 39 |
- name: "dataseer-binary"
|
| 40 |
engine: "delft"
|
| 41 |
delft:
|
|
@@ -130,17 +130,18 @@ corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"
|
|
| 130 |
|
| 131 |
server:
|
| 132 |
type: custom
|
| 133 |
-
idleTimeout: 120 seconds
|
| 134 |
applicationConnectors:
|
| 135 |
- type: http
|
| 136 |
port: 8060
|
|
|
|
|
|
|
| 137 |
adminConnectors:
|
| 138 |
- type: http
|
| 139 |
port: 8061
|
| 140 |
registerDefaultExceptionMappers: false
|
| 141 |
maxThreads: 2048
|
| 142 |
maxQueuedRequests: 2048
|
| 143 |
-
|
| 144 |
requestLog:
|
| 145 |
appenders: []
|
| 146 |
|
|
@@ -157,4 +158,14 @@ logging:
|
|
| 157 |
timeZone: UTC
|
| 158 |
# uncomment to have the logs in json format
|
| 159 |
#layout:
|
| 160 |
-
# type: json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "0.8.1"
|
| 2 |
|
| 3 |
corpusPath: "./resources/dataset/dataseer/corpus"
|
| 4 |
templatePath: "./resources/dataset/dataseer/crfpp-templates/dataseer.template"
|
|
|
|
| 8 |
# path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
|
| 9 |
pub2teiPath: "/opt/Pub2TEI/"
|
| 10 |
|
| 11 |
+
gluttonHost:
|
| 12 |
gluttonPort:
|
| 13 |
|
| 14 |
# entity-fishing server information for performing entity disambiguation
|
| 15 |
# for https, indicate 443 as port
|
| 16 |
+
entityFishingHost: cloud.science-miner.com/nerd
|
| 17 |
entityFishingPort: 443
|
| 18 |
#entityFishingHost: localhost
|
| 19 |
#entityFishingPort: 8090
|
| 20 |
|
| 21 |
# if true we use binary classifiers for the contexts, otherwise use a single multi-label classifier
|
| 22 |
+
# binary classifiers perform better, but are heavier to use
|
| 23 |
useBinaryContextClassifiers: false
|
| 24 |
|
| 25 |
# sequence labeling model (identify data-related sections)
|
|
|
|
| 35 |
window: 20
|
| 36 |
nbMaxIterations: 2000
|
| 37 |
|
| 38 |
+
# classifier model, dataset binary (dataset or not dataset in the current sentence)
|
| 39 |
- name: "dataseer-binary"
|
| 40 |
engine: "delft"
|
| 41 |
delft:
|
|
|
|
| 130 |
|
| 131 |
server:
|
| 132 |
type: custom
|
|
|
|
| 133 |
applicationConnectors:
|
| 134 |
- type: http
|
| 135 |
port: 8060
|
| 136 |
+
idleTimeout: 120 seconds
|
| 137 |
+
acceptQueueSize: 2048
|
| 138 |
adminConnectors:
|
| 139 |
- type: http
|
| 140 |
port: 8061
|
| 141 |
registerDefaultExceptionMappers: false
|
| 142 |
maxThreads: 2048
|
| 143 |
maxQueuedRequests: 2048
|
| 144 |
+
|
| 145 |
requestLog:
|
| 146 |
appenders: []
|
| 147 |
|
|
|
|
| 158 |
timeZone: UTC
|
| 159 |
# uncomment to have the logs in json format
|
| 160 |
#layout:
|
| 161 |
+
# type: json
|
| 162 |
+
# - type: file
|
| 163 |
+
# currentLogFilename: logs/datastet-service.log
|
| 164 |
+
# threshold: INFO
|
| 165 |
+
# archive: true
|
| 166 |
+
# archivedLogFilenamePattern: logs/datastet-service-%d.log
|
| 167 |
+
# archivedFileCount: 7
|
| 168 |
+
# timeZone: UTC
|
| 169 |
+
# uncomment to have the logs in json format
|
| 170 |
+
#layout:
|
| 171 |
+
# type: json
|