# softcite-software-mentions / config-docker.yml
# lfoppiano's picture
# Update config-docker.yml
# 57e11a5 verified
version: "0.8.2"

grobidHome: /opt/grobid/grobid-home

# Entity disambiguation is performed by an entity-fishing server reached at
# the host and port below (for https, indicate 443 as port).
entityFishingHost: traces1.inria.fr/nerd
entityFishingPort: 443
# Alternative kept for reference: entity-fishing on localhost
# entityFishingHost: localhost
# entityFishingPort: 8090

corpusPath: resources/dataset/
tmpPath: tmp/

# Path to the Pub2TEI repository
# (available at https://github.com/kermitt2/Pub2TEI)
pub2teiPath: "../../Pub2TEI/"

# Context classification strategy:
#   true  -> binary classifiers for the contexts (perform better, but heavier to use)
#   false -> a single multi-label classifier
useBinaryContextClassifiers: true
# Model definitions, one list entry per model. Each entry carries the model
# name, the selected engine, and the parameters for that engine; nesting is
# restored so these keys belong to their list entry instead of colliding as
# repeated top-level keys.
models:
  - name: "software"
    # engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      # architecture: "BidLSTM_CRF"
      # useELMo: false
      # embeddings_name: "glove-840B"
      architecture: "BERT"
      transformer: "allenai/scibert_scivocab_cased"
      # transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "software-type"
    # engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      # architecture: "BidLSTM_CRF"
      # useELMo: false
      # embeddings_name: "glove-840B"
      architecture: "BERT_CRF"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context"
    # multi-label classifier for the context
    engine: "delft"
    delft:
      # architecture: "gru"
      # embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    # binary classifier to predict if the context of the mention corresponds
    # to a usage of the software
    engine: "delft"
    delft:
      # architecture: "gru"
      # embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    # binary classifier to predict if the context of the mention corresponds
    # to a creation of the software
    engine: "delft"
    delft:
      # architecture: "gru"
      # embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    # binary classifier to predict if the context of the mention corresponds
    # to a sharing of the software
    engine: "delft"
    delft:
      # architecture: "gru"
      # embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"
# Upper bound on the number of requests
# NOTE(review): 0 presumably means "no limit" -- confirm against the service code
maxParallelRequests: 0

# Cross-origin (CORS) settings for the web API service
corsAllowedOrigins: "*"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"
# HTTP server settings. Nesting is restored so that each connector owns its
# own type/port and `requestLog` owns its `appenders` list.
# NOTE(review): key placement follows the Dropwizard server configuration
# layout (connector-level vs. server-level settings) -- confirm against the
# deployed file.
server:
  type: custom
  # Application traffic: plain HTTP on port 8060
  applicationConnectors:
    - type: http
      port: 8060
      idleTimeout: 120 seconds
      acceptQueueSize: 2048
  # Admin traffic: plain HTTP on port 8061
  adminConnectors:
    - type: http
      port: 8061
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048

  # change the following for having all http requests logged
  requestLog:
    appenders: []
# these logging settings apply to the service usage mode
# Nesting is restored so that `level`, `loggers` and `appenders` sit under
# `logging`, and the console appender owns its threshold and time zone.
logging:
  level: INFO
  # Per-logger overrides: the loggers listed here are switched off
  loggers:
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
    org.glassfish.jersey.internal: "OFF"
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
  appenders:
    - type: console
      threshold: INFO
      timeZone: UTC