| version: "0.8.2" | |
| grobidHome: /opt/grobid/grobid-home | |
| # entity-fishing server information for performing entity disambiguation | |
| # for https, indicate 443 as port | |
| entityFishingHost: traces1.inria.fr/nerd | |
| entityFishingPort: 443 | |
| #entityFishingHost: localhost | |
| #entityFishingPort: 8090 | |
| corpusPath: resources/dataset/ | |
| tmpPath: tmp/ | |
| # path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI | |
| pub2teiPath: "../../Pub2TEI/" | |
| # if true, use binary classifiers for the contexts; otherwise use a single multi-label classifier | |
| # binary classifiers perform better, but are heavier to use | |
| useBinaryContextClassifiers: true | |
| models: | |
| - name: "software" | |
| #engine: "wapiti" | |
| engine: "delft" | |
| wapiti: | |
| # wapiti training parameters, only considered when wapiti is used as engine for the model, | |
| # these parameters are used at training time only | |
| epsilon: 0.00001 | |
| window: 30 | |
| nbMaxIterations: 1500 | |
| delft: | |
| # deep learning parameters | |
| #architecture: "BidLSTM_CRF" | |
| #useELMo: false | |
| #embeddings_name: "glove-840B" | |
| architecture: "BERT" | |
| transformer: "allenai/scibert_scivocab_cased" | |
| #transformer: "michiyasunaga/LinkBERT-basecased" | |
| - name: "software-type" | |
| #engine: "wapiti" | |
| engine: "delft" | |
| wapiti: | |
| # wapiti training parameters, only considered when wapiti is used as engine for the model, | |
| # these parameters are used at training time only | |
| epsilon: 0.00001 | |
| window: 30 | |
| nbMaxIterations: 1500 | |
| delft: | |
| # deep learning parameters | |
| #architecture: "BidLSTM_CRF" | |
| #useELMo: false | |
| #embeddings_name: "glove-840B" | |
| architecture: "BERT_CRF" | |
| transformer: "michiyasunaga/LinkBERT-basecased" | |
| - name: "context" | |
| # multi-label classifier for the context | |
| engine: "delft" | |
| delft: | |
| #architecture: "gru" | |
| #embeddings_name: "glove-840B" | |
| architecture: "bert" | |
| transformer: "michiyasunaga/LinkBERT-basecased" | |
| - name: "context_used" | |
| # binary classifier to predict if the context of the mention corresponds to a usage of the software | |
| engine: "delft" | |
| delft: | |
| #architecture: "gru" | |
| #embeddings_name: "glove-840B" | |
| architecture: "bert" | |
| transformer: "michiyasunaga/LinkBERT-basecased" | |
| - name: "context_creation" | |
| # binary classifier to predict if the context of the mention corresponds to a creation of the software | |
| engine: "delft" | |
| delft: | |
| #architecture: "gru" | |
| #embeddings_name: "glove-840B" | |
| architecture: "bert" | |
| transformer: "michiyasunaga/LinkBERT-basecased" | |
| - name: "context_shared" | |
| # binary classifier to predict if the context of the mention corresponds to a sharing of the software | |
| engine: "delft" | |
| delft: | |
| #architecture: "gru" | |
| #embeddings_name: "glove-840B" | |
| architecture: "bert" | |
| transformer: "michiyasunaga/LinkBERT-basecased" | |
| # Limit the maximum number of parallel requests | |
| maxParallelRequests: 0 | |
| # CORS configuration for the web API service | |
| corsAllowedOrigins: "*" | |
| corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD" | |
| corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin" | |
| server: | |
| type: custom | |
| applicationConnectors: | |
| - type: http | |
| port: 8060 | |
| idleTimeout: 120 seconds | |
| acceptQueueSize: 2048 | |
| adminConnectors: | |
| - type: http | |
| port: 8061 | |
| registerDefaultExceptionMappers: false | |
| maxThreads: 2048 | |
| maxQueuedRequests: 2048 | |
| # change the following to have all HTTP requests logged | |
| requestLog: | |
| appenders: [] | |
| # these logging settings apply to the service usage mode | |
| logging: | |
| level: INFO | |
| loggers: | |
| org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF" | |
| org.glassfish.jersey.internal: "OFF" | |
| com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF" | |
| appenders: | |
| - type: console | |
| threshold: INFO | |
| timeZone: UTC | |