---
# Service configuration: external resources, model definitions, and the
# web server / logging settings.

version: "0.8.2"

grobidHome: /opt/grobid/grobid-home

# entity-fishing server information for performing entity disambiguation
# for https, indicate 443 as port
entityFishingHost: traces1.inria.fr/nerd
entityFishingPort: 443
#entityFishingHost: localhost
#entityFishingPort: 8090

corpusPath: resources/dataset/

tmpPath: tmp/

# path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
pub2teiPath: "../../Pub2TEI/"

# if true we use binary classifiers for the contexts, otherwise use a single
# multi-label classifier
# binary classifiers perform better, but are heavier to use
useBinaryContextClassifiers: true

models:
  - name: "software"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT"
      transformer: "allenai/scibert_scivocab_cased"
      #transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "software-type"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT_CRF"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context"
    # multi-label classifier for the context
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    # binary classifier to predict if the context of the mention corresponds
    # to a usage of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    # binary classifier to predict if the context of the mention corresponds
    # to a creation of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    # binary classifier to predict if the context of the mention corresponds
    # to a sharing of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

# Limit the maximum number of requests
# NOTE(review): presumably 0 disables the limit - confirm against the service code
maxParallelRequests: 0

# CORS configuration for the web API service
corsAllowedOrigins: "*"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"

server:
  type: custom
  applicationConnectors:
    - type: http
      port: 8060
      # NOTE(review): nested here as connector-level options - confirm
      idleTimeout: 120 seconds
      acceptQueueSize: 2048
  adminConnectors:
    - type: http
      port: 8061
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048
  # change the following for having all http requests logged
  requestLog:
    appenders: []

# these logging settings apply to the service usage mode
logging:
  level: INFO
  loggers:
    # "OFF" is quoted so it stays a string and is not read as a boolean
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
    org.glassfish.jersey.internal: "OFF"
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
  appenders:
    - type: console
      threshold: INFO
      timeZone: UTC