# softcite-software-mentions
#
# Sleeping
#
# File size: 3,861 Bytes

# version string attached to this configuration
# NOTE(review): presumably the release of the module this file ships with - confirm
version: "0.8.2"

# location of the grobid-home directory
grobidHome: /opt/grobid/grobid-home

# entity-fishing server information for performing entity disambiguation
# for https, indicate 443 as port
entityFishingHost: traces1.inria.fr/nerd
entityFishingPort: 443
# alternative kept for reference: an entity-fishing instance on localhost, port 8090
#entityFishingHost: localhost
#entityFishingPort: 8090

# corpus/dataset directory and temporary directory
# NOTE(review): both are relative paths - presumably resolved against the working directory; confirm
corpusPath: resources/dataset/
tmpPath: tmp/

# path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
pub2teiPath: "../../Pub2TEI/"

# if true we use binary classifiers for the contexts, otherwise use a single multi-label classifier
# binary classifiers perform better, but are heavier to use
useBinaryContextClassifiers: true

# model definitions: one list entry per model, identified by its name; each entry selects
# an engine and carries the parameters for that engine. Lines starting with "#" followed
# directly by a key are alternative settings kept for reference.
models:
  - name: "software"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as engine for the model;
      # these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT"
      transformer: "allenai/scibert_scivocab_cased"
      #transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "software-type"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as engine for the model;
      # these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT_CRF"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context"
    # multi-label classifier for the context
    # (the single-classifier alternative to the binary context_* models below)
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    # binary classifier to predict if the context of the mention corresponds to a usage of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    # binary classifier to predict if the context of the mention corresponds to a creation of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    # binary classifier to predict if the context of the mention corresponds to a sharing of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

# Cross-origin (CORS) settings of the web API service:
# accepted request headers, HTTP methods and origins
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedOrigins: "*"

# Cap on the number of requests processed in parallel
# NOTE(review): 0 presumably means "no limit" - confirm against the service code
maxParallelRequests: 0

# HTTP server settings of the service
server:
  type: custom
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048

  # connector(s) for the application API
  applicationConnectors:
    - type: http
      port: 8060
      acceptQueueSize: 2048
      idleTimeout: 120 seconds

  # connector(s) for the administration interface
  adminConnectors:
    - type: http
      port: 8061

  # the appender list is empty here; add appenders to have all http requests logged
  requestLog:
    appenders: []

# logging configuration, in effect when the tool runs as a service
logging:
  level: INFO
  appenders:
    - type: console
      timeZone: UTC
      threshold: INFO
  loggers:
    # loggers set to "OFF" (quoted so the value stays a string, not a boolean)
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
    org.glassfish.jersey.internal: "OFF"
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"