File size: 3,861 Bytes
05299e9
e2cd6d9
 
 
 
 
57e11a5
 
e2cd6d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b472a03
 
e2cd6d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b472a03
 
e2cd6d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2055f66
 
e2cd6d9
 
 
 
 
 
2055f66
e2cd6d9
 
 
 
 
 
 
 
 
 
 
 
 
2055f66
e2cd6d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Version identifier; quoted so YAML keeps it a string rather than a number.
version: "0.8.2"

# Location of the GROBID home directory (absolute path).
grobidHome: /opt/grobid/grobid-home

# entity-fishing server used for performing entity disambiguation.
# For HTTPS, indicate 443 as the port.
entityFishingHost: traces1.inria.fr/nerd
entityFishingPort: 443
# Commented-out alternative: a local entity-fishing instance.
#entityFishingHost: localhost
#entityFishingPort: 8090

# Dataset and temporary-file locations.
# NOTE(review): both are relative paths; the directory they are resolved
# against is not visible in this file -- confirm.
corpusPath: resources/dataset/
tmpPath: tmp/

# Path to the Pub2TEI repository, as available at https://github.com/kermitt2/Pub2TEI
pub2teiPath: "../../Pub2TEI/"

# If true, binary classifiers are used for the contexts; otherwise a single
# multi-label classifier is used.
# Binary classifiers perform better, but are heavier to use.
useBinaryContextClassifiers: true

# List of model configurations; each entry is identified by its `name`.
models:
  # Model "software".
  - name: "software"
    # The active engine is "delft"; "wapiti" is kept as a commented-out alternative.
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # Wapiti training parameters, only considered when wapiti is used as the
      # engine for the model; these parameters are used at training time only.
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # Deep learning parameters.
      # Commented-out alternative: BidLSTM_CRF architecture with GloVe embeddings.
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT"
      transformer: "allenai/scibert_scivocab_cased"
      # Commented-out alternative transformer.
      #transformer: "michiyasunaga/LinkBERT-basecased"

  # Model "software-type".
  - name: "software-type"
    # The active engine is "delft"; "wapiti" is kept as a commented-out alternative.
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # Wapiti training parameters, only considered when wapiti is used as the
      # engine for the model; these parameters are used at training time only.
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # Deep learning parameters.
      # Commented-out alternative: BidLSTM_CRF architecture with GloVe embeddings.
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT_CRF"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context"
    # Multi-label classifier for the context.
    # NOTE(review): presumably the model used when useBinaryContextClassifiers
    # is false -- confirm against the loading code.
    engine: "delft"
    delft:
      # Commented-out alternative: GRU architecture with GloVe embeddings.
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      # NOTE(review): the architecture name is lower-case here, whereas the
      # "software" models use upper-case "BERT"/"BERT_CRF" -- confirm this
      # difference is intentional.
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    # Binary classifier to predict whether the context of the mention
    # corresponds to a usage of the software.
    engine: "delft"
    delft:
      # Commented-out alternative: GRU architecture with GloVe embeddings.
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    # Binary classifier to predict whether the context of the mention
    # corresponds to a creation of the software.
    engine: "delft"
    delft:
      # Commented-out alternative: GRU architecture with GloVe embeddings.
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    # Binary classifier to predict whether the context of the mention
    # corresponds to a sharing of the software.
    engine: "delft"
    delft:
      # Commented-out alternative: GRU architecture with GloVe embeddings.
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

# Limit on the maximum number of parallel requests.
# NOTE(review): the meaning of 0 (e.g. "no limit" or "use a default") is not
# visible in this file -- confirm against the code that reads this key.
maxParallelRequests: 0

# CORS configuration for the web API service.
# Each value is a single comma-separated string.
# NOTE(review): "*" presumably allows requests from any origin -- confirm this
# is intended for the deployment.
corsAllowedOrigins: "*"
corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"

# HTTP server settings.
# NOTE(review): the key names look like Dropwizard's server configuration --
# confirm against the framework actually in use.
server:
  type: custom
  applicationConnectors:
    # Application connector: plain HTTP on port 8060.
    - type: http
      port: 8060
      idleTimeout: 120 seconds
      acceptQueueSize: 2048
  adminConnectors:
    # Admin connector: plain HTTP on port 8061.
    - type: http
      port: 8061
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048

  # The appender list is empty here; change the following to have all HTTP
  # requests logged.
  requestLog:
    appenders: []

# These logging settings apply to the service usage mode.
logging:
  # Log level set at the top of the logging section.
  level: INFO
  # Per-logger overrides: each logger listed below is set to "OFF".
  # "OFF" is quoted so that YAML parsers do not read it as a boolean.
  loggers:
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
    org.glassfish.jersey.internal: "OFF"
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
  appenders:
    # Console appender with an INFO threshold and its time zone set to UTC.
    - type: console
      threshold: INFO
      timeZone: UTC