version: "0.8.2"

# location of the grobid-home directory
grobidHome: /opt/grobid/grobid-home

# entity-fishing server information for performing entity disambiguation
# for https, indicate 443 as port
entityFishingHost: traces1.inria.fr/nerd
entityFishingPort: 443
# alternative: an entity-fishing server running locally
#entityFishingHost: localhost
#entityFishingPort: 8090

# NOTE(review): relative paths; presumably resolved against the working
# directory of the process - confirm
corpusPath: resources/dataset/
tmpPath: tmp/

# path to the Pub2TEI repository, as available at https://github.com/kermitt2/Pub2TEI
pub2teiPath: "../../Pub2TEI/"

# if true, binary classifiers are used for the contexts; otherwise a single
# multi-label classifier is used.
# Binary classifiers perform better, but are heavier to use.
useBinaryContextClassifiers: true
# One entry per model. Each entry selects an engine and carries the
# engine-specific parameters in a sub-mapping named after that engine.
models:
  # sequence labelling model named "software"
  - name: "software"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT"
      transformer: "allenai/scibert_scivocab_cased"
      #transformer: "michiyasunaga/LinkBERT-basecased"

  # sequence labelling model named "software-type"
  - name: "software-type"
    #engine: "wapiti"
    engine: "delft"
    wapiti:
      # wapiti training parameters, only considered when wapiti is used as
      # engine for the model; these parameters are used at training time only
      epsilon: 0.00001
      window: 30
      nbMaxIterations: 1500
    delft:
      # deep learning parameters
      #architecture: "BidLSTM_CRF"
      #useELMo: false
      #embeddings_name: "glove-840B"
      architecture: "BERT_CRF"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context"
    # multi-label classifier for the context
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_used"
    # binary classifier to predict if the context of the mention corresponds
    # to a usage of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_creation"
    # binary classifier to predict if the context of the mention corresponds
    # to a creation of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"

  - name: "context_shared"
    # binary classifier to predict if the context of the mention corresponds
    # to a sharing of the software
    engine: "delft"
    delft:
      #architecture: "gru"
      #embeddings_name: "glove-840B"
      architecture: "bert"
      transformer: "michiyasunaga/LinkBERT-basecased"
# Cap on the number of requests handled in parallel.
# NOTE(review): the meaning of 0 is not visible here (possibly "use the
# service default") - confirm against the service code.
maxParallelRequests: 0

# Cross-origin (CORS) settings applied by the web API service:
# allowed origins, HTTP methods and request headers.
corsAllowedOrigins: '*'
corsAllowedMethods: 'OPTIONS,GET,PUT,POST,DELETE,HEAD'
corsAllowedHeaders: 'X-Requested-With,Content-Type,Accept,Origin'
# HTTP server settings: application traffic is served on port 8060,
# the admin interface on port 8061.
server:
  type: custom
  applicationConnectors:
    - type: http
      port: 8060
      # NOTE(review): idleTimeout and acceptQueueSize are placed on the
      # application connector, as these are HTTP connector options in
      # Dropwizard-style configurations - confirm the intended nesting.
      idleTimeout: 120 seconds
      acceptQueueSize: 2048
  adminConnectors:
    - type: http
      port: 8061
  registerDefaultExceptionMappers: false
  maxThreads: 2048
  maxQueuedRequests: 2048
  # change the following for having all http requests logged
  # (an empty appender list means no request log output is configured)
  requestLog:
    appenders: []
# these logging settings apply to the service usage mode
logging:
  # default level for all loggers not listed below
  level: INFO
  loggers:
    # per-logger overrides; "OFF" is quoted so that it stays a string
    # instead of being read as a YAML 1.1 boolean
    org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
    org.glassfish.jersey.internal: "OFF"
    com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
  appenders:
    # single console appender, timestamps in UTC
    - type: console
      threshold: INFO
      timeZone: UTC