lfoppiano committed on
Commit
da7c1b2
·
verified ·
1 Parent(s): 45a7593

Upload 2 files

Browse files
Files changed (2) hide show
  1. dataseer-ml.yml +70 -0
  2. server.yml +24 -0
dataseer-ml.yml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ corpusPath: "./resources/dataset/dataseer/corpus"
3
+ templatePath: "./resources/dataset/dataseer/crfpp-templates/dataseer.template"
4
+ grobidHome: "../grobid-home"
5
+ tmpPath: "tmp/"
6
+
7
+ # path to Pub2TEI repository as available at https://github.com/kermitt2/Pub2TEI
8
+ pub2teiPath: "../../Pub2TEI/"
9
+
10
+ gluttonHost: "https://cloud.science-miner.com/glutton"
11
+ gluttonPort:
12
+
13
+ # sequence labeling model (identify data-related sections)
14
+ models:
15
+
16
+ # model for zones
17
+ - name: "dataseer"
18
+ engine: "wapiti"
19
+ #engine: "delft"
20
+ wapiti:
21
+ # wapiti training parameters, they will be used at training time only
22
+ epsilon: 0.00001
23
+ window: 20
24
+ nbMaxIterations: 2000
25
+
26
+ # model for dataset mention recognition
27
+ - name: "dataseer-mention"
28
+ engine: "wapiti"
29
+ #engine: "delft"
30
+ wapiti:
31
+ # wapiti training parameters, they will be used at training time only
32
+ epsilon: 0.00001
33
+ window: 20
34
+ nbMaxIterations: 2000
35
+ delft:
36
+ # deep learning parameters
37
+ architecture: "BidLSTM_CRF"
38
+ #architecture: "scibert"
39
+ useELMo: false
40
+ embeddings_name: "glove-840B"
41
+
42
+ # classifier model, dataset binary (dataset or not dataset in the current sentence)
43
+ - name: "dataseer-binary"
44
+ engine: "delft"
45
+ delft:
46
+ # deep learning parameters
47
+ architecture: "gru"
48
+ #architecture: "bert"
49
+ embeddings_name: "word2vec"
50
+ #transformer: "allenai/scibert_scivocab_cased"
51
+
52
+ # identification of the data type (first level hierarchy)
53
+ - name: "dataseer-first"
54
+ engine: "delft"
55
+ delft:
56
+ # deep learning parameters
57
+ architecture: "gru"
58
+ #architecture: "bert"
59
+ embeddings_name: "word2vec"
60
+ #transformer: "allenai/scibert_scivocab_cased"
61
+
62
+ # mention context classification (reuse binary for the moment)
63
+ - name: "dataseer-reuse"
64
+ engine: "delft"
65
+ delft:
66
+ # deep learning parameters
67
+ architecture: "gru"
68
+ #architecture: "bert"
69
+ embeddings_name: "word2vec"
70
+ #transformer: "allenai/scibert_scivocab_cased"
server.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ grobidHome: ../grobid-home
2
+
3
+ server:
4
+ type: custom
5
+ idleTimeout: 120 seconds
6
+ applicationConnectors:
7
+ - type: http
8
+ port: 8060
9
+ adminConnectors:
10
+ - type: http
11
+ port: 8061
12
+ registerDefaultExceptionMappers: false
13
+ maxThreads: 2048
14
+ maxQueuedRequests: 2048
15
+ acceptQueueSize: 2048
16
+
17
+ logging:
18
+ level: INFO
19
+
20
+ appenders:
21
+ - type: console
22
+ threshold: INFO
23
+
24
+ timeZone: UTC