asrith05 committed on
Commit
1d4491c
·
verified ·
1 Parent(s): 100a610

Upload fine-tuned multilingual entity extraction model

Browse files
README.md CHANGED
@@ -24,7 +24,7 @@ This is a fine-tuned multilingual model specialized for entity extraction tasks,
24
  - **Languages**: English, Telugu, Sanskrit
25
  - **Task**: Entity extraction and named entity recognition
26
  - **Fine-tuning**: Specialized for entity extraction from structured data
27
- - **Size**: ~419MB
28
 
29
  ## Description
30
 
 
24
  - **Languages**: English, Telugu, Sanskrit
25
  - **Task**: Entity extraction and named entity recognition
26
  - **Fine-tuning**: Specialized for entity extraction from structured data
27
+ - **Size**: ~1251MB
28
 
29
  ## Description
30
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3911c5699f8c2ecbeb5dd9b91b71516e360e56407ae49afb537a9864083677eb
3
  size 436535952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92f27d2429cf8bac590b7540b2508aa36f1e57e4147d4331290e04b4fb5680ff
3
  size 436535952
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c1b6c8540afd7401dc5a22a9b6d46e6ed013fd66f322eff9f78eeb91b5d795
3
+ size 873103610
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcee86453a034bc93bce36650a505c1b9ade9a72e96c2994f975c854179c8d8c
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea4f37f7ff3fbad45f9447dce69739d3ea3d5eff08699469cdccbc46b7430e1
3
+ size 1064
tokenizer_config.json CHANGED
@@ -69,7 +69,11 @@
69
  "clean_up_tokenization_spaces": false,
70
  "eos_token": "</s>",
71
  "extra_special_tokens": {},
 
72
  "model_max_length": 1000000000000000019884624838656,
73
  "pad_token": "</s>",
74
- "tokenizer_class": "PreTrainedTokenizer"
 
 
 
75
  }
 
69
  "clean_up_tokenization_spaces": false,
70
  "eos_token": "</s>",
71
  "extra_special_tokens": {},
72
+ "max_length": 512,
73
  "model_max_length": 1000000000000000019884624838656,
74
  "pad_token": "</s>",
75
+ "stride": 0,
76
+ "tokenizer_class": "PreTrainedTokenizer",
77
+ "truncation_side": "right",
78
+ "truncation_strategy": "longest_first"
79
  }
trainer_state.json ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.4237593412399292,
4
+ "best_model_checkpoint": "./finetuned_entity_extraction_v2_telugu\\checkpoint-1000",
5
+ "epoch": 1.0,
6
+ "eval_steps": 200,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.05,
14
+ "grad_norm": 1.7661869525909424,
15
+ "learning_rate": 1.9600000000000002e-05,
16
+ "loss": 2.2122,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.1,
21
+ "grad_norm": 1.6054468154907227,
22
+ "learning_rate": 1.9869002134404235e-05,
23
+ "loss": 1.0335,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.15,
28
+ "grad_norm": 1.5563281774520874,
29
+ "learning_rate": 1.946885829634935e-05,
30
+ "loss": 0.8195,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.2,
35
+ "grad_norm": 1.888305425643921,
36
+ "learning_rate": 1.8810428687441415e-05,
37
+ "loss": 0.7121,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.2,
42
+ "eval_loss": 0.6703252196311951,
43
+ "eval_runtime": 28.9212,
44
+ "eval_samples_per_second": 69.153,
45
+ "eval_steps_per_second": 17.288,
46
+ "step": 200
47
+ },
48
+ {
49
+ "epoch": 0.25,
50
+ "grad_norm": 1.5473520755767822,
51
+ "learning_rate": 1.7911673551013553e-05,
52
+ "loss": 0.6368,
53
+ "step": 250
54
+ },
55
+ {
56
+ "epoch": 0.3,
57
+ "grad_norm": 1.4704129695892334,
58
+ "learning_rate": 1.6797108584307732e-05,
59
+ "loss": 0.5804,
60
+ "step": 300
61
+ },
62
+ {
63
+ "epoch": 0.35,
64
+ "grad_norm": 1.578823208808899,
65
+ "learning_rate": 1.5497136214161662e-05,
66
+ "loss": 0.549,
67
+ "step": 350
68
+ },
69
+ {
70
+ "epoch": 0.4,
71
+ "grad_norm": 1.5565356016159058,
72
+ "learning_rate": 1.4047216298057872e-05,
73
+ "loss": 0.525,
74
+ "step": 400
75
+ },
76
+ {
77
+ "epoch": 0.4,
78
+ "eval_loss": 0.5127490758895874,
79
+ "eval_runtime": 28.1888,
80
+ "eval_samples_per_second": 70.95,
81
+ "eval_steps_per_second": 17.738,
82
+ "step": 400
83
+ },
84
+ {
85
+ "epoch": 0.45,
86
+ "grad_norm": 1.5739060640335083,
87
+ "learning_rate": 1.2486898871648552e-05,
88
+ "loss": 0.4904,
89
+ "step": 450
90
+ },
91
+ {
92
+ "epoch": 0.5,
93
+ "grad_norm": 1.7133370637893677,
94
+ "learning_rate": 1.0858745326882172e-05,
95
+ "loss": 0.4795,
96
+ "step": 500
97
+ },
98
+ {
99
+ "epoch": 0.55,
100
+ "grad_norm": 1.4752830266952515,
101
+ "learning_rate": 9.20716744818044e-06,
102
+ "loss": 0.4592,
103
+ "step": 550
104
+ },
105
+ {
106
+ "epoch": 0.6,
107
+ "grad_norm": 1.4320343732833862,
108
+ "learning_rate": 7.577215974732139e-06,
109
+ "loss": 0.4531,
110
+ "step": 600
111
+ },
112
+ {
113
+ "epoch": 0.6,
114
+ "eval_loss": 0.45128172636032104,
115
+ "eval_runtime": 30.6969,
116
+ "eval_samples_per_second": 65.153,
117
+ "eval_steps_per_second": 16.288,
118
+ "step": 600
119
+ },
120
+ {
121
+ "epoch": 0.65,
122
+ "grad_norm": 1.5366995334625244,
123
+ "learning_rate": 6.0133517337665504e-06,
124
+ "loss": 0.4392,
125
+ "step": 650
126
+ },
127
+ {
128
+ "epoch": 0.7,
129
+ "grad_norm": 1.3664205074310303,
130
+ "learning_rate": 4.5582328650874095e-06,
131
+ "loss": 0.4371,
132
+ "step": 700
133
+ },
134
+ {
135
+ "epoch": 0.75,
136
+ "grad_norm": 1.2980847358703613,
137
+ "learning_rate": 3.25155121822048e-06,
138
+ "loss": 0.4341,
139
+ "step": 750
140
+ },
141
+ {
142
+ "epoch": 0.8,
143
+ "grad_norm": 1.4085419178009033,
144
+ "learning_rate": 2.1289496622274754e-06,
145
+ "loss": 0.4254,
146
+ "step": 800
147
+ },
148
+ {
149
+ "epoch": 0.8,
150
+ "eval_loss": 0.42791882157325745,
151
+ "eval_runtime": 31.0012,
152
+ "eval_samples_per_second": 64.514,
153
+ "eval_steps_per_second": 16.128,
154
+ "step": 800
155
+ },
156
+ {
157
+ "epoch": 0.85,
158
+ "grad_norm": 1.3011988401412964,
159
+ "learning_rate": 1.2210498411520256e-06,
160
+ "loss": 0.4215,
161
+ "step": 850
162
+ },
163
+ {
164
+ "epoch": 0.9,
165
+ "grad_norm": 1.3734039068222046,
166
+ "learning_rate": 5.526168953948752e-07,
167
+ "loss": 0.4205,
168
+ "step": 900
169
+ },
170
+ {
171
+ "epoch": 0.95,
172
+ "grad_norm": 1.1741344928741455,
173
+ "learning_rate": 1.4188393324163663e-07,
174
+ "loss": 0.4243,
175
+ "step": 950
176
+ },
177
+ {
178
+ "epoch": 1.0,
179
+ "grad_norm": 1.3026286363601685,
180
+ "learning_rate": 5.467919892865326e-11,
181
+ "loss": 0.4166,
182
+ "step": 1000
183
+ },
184
+ {
185
+ "epoch": 1.0,
186
+ "eval_loss": 0.4237593412399292,
187
+ "eval_runtime": 29.6043,
188
+ "eval_samples_per_second": 67.558,
189
+ "eval_steps_per_second": 16.889,
190
+ "step": 1000
191
+ }
192
+ ],
193
+ "logging_steps": 50,
194
+ "max_steps": 1000,
195
+ "num_input_tokens_seen": 0,
196
+ "num_train_epochs": 1,
197
+ "save_steps": 500,
198
+ "stateful_callbacks": {
199
+ "TrainerControl": {
200
+ "args": {
201
+ "should_epoch_stop": false,
202
+ "should_evaluate": false,
203
+ "should_log": false,
204
+ "should_save": true,
205
+ "should_training_stop": true
206
+ },
207
+ "attributes": {}
208
+ }
209
+ },
210
+ "total_flos": 907581238726656.0,
211
+ "train_batch_size": 4,
212
+ "trial_name": null,
213
+ "trial_params": null
214
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c7847651ba100fba41ba9e9f55902c3a6b622e9391e57e0594d6ce38b560e2
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25441c9d147648c8be9b3d80109cdec8c18c037a532d7f3deac188475fdb93ed
3
+ size 5368