PredictiveManish committed on
Commit
bf9cee5
·
verified ·
1 Parent(s): 45247fb

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +153 -0
config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "trimurti-lm",
3
+ "model_name": "Trimurti-LM",
4
+ "model_type": "language_model",
5
+ "architecture": "GPT2",
6
+ "framework": "transformers",
7
+
8
+ "languages": {
9
+ "supported": ["en", "hi", "pa"],
10
+ "language_tags": ["[EN]", "[HI]", "[PA]"],
11
+ "description": "Trilingual language model supporting English, Hindi, and Punjabi"
12
+ },
13
+
14
+ "model_config": {
15
+ "vocab_size": 8000,
16
+ "n_positions": 128,
17
+ "n_embd": 256,
18
+ "n_layer": 4,
19
+ "n_head": 4,
20
+ "n_inner": 512,
21
+ "activation_function": "gelu_new",
22
+ "attn_pdrop": 0.1,
23
+ "embd_pdrop": 0.1,
24
+ "resid_pdrop": 0.1,
25
+ "estimated_parameters": "4.7M"
26
+ },
27
+
28
+ "tokenizer": {
29
+ "type": "sentencepiece",
30
+ "model_type": "unigram",
31
+ "vocab_size": 8000,
32
+ "character_coverage": 0.9995,
33
+ "byte_fallback": true,
34
+ "model_path": "final_corpus/multilingual_spm.model"
35
+ },
36
+
37
+ "training": {
38
+ "corpus": "final_corpus/multilingual_corpus_train.txt",
39
+ "validation": "final_corpus/multilingual_corpus_val.txt",
40
+ "total_steps": 5000,
41
+ "batch_size": 2,
42
+ "gradient_accumulation": 8,
43
+ "learning_rate": 2e-4,
44
+ "warmup_steps": 1000,
45
+ "effective_batch_size": 16
46
+ },
47
+
48
+ "checkpoints": {
49
+ "path": "checkpoints_tiny",
50
+ "available_checkpoints": [
51
+ "step1000",
52
+ "step2000",
53
+ "step3000",
54
+ "step4000",
55
+ "step5000",
56
+ "final"
57
+ ]
58
+ },
59
+
60
+ "evaluation": {
61
+ "overall_accuracy": 100.0,
62
+ "english_accuracy": 100.0,
63
+ "hindi_accuracy": 100.0,
64
+ "punjabi_accuracy": 100.0,
65
+ "mixed_accuracy": 100.0,
66
+ "avg_english_perplexity": 42.29,
67
+ "avg_hindi_perplexity": 50.56,
68
+ "avg_punjabi_perplexity": 63.42
69
+ },
70
+
71
+ "entry_points": {
72
+ "training": "python train_model.py",
73
+ "testing": "python test_model.py",
74
+ "evaluation": "python evaluate_model.py",
75
+ "preprocessing": "python preprocess.py",
76
+ "web_interface": "python web_interface.py"
77
+ },
78
+
79
+ "dependencies": [
80
+ "torch",
81
+ "transformers",
82
+ "sentencepiece",
83
+ "tqdm",
84
+ "gradio",
85
+ "pandas",
86
+ "numpy"
87
+ ],
88
+
89
+ "filter": [
90
+ {
91
+ "bool": {
92
+ "should": [
93
+ {
94
+ "term": { "path": "model_index.json" }
95
+ },
96
+ {
97
+ "regexp": { "path": "[^/]*\\.safetensors" }
98
+ },
99
+ {
100
+ "regexp": { "path": "[^/]*\\.ckpt" }
101
+ },
102
+ {
103
+ "regexp": { "path": "[^/]*\\.bin" }
104
+ }
105
+ ],
106
+ "minimum_should_match": 1
107
+ }
108
+ }
109
+ ],
110
+
111
+ "files": [
112
+ {
113
+ "path": "checkpoints_tiny/final/model.safetensors",
114
+ "description": "Final trained model weights"
115
+ },
116
+ {
117
+ "path": "checkpoints_tiny/final/config.json",
118
+ "description": "Model configuration"
119
+ },
120
+ {
121
+ "path": "checkpoints_tiny/final/generation_config.json",
122
+ "description": "Generation settings"
123
+ },
124
+ {
125
+ "path": "final_corpus/multilingual_spm.model",
126
+ "description": "SentencePiece tokenizer model"
127
+ },
128
+ {
129
+ "path": "final_corpus/multilingual_spm.vocab",
130
+ "description": "Tokenizer vocabulary"
131
+ },
132
+ {
133
+ "path": "train_model.py",
134
+ "description": "Training script"
135
+ },
136
+ {
137
+ "path": "test_model.py",
138
+ "description": "Testing and inference script"
139
+ },
140
+ {
141
+ "path": "evaluate_model.py",
142
+ "description": "Evaluation script"
143
+ },
144
+ {
145
+ "path": "preprocess.py",
146
+ "description": "Data preprocessing script"
147
+ },
148
+ {
149
+ "path": "web_interface.py",
150
+ "description": "Gradio web interface"
151
+ }
152
+ ]
153
+ }