{
"model_id": "trimurti-lm",
"model_name": "Trimurti-LM",
"model_type": "language_model",
"architecture": "GPT2",
"framework": "transformers",
"languages": {
"supported": ["en", "hi", "pa"],
"language_tags": ["[EN]", "[HI]", "[PA]"],
"description": "Trilingual language model supporting English, Hindi, and Punjabi"
},
"model_config": {
"vocab_size": 8000,
"n_positions": 128,
"n_embd": 256,
"n_layer": 4,
"n_head": 4,
"n_inner": 512,
"activation_function": "gelu_new",
"attn_pdrop": 0.1,
"embd_pdrop": 0.1,
"resid_pdrop": 0.1,
"estimated_parameters": "4.7M"
},
"tokenizer": {
"type": "sentencepiece",
"model_type": "unigram",
"vocab_size": 8000,
"character_coverage": 0.9995,
"byte_fallback": true,
"model_path": "final_corpus/multilingual_spm.model"
},
"training": {
"corpus": "final_corpus/multilingual_corpus_train.txt",
"validation": "final_corpus/multilingual_corpus_val.txt",
"total_steps": 5000,
"batch_size": 2,
"gradient_accumulation": 8,
"learning_rate": 2e-4,
"warmup_steps": 1000,
"effective_batch_size": 16
},
"checkpoints": {
"path": "checkpoints_tiny",
"available_checkpoints": [
"step1000",
"step2000",
"step3000",
"step4000",
"step5000",
"final"
]
},
"evaluation": {
"overall_accuracy": 100.0,
"english_accuracy": 100.0,
"hindi_accuracy": 100.0,
"punjabi_accuracy": 100.0,
"mixed_accuracy": 100.0,
"avg_english_perplexity": 42.29,
"avg_hindi_perplexity": 50.56,
"avg_punjabi_perplexity": 63.42
},
"entry_points": {
"training": "python train_model.py",
"testing": "python test_model.py",
"evaluation": "python evaluate_model.py",
"preprocessing": "python preprocess.py",
"web_interface": "python web_interface.py"
},
"dependencies": [
"torch",
"transformers",
"sentencepiece",
"tqdm",
"gradio",
"pandas",
"numpy"
],
"filter": [
{
"bool": {
"should": [
{
"term": { "path": "model_index.json" }
},
{
"regexp": { "path": "[^/]*\\.safetensors" }
},
{
"regexp": { "path": "[^/]*\\.ckpt" }
},
{
"regexp": { "path": "[^/]*\\.bin" }
}
],
"minimum_should_match": 1
}
}
],
"files": [
{
"path": "checkpoints_tiny/final/model.safetensors",
"description": "Final trained model weights"
},
{
"path": "checkpoints_tiny/final/config.json",
"description": "Model configuration"
},
{
"path": "checkpoints_tiny/final/generation_config.json",
"description": "Generation settings"
},
{
"path": "final_corpus/multilingual_spm.model",
"description": "SentencePiece tokenizer model"
},
{
"path": "final_corpus/multilingual_spm.vocab",
"description": "Tokenizer vocabulary"
},
{
"path": "train_model.py",
"description": "Training script"
},
{
"path": "test_model.py",
"description": "Testing and inference script"
},
{
"path": "evaluate_model.py",
"description": "Evaluation script"
},
{
"path": "preprocess.py",
"description": "Data preprocessing script"
},
{
"path": "web_interface.py",
"description": "Gradio web interface"
}
]
}