Fix RadonSAI-Small with working config

Browse files

Files changed (3)

README.md +20 -108
config.json +24 -16
tokenizer_config.json +7 -16

README.md CHANGED

@@ -1,108 +1,20 @@
----
-license: apache-2.0
-language:
-- ru
-- en
-tags:
-- mistral
-- russian
-- english
-- code
-- machine-learning
-- nlp
-- transformer
-- small
-- demo
-pipeline_tag: text-generation
-size_categories: 100M
----
-# RADON-Small - Compact Mistral-based Russian-English Transformer
-## Model Description
-RADON-Small is a compact version of the RADON transformer model, optimized for development, testing, and resource-constrained environments.
-### Key Features
-- **Architecture**: Mistral with Llama 3 innovations (GQA, RMSNorm, SwiGLU, RoPE)
-- **Parameters**: ~50M parameters (small version)
-- **Context**: 2K tokens
-- **Tokenizer**: Hybrid Unigram+BPE for Russian-English
-- **Status**: Initialized with random weights (training required)
-- **Use Case**: Development, testing, prototyping
-### Model Weights
-This is a small model with initialized weights:
-- **Format**: PyTorch (.bin) and Safetensors (.safetensors)
-- **Dtype**: float16
-- **Initialization**: Random
-- **Size**: ~100MB (50M parameters)
-### Usage
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-# Load small model
-model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI-Small")
-tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI-Small")
-# Note: This model has random weights and needs training
-# For inference, you should use a trained version
-# Generate text (will produce random output)
-prompt = "Машинное обучение - это"
-inputs = tokenizer(prompt, return_tensors="pt")
-outputs = model.generate(**inputs, max_length=50, temperature=0.7)
-result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(result)
-```
-### Training
-This small model is perfect for:
-1. **Development and testing**
-2. **Learning transformer architectures**
-3. **Prototyping new ideas**
-4. **Resource-constrained environments**
-### Model Architecture
-```
-RADON-Small:
-- Hidden size: 512
-- Layers: 6
-- Attention heads: 8 (2 KV heads)
-- Intermediate size: 1024
-- Vocabulary: 8K
-- Context window: 2K tokens
-```
-### Related Models
-- **Full Model**: [MagistrTheOne/RadonSAI](https://huggingface.co/MagistrTheOne/RadonSAI)
-- **Datasets**: [MagistrTheOne/radon-examples](https://huggingface.co/datasets/MagistrTheOne/radon-examples)
-### Citation
-```bibtex
-@misc{radon2024small,
-  title={RADON-Small: Compact Mistral-based Russian-English Transformer},
-  author={MagistrTheOne},
-  year={2024},
-  url={https://github.com/MagistrTheOne/Radon2BMistral}
-}
-```
-### License
-Apache 2.0 License
-### Contact
-- GitHub: [MagistrTheOne/Radon2BMistral](https://github.com/MagistrTheOne/Radon2BMistral)
-- Hugging Face: [MagistrTheOne/RadonSAI-Small](https://huggingface.co/MagistrTheOne/RadonSAI-Small)

+---
+license: apache-2.0
+tags:
+- radon
+- gpt2
+- 22mb
+- fixed
+---
+# RadonSAI-Small (Fixed)
+Исправленная версия RadonSAI-Small с рабочей конфигурацией.
+## Использование
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained("MagistrTheOne/RadonSAI-Small")
+tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/RadonSAI-Small")
+```

config.json CHANGED

@@ -1,20 +1,28 @@
 {
-  "model_name": "radon",
-  "model_type": "gpt2",
-  "vocab_size": 32000,
-  "hidden_size": 256,
-  "num_layers": 6,
-  "num_attention_heads": 8,
-  "intermediate_size": 1024,
-  "max_position_embeddings": 512,
-  "dropout": 0.1,
-  "attention_dropout": 0.1,
-  "activation_function": "gelu",
-  "layer_norm_eps": 1e-05,
-  "initializer_range": 0.02,
-  "use_cache": true,
-  "torch_dtype": "float32",
   "architectures": [
     "GPT2LMHeadModel"
-  ]
 }

 {
   "architectures": [
     "GPT2LMHeadModel"
+  ],
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 512,
+  "n_head": 8,
+  "n_layer": 6,
+  "n_positions": 1024,
+  "vocab_size": 50257,
+  "torch_dtype": "float16",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "attention_dropout": 0.0,
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true
 }

tokenizer_config.json CHANGED

@@ -1,23 +1,14 @@
 {
-  "add_bos_token": false,
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "50256": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
   },
   "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
-  "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
-  "errors": "replace",
   "model_max_length": 1024,
-  "pad_token": null,
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
-}

 {
+  "auto_map": {
+    "AutoTokenizer": [
+      "gpt2",
+      null
+    ]
   },
   "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
   "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
+}