Updated for safetensors
README.md CHANGED
@@ -32,7 +32,7 @@ A 120M parameter language model with modern architecture improvements developed
 wget https://huggingface.co/MistyozAI/CosmicFish-120M/resolve/main/chat.py
 
 # Install dependencies
-pip install transformers huggingface-hub termcolor
+pip install transformers huggingface-hub termcolor safetensors
 
 # Run the chat interface (automatically downloads model)
 python chat.py
@@ -49,13 +49,14 @@ The `chat.py` script handles all model loading, generation, and provides the bes
 - **Training Data**: CosmicSet 1.0
 - **Developer**: Mistyoz AI
 - **Repository**: MistyozAI/CosmicFish-120M
+- **Format**: Safetensors
 
 ## Usage
 
 ### Installation
 
 ```bash
-pip install transformers huggingface-hub termcolor
+pip install transformers huggingface-hub termcolor safetensors
 ```
 
 ### Quick Chat Interface
@@ -63,6 +64,7 @@ pip install transformers huggingface-hub termcolor
 ```python
 from transformers import GPT2Tokenizer
 from huggingface_hub import snapshot_download
+from safetensors.torch import load_file
 import torch
 import json
 import os
@@ -77,8 +79,8 @@ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 with open(os.path.join(cache_dir, "config.json")) as f:
     config_dict = json.load(f)
 
-# Load model weights
-state_dict =
+# Load model weights from safetensors
+state_dict = load_file(os.path.join(cache_dir, "model.safetensors"))
 
 # Note: Full model class available in the repository
 print("Model downloaded and ready for use!")
@@ -87,7 +89,7 @@ print("Model downloaded and ready for use!")
 ### Advanced Generation with Repetition Penalty
 
 ```python
-def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, temperature=0.
+def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, temperature=0.5, penalty=1.2):
     input_ids = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
     generated = input_ids.clone()
 
@@ -116,6 +118,50 @@ def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, t
     return tokenizer.decode(generated[0], skip_special_tokens=True)
 ```
 
+### Loading Model with Safetensors
+
+```python
+from safetensors.torch import load_file
+from modeling_cosmicfish import CosmicFish, CosmicConfig
+import json
+import os
+
+def load_cosmicfish_model(model_path):
+    # Load config
+    with open(os.path.join(model_path, "config.json")) as f:
+        config_dict = json.load(f)
+
+    # Create model config
+    config = CosmicConfig(
+        vocab_size=config_dict["vocab_size"],
+        block_size=config_dict["block_size"],
+        n_layer=config_dict["n_layer"],
+        n_head=config_dict["n_head"],
+        n_embd=config_dict["n_embd"],
+        bias=config_dict["bias"],
+        dropout=0.0,
+        use_rotary=config_dict["use_rotary"],
+        use_swiglu=config_dict["use_swiglu"],
+        use_gqa=config_dict["use_gqa"],
+        n_query_groups=config_dict["n_query_groups"]
+    )
+
+    # Create model
+    model = CosmicFish(config)
+
+    # Load weights from safetensors (secure format)
+    state_dict = load_file(os.path.join(model_path, "model.safetensors"))
+
+    # Handle weight sharing (lm_head.weight shares with transformer.wte.weight)
+    if 'lm_head.weight' not in state_dict and 'transformer.wte.weight' in state_dict:
+        state_dict['lm_head.weight'] = state_dict['transformer.wte.weight']
+
+    model.load_state_dict(state_dict)
+    model.eval()
+
+    return model
+```
+
 ### Chat Interface
 
 ```python
@@ -166,13 +212,23 @@ CosmicFish uses several modern improvements over standard transformers:
 ## Performance
 
 - **Speed**: Varies by hardware (not benchmarked)
-- **Memory**: ~500MB RAM
+- **Memory**: ~500MB RAM
 - **File Size**: 243MB
+- **Loading**: Fast and secure with safetensors
+
+## Model Format
+
+This model uses **safetensors** format for:
+- **Security**: Safe loading without arbitrary code execution
+- **Performance**: Faster loading compared to pickle-based formats
+- **Memory efficiency**: Zero-copy loading when possible
+- **Cross-platform compatibility**: Works consistently across different environments
 
 ## Limitations
 
 - Small model size (120M parameters) may produce less accurate responses
 - 512 token context limit
+- English only
 - Training data cutoff applies
 - May generate incorrect information
 - Cannot browse internet or access real-time data
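
The two hunks touching `generate_with_repetition_penalty` show only its signature and final `return`; the sampling loop in between is unchanged by this commit and therefore elided from the diff. For orientation, here is a minimal sketch of a standard repetition-penalty loop matching the new signature; the `(logits, loss)` return shape of `model(...)` is an assumption, and the README's actual body may differ in detail:

```python
# Illustrative sketch only; not the README's actual implementation.
import torch

def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100,
                                     temperature=0.5, penalty=1.2):
    input_ids = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
    generated = input_ids.clone()
    with torch.no_grad():
        for _ in range(max_tokens):
            logits, _ = model(generated)  # assumed (logits, loss) return
            logits = logits[:, -1, :] / temperature
            # CTRL-style penalty: damp logits of tokens already generated.
            for token_id in set(generated[0].tolist()):
                if logits[0, token_id] > 0:
                    logits[0, token_id] /= penalty
                else:
                    logits[0, token_id] *= penalty
            probs = torch.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            generated = torch.cat([generated, next_token], dim=1)
            # A full implementation would also crop `generated` to the
            # model's 512-token context limit.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
```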
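With the added `load_cosmicfish_model` helper, the README now supports a pure-safetensors flow end to end. A short usage sketch, assuming `modeling_cosmicfish.py` has been fetched alongside the weights (the "Full model class available in the repository" note suggests it ships there), and reusing the two functions defined in the README sections above:

```python
# End-to-end usage sketch for the new safetensors loading path.
from huggingface_hub import snapshot_download
from transformers import GPT2Tokenizer

# Downloads config.json and model.safetensors into a local cache.
cache_dir = snapshot_download("MistyozAI/CosmicFish-120M")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

model = load_cosmicfish_model(cache_dir)  # helper added by this commit
print(generate_with_repetition_penalty(model, tokenizer, "The ocean is"))
```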
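A note on the weight-sharing branch in that helper: the safetensors torch save path rejects duplicate entries for tensors that share memory, so when `lm_head.weight` is tied to `transformer.wte.weight` only one copy lands in the file, and the loader re-creates the alias at load time. Because the format keeps all tensor names and offsets in a small header, the stored keys can be checked without loading any weights; this header-only access is also what enables the lazy, zero-copy loading claimed under "Model Format":

```python
# List stored tensor names; safe_open reads only the safetensors header.
import os
from huggingface_hub import snapshot_download
from safetensors import safe_open

cache_dir = snapshot_download("MistyozAI/CosmicFish-120M")
with safe_open(os.path.join(cache_dir, "model.safetensors"), framework="pt") as f:
    keys = set(f.keys())

print("transformer.wte.weight" in keys)  # expected: True
print("lm_head.weight" in keys)          # likely False when weights are tied
```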