akkiisfrommars committed
Commit 3df5616 · verified · 1 Parent(s): 8e1e9ad

Updated for safetensors

Files changed (1)
  1. README.md +61 -6
README.md CHANGED
@@ -32,7 +32,7 @@ A 120M parameter language model with modern architecture improvements developed
 wget https://huggingface.co/MistyozAI/CosmicFish-120M/resolve/main/chat.py
 
 # Install dependencies
-pip install transformers huggingface-hub termcolor
+pip install transformers huggingface-hub termcolor safetensors
 
 # Run the chat interface (automatically downloads model)
 python chat.py
@@ -49,13 +49,14 @@ The `chat.py` script handles all model loading, generation, and provides the bes
 - **Training Data**: CosmicSet 1.0
 - **Developer**: Mistyoz AI
 - **Repository**: MistyozAI/CosmicFish-120M
+- **Format**: Safetensors
 
 ## Usage
 
 ### Installation
 
 ```bash
-pip install transformers huggingface-hub termcolor
+pip install transformers huggingface-hub termcolor safetensors
 ```
 
 ### Quick Chat Interface
@@ -63,6 +64,7 @@ pip install transformers huggingface-hub termcolor
 ```python
 from transformers import GPT2Tokenizer
 from huggingface_hub import snapshot_download
+from safetensors.torch import load_file
 import torch
 import json
 import os
@@ -77,8 +79,8 @@ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 with open(os.path.join(cache_dir, "config.json")) as f:
     config_dict = json.load(f)
 
-# Load model weights
-state_dict = torch.load(os.path.join(cache_dir, "pytorch_model.bin"), map_location="cpu")
+# Load model weights from safetensors
+state_dict = load_file(os.path.join(cache_dir, "model.safetensors"))
 
 # Note: Full model class available in the repository
 print("Model downloaded and ready for use!")
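The switch above, from `torch.load` on `pytorch_model.bin` to `safetensors.torch.load_file` on `model.safetensors`, is the substance of this commit. As a quick sanity check after the change, the new checkpoint can be inspected lazily; a minimal sketch, assuming the `cache_dir` from the snippet above:

```python
from safetensors import safe_open
import os

# List tensor names and shapes without loading the weights into memory;
# safetensors only materializes a tensor when get_tensor()/get_slice() is called.
with safe_open(os.path.join(cache_dir, "model.safetensors"),
               framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
```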
@@ -87,7 +89,7 @@ print("Model downloaded and ready for use!")
 ### Advanced Generation with Repetition Penalty
 
 ```python
-def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, temperature=0.7, penalty=1.2):
+def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, temperature=0.5, penalty=1.2):
     input_ids = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
     generated = input_ids.clone()
 
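The hunk above only changes the default sampling temperature (0.7 → 0.5); the function body is elided by the diff. For readers skimming, a repetition penalty of this kind is commonly applied per decoding step roughly as follows; `apply_repetition_penalty` is a hypothetical standalone helper, not the README's exact implementation:

```python
import torch

def apply_repetition_penalty(logits: torch.Tensor, generated: torch.Tensor,
                             penalty: float = 1.2) -> torch.Tensor:
    # CTRL-style penalty: make every token already present in `generated`
    # less likely. Positive logits shrink (divide by penalty), negative
    # logits sink further (multiply), so the effect is monotone either way.
    for token_id in set(generated[0].tolist()):
        if logits[0, token_id] > 0:
            logits[0, token_id] /= penalty
        else:
            logits[0, token_id] *= penalty
    return logits
```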
@@ -116,6 +118,49 @@ def generate_with_repetition_penalty(model, tokenizer, prompt, max_tokens=100, t
     return tokenizer.decode(generated[0], skip_special_tokens=True)
 ```
 
+### Loading Model with Safetensors
+
+```python
+from safetensors.torch import load_file
+from modeling_cosmicfish import CosmicFish, CosmicConfig
+import json
+
+def load_cosmicfish_model(model_path):
+    # Load config
+    with open(os.path.join(model_path, "config.json")) as f:
+        config_dict = json.load(f)
+
+    # Create model config
+    config = CosmicConfig(
+        vocab_size=config_dict["vocab_size"],
+        block_size=config_dict["block_size"],
+        n_layer=config_dict["n_layer"],
+        n_head=config_dict["n_head"],
+        n_embd=config_dict["n_embd"],
+        bias=config_dict["bias"],
+        dropout=0.0,
+        use_rotary=config_dict["use_rotary"],
+        use_swiglu=config_dict["use_swiglu"],
+        use_gqa=config_dict["use_gqa"],
+        n_query_groups=config_dict["n_query_groups"]
+    )
+
+    # Create model
+    model = CosmicFish(config)
+
+    # Load weights from safetensors (secure format)
+    state_dict = load_file(os.path.join(model_path, "model.safetensors"))
+
+    # Handle weight sharing (lm_head.weight shares with transformer.wte.weight)
+    if 'lm_head.weight' not in state_dict and 'transformer.wte.weight' in state_dict:
+        state_dict['lm_head.weight'] = state_dict['transformer.wte.weight']
+
+    model.load_state_dict(state_dict)
+    model.eval()
+
+    return model
+```
+
 ### Chat Interface
 
 ```python
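The `load_cosmicfish_model` helper added above pairs with `snapshot_download` from the Quick Chat Interface snippet (note that the added block also relies on `os`, which is imported only in that earlier snippet). A usage sketch, assuming `modeling_cosmicfish.py` ships in the repository next to the weights:

```python
from huggingface_hub import snapshot_download

# Download config.json, model.safetensors, modeling_cosmicfish.py, etc.
cache_dir = snapshot_download(repo_id="MistyozAI/CosmicFish-120M")

model = load_cosmicfish_model(cache_dir)
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.0f}M parameters")
```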
@@ -166,13 +211,23 @@ CosmicFish uses several modern improvements over standard transformers:
 ## Performance
 
 - **Speed**: Varies by hardware (not benchmarked)
-- **Memory**: ~500MB RAM (FP16)
+- **Memory**: ~500MB RAM
 - **File Size**: 243MB
+- **Loading**: Fast and secure with safetensors
+
+## Model Format
+
+This model uses the **safetensors** format for:
+- **Security**: Safe loading without arbitrary code execution
+- **Performance**: Faster loading compared to pickle-based formats
+- **Memory efficiency**: Zero-copy loading when possible
+- **Cross-platform compatibility**: Works consistently across different environments
 
 ## Limitations
 
 - Small model size (120M parameters) may produce less accurate responses
 - 512 token context limit
+- English only
 - Training data cutoff applies
 - May generate incorrect information
 - Cannot browse internet or access real-time data
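Background on the weight-sharing branch in the new loader: `safetensors.torch.save_file` refuses tensors that share storage, which is presumably why the tied `lm_head.weight` is absent from `model.safetensors` and re-attached from `transformer.wte.weight` at load time. A plausible sketch of the one-off conversion behind this commit (the actual script is not part of the diff):

```python
import torch
from safetensors.torch import save_file

# Load the legacy pickle checkpoint.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")

# Drop the tied head: it shares storage with transformer.wte.weight, and
# save_file() raises on shared tensors. The README's loader restores it
# from the embedding matrix after load_file().
state_dict.pop("lm_head.weight", None)

save_file(state_dict, "model.safetensors")
```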
 