Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,9 @@ import gradio as gr
|
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
import json
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Define the model architecture
|
| 8 |
class SmolLM2Config(PretrainedConfig):
|
|
@@ -56,26 +59,35 @@ class SmolLM2ForCausalLM(PreTrainedModel):
|
|
| 56 |
def __init__(self, config):
|
| 57 |
super().__init__(config)
|
| 58 |
self.config = config
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
config=config,
|
| 64 |
-
torch_dtype=torch.float16,
|
| 65 |
-
low_cpu_mem_usage=True,
|
| 66 |
-
trust_remote_code=True
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def prepare_inputs_for_generation(self, input_ids, **kwargs):
|
| 78 |
-
return
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
# Register the model
|
| 81 |
AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
|
|
@@ -111,19 +123,20 @@ def initialize():
|
|
| 111 |
}
|
| 112 |
TOKENIZER.add_special_tokens(special_tokens)
|
| 113 |
|
| 114 |
-
# Load model
|
| 115 |
print("Loading model...")
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
)
|
|
|
|
| 123 |
|
| 124 |
# Move model to device
|
| 125 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 126 |
-
MODEL.to(device)
|
| 127 |
|
| 128 |
print(f"Model loaded successfully on {device}")
|
| 129 |
|
|
|
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
import json
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
import math
|
| 9 |
|
| 10 |
# Define the model architecture
|
| 11 |
class SmolLM2Config(PretrainedConfig):
|
|
|
|
| 59 |
def __init__(self, config):
    """Build the embedding, decoder stack, final norm and LM head.

    Args:
        config: Model configuration. The attributes read here are
            ``vocab_size``, ``hidden_size``, ``num_hidden_layers``,
            ``rms_norm_eps`` and ``tie_word_embeddings``.
    """
    super().__init__(config)
    self.config = config

    vocab_size = config.vocab_size
    hidden_size = config.hidden_size

    # Sub-modules are created in the same order as before so that any
    # random parameter initialisation consumes the RNG identically.
    self.embed_tokens = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=hidden_size,
    )
    decoder_layers = [
        LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)
    ]
    self.layers = nn.ModuleList(decoder_layers)
    self.norm = RMSNorm(hidden_size, config.rms_norm_eps)
    self.lm_head = nn.Linear(
        in_features=hidden_size,
        out_features=vocab_size,
        bias=False,
    )

    # With tied embeddings the output projection shares the very same
    # parameter object as the input embedding table.
    if config.tie_word_embeddings:
        self.lm_head.weight = self.embed_tokens.weight
|
| 69 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
    """Run the decoder stack and optionally compute a cross-entropy loss.

    Args:
        input_ids: Token ids fed to ``self.embed_tokens``. Required, even
            though the signature keeps a ``None`` default.
        attention_mask: Passed unchanged as the second positional argument
            to every layer in ``self.layers``.
        labels: Optional target ids, one per logit position. They are
            flattened and compared against the flattened logits as-is.
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        The logits tensor when ``labels`` is ``None``; otherwise the tuple
        ``(loss, logits)``.

    Raises:
        ValueError: If ``input_ids`` is ``None``.
    """
    if input_ids is None:
        # Fail with a clear message instead of an opaque error raised from
        # inside the embedding lookup.
        raise ValueError("input_ids must be provided")

    hidden_states = self.embed_tokens(input_ids)

    # Process through layers
    for layer in self.layers:
        hidden_states = layer(hidden_states, attention_mask)

    logits = self.lm_head(self.norm(hidden_states))

    if labels is None:
        return logits

    # NOTE(review): no next-token shift is applied here, so labels must
    # already be aligned with the logit positions -- confirm against callers.
    # reshape (not view) so non-contiguous labels, e.g. a ``tokens[:, 1:]``
    # slice, do not raise a RuntimeError.
    loss = F.cross_entropy(
        logits.reshape(-1, logits.size(-1)),
        labels.reshape(-1),
    )
    return loss, logits
|
| 85 |
|
| 86 |
def prepare_inputs_for_generation(self, input_ids, **kwargs):
    """Assemble the keyword arguments for a forward call during generation.

    Args:
        input_ids: Token ids, passed through untouched.
        **kwargs: Only ``attention_mask`` is read; everything else is ignored.

    Returns:
        A dict with keys ``"input_ids"`` and ``"attention_mask"``; the mask
        is ``None`` when the caller did not supply one.
    """
    mask = kwargs.get("attention_mask")
    model_inputs = dict(input_ids=input_ids, attention_mask=mask)
    return model_inputs
|
| 91 |
|
| 92 |
# Register the model
|
| 93 |
AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
|
|
|
|
| 123 |
}
|
| 124 |
TOKENIZER.add_special_tokens(special_tokens)
|
| 125 |
|
| 126 |
+
# Load model weights
|
| 127 |
print("Loading model...")
|
| 128 |
+
weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
|
| 129 |
+
|
| 130 |
+
# Initialize model
|
| 131 |
+
MODEL = SmolLM2ForCausalLM(config)
|
| 132 |
+
|
| 133 |
+
# Load state dict
|
| 134 |
+
state_dict = torch.load(weights_path, map_location="cpu")
|
| 135 |
+
MODEL.load_state_dict(state_dict)
|
| 136 |
|
| 137 |
# Move model to device
|
| 138 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 139 |
+
MODEL = MODEL.to(device)
|
| 140 |
|
| 141 |
print(f"Model loaded successfully on {device}")
|
| 142 |
|