Spaces: Build error

Commit f98cc3f · Parent(s): 333126d
Huggingface app

Browse files:

- .gitignore +7 -0
- README.md +32 -2
- app.py +193 -0
- model.pt +3 -0
- requirements.txt +5 -0
- src/config/model_config.py +10 -0
- src/models/gpt.py +177 -0
- src/utils/device_utils.py +10 -0
.gitignore ADDED

```diff
@@ -0,0 +1,7 @@
+__pycache__/
+*.pyc
+.env
+.venv
+venv/
+ENV/
+.DS_Store
```
README.md CHANGED

```diff
@@ -8,7 +8,37 @@ sdk_version: 1.41.1
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: SmolLM2-135
+short_description: Text generation using SmolLM2-135 model
 ---
 
-
+# SmolLM2-135 Text Generator
+
+This is a Streamlit-based text generation application using a fine-tuned SmolLM2-135 model. The application allows users to:
+
+- Input custom prompts
+- Control the length of generated text
+- Generate multiple text sequences
+- View token information
+
+## Features
+
+- Interactive text input
+- Adjustable text generation length
+- Multiple sequence generation
+- Real-time text generation
+- Token information display
+
+## Usage
+
+1. Enter your prompt in the text area
+2. Adjust the length of text to be generated
+3. Select the number of sequences to generate
+4. Click "Generate" to create text
+
+## Technical Details
+
+The application uses:
+- SmolLM2-135 model architecture
+- Tiktoken tokenizer
+- PyTorch for model inference
+- Streamlit for the user interface
```
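The token counts surfaced in the UI come from tiktoken's GPT-2 encoding, the same `gpt2` encoding that `generate_text()` in app.py uses. A minimal sketch of that round trip (the example token ids are illustrative):

```python
import tiktoken

# Same encoding the app uses in generate_text()
tokenizer = tiktoken.get_encoding("gpt2")

tokens = tokenizer.encode("Once upon a time")
print(tokens)                    # e.g. [7454, 2402, 257, 640]
print(len(tokens))               # input token count reported by the app
print(tokenizer.decode(tokens))  # round-trips back to the original text
```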
app.py ADDED

```diff
@@ -0,0 +1,193 @@
+import streamlit as st
+import torch
+import tiktoken
+import sys
+import os
+import logging
+import warnings
+
+# Configure logging and warnings
+logging.getLogger('streamlit').setLevel(logging.ERROR)
+warnings.filterwarnings('ignore', message='.*torch.classes.*')
+warnings.filterwarnings('ignore', category=FutureWarning)
+
+# Add the project root to Python path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from src.config.model_config import GPTConfig
+from src.models.gpt import LlamaForCausalLM
+from src.utils.device_utils import get_device
+
+@st.cache_resource
+def load_model():
+    """
+    Load and prepare the model for inference.
+    Returns the loaded model and device.
+    """
+    device = get_device()
+
+    try:
+        # Load the checkpoint dictionary
+        checkpoint = torch.load('model.pt', map_location=device)
+
+        # Initialize model with config
+        config = GPTConfig()
+        model = LlamaForCausalLM(config)
+
+        # Load state dict - extract model_state_dict from checkpoint if present
+        if "model_state_dict" in checkpoint:
+            state_dict = checkpoint["model_state_dict"]
+        else:
+            state_dict = checkpoint
+
+        # Remove cached rotary embedding buffers
+        state_dict.pop("model.rotary_emb.cos_cached", None)
+        state_dict.pop("model.rotary_emb.sin_cached", None)
+
+        model.load_state_dict(state_dict, strict=True)
+
+        # Prepare model for inference
+        model = model.float()
+        model.to(device)
+        model.eval()
+
+        return model, device
+
+    except Exception as e:
+        st.error(f"Detailed error during model loading: {str(e)}")
+        raise e
+
+def generate_text(model, prompt, max_length=100, num_return_sequences=1, device='cpu'):
+    """
+    Generate text based on the input prompt.
+
+    Args:
+        model: The loaded GPT model
+        prompt: Input text prompt
+        max_length: Maximum number of tokens to generate
+        num_return_sequences: Number of different sequences to generate
+        device: Device to run inference on
+
+    Returns:
+        List of generated text sequences
+    """
+    tokenizer = tiktoken.get_encoding('gpt2')
+    input_tokens = tokenizer.encode(prompt)
+    x = torch.tensor(input_tokens).unsqueeze(0).repeat(num_return_sequences, 1)
+    x = x.to(device)
+
+    # Calculate final length (input length + requested additional tokens)
+    input_length = x.size(1)
+    target_length = input_length + max_length
+
+    # Generate text
+    with torch.no_grad():
+        while x.size(1) < target_length:
+            # Get predictions
+            logits, _ = model(x)
+            next_token_logits = logits[:, -1, :]
+
+            # Apply temperature to make the distribution more focused
+            probs = torch.softmax(next_token_logits / 0.8, dim=-1)
+
+            # Sample from the distribution
+            next_token = torch.multinomial(probs, num_samples=1)
+
+            # Append to the sequence
+            x = torch.cat((x, next_token), dim=1)
+
+    # Print token information
+    st.text(f"Input tokens: {input_length}, additional tokens requested: {max_length}, total tokens generated: {x.size(1)}")
+
+    # Decode generated sequences
+    generated_texts = []
+    for i in range(num_return_sequences):
+        tokens = x[i].tolist()
+        text = tokenizer.decode(tokens)
+        generated_texts.append(text)
+
+    return generated_texts
+
+# Set page config
+st.set_page_config(
+    page_title="SmolLM2-135 Text Generator",
+    page_icon="🐢",
+    layout="wide"
+)
+
+# Streamlit UI
+st.title("🐢 SmolLM2-135 Text Generator")
+st.markdown("""
+This application uses a fine-tuned SmolLM2-135 model to generate text based on your prompts.
+Enter your prompt below and adjust the generation parameters to create unique text sequences.
+""")
+
+# Create two columns for the interface
+col1, col2 = st.columns([2, 1])
+
+with col1:
+    # Input form
+    prompt = st.text_area(
+        "Enter your prompt:",
+        "Once upon a time",
+        height=100,
+        help="Enter the text you want the model to continue from"
+    )
+
+with col2:
+    # Generation parameters
+    max_length = st.slider(
+        "Predict additional text of length:",
+        min_value=1,
+        max_value=50,
+        value=20,
+        help="Number of additional tokens to generate"
+    )
+
+    num_sequences = st.slider(
+        "Number of sequences to generate:",
+        min_value=1,
+        max_value=5,
+        value=1,
+        help="Generate multiple different sequences from the same prompt"
+    )
+
+# Load model
+try:
+    model, device = load_model()
+    st.success("Model loaded successfully! Ready to generate text.")
+except Exception as e:
+    st.error(f"Error loading model: {str(e)}")
+    st.stop()
+
+# Generate button
+if st.button("Generate", type="primary"):
+    if not prompt:
+        st.warning("Please enter a prompt first!")
+    else:
+        with st.spinner("Generating text..."):
+            try:
+                generated_texts = generate_text(
+                    model=model,
+                    prompt=prompt,
+                    max_length=max_length,
+                    num_return_sequences=num_sequences,
+                    device=device
+                )
+
+                # Display results
+                st.subheader("Generated Text:")
+                for i, text in enumerate(generated_texts, 1):
+                    with st.expander(f"Sequence {i}", expanded=True):
+                        st.write(text)
+
+            except Exception as e:
+                st.error(f"Error during text generation: {str(e)}")
+
+# Add footer
+st.markdown("---")
+st.markdown("""
+<div style='text-align: center'>
+    <p>Built with Streamlit and PyTorch | SmolLM2-135 Model</p>
+</div>
+""", unsafe_allow_html=True)
```
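The decode loop in `generate_text` samples one token per step from a temperature-scaled softmax (temperature 0.8). A self-contained sketch of that single sampling step, using made-up logits in place of real model output:

```python
import torch

# Hypothetical next-token logits for a toy vocabulary of 10 tokens
logits = torch.tensor([[2.0, 1.0, 0.5, 0.0, -1.0, -2.0, 0.3, 0.1, 1.5, -0.5]])

temperature = 0.8  # < 1.0 sharpens the distribution, > 1.0 flattens it
probs = torch.softmax(logits / temperature, dim=-1)

# Draw one token id per sequence, as the app's decode loop does
next_token = torch.multinomial(probs, num_samples=1)
print(next_token.shape)  # torch.Size([1, 1]) -> appended to the sequence via torch.cat
```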
model.pt ADDED

```diff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf16a04318e75ccea0fc7e37ac501f7a56016ee500352ce2a20ee78e004e610b
+size 277571739
```
requirements.txt ADDED

```diff
@@ -0,0 +1,5 @@
+streamlit==1.41.1
+torch>=2.0.0
+tiktoken>=0.5.0
+numpy>=1.24.0
+tqdm>=4.65.0
```
src/config/model_config.py ADDED

```diff
@@ -0,0 +1,10 @@
+class GPTConfig:
+    def __init__(self):
+        self.vocab_size = 32000
+        self.hidden_size = 256
+        self.num_hidden_layers = 12
+        self.num_attention_heads = 4  # Changed to match head_dim=64
+        self.intermediate_size = 512
+        self.hidden_act = "silu"
+        self.rms_norm_eps = 1e-5
+        self.max_position_embeddings = 1024
```
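The comment on `num_attention_heads` refers to the relationship head_dim = hidden_size / num_heads: with hidden_size 256 and 4 heads, that gives the 64 that `LlamaAttention` hard-codes. A quick check of the derived sizes (run from the project root, assuming the src package is importable):

```python
from src.config.model_config import GPTConfig

config = GPTConfig()

# 256 / 4 = 64, matching the fixed head_dim in LlamaAttention
head_dim = config.hidden_size // config.num_attention_heads
print(head_dim)  # 64

# LlamaRotaryEmbedding(dim=64) builds an inv_freq buffer of dim // 4 = 16 entries
print(head_dim // 4)  # 16
```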
src/models/gpt.py ADDED

```diff
@@ -0,0 +1,177 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
+    # Reshape position_ids to match q's shape
+    position_ids = position_ids.unsqueeze(0).unsqueeze(0)  # [1, 1, seq_len]
+
+    # Get the rotary embeddings for this position
+    cos = cos.squeeze(0)  # [seq_len, dim]
+    sin = sin.squeeze(0)  # [seq_len, dim]
+
+    # Apply rotary embeddings
+    q_embed = (q * cos) + (rotate_half(q) * sin)
+    k_embed = (k * cos) + (rotate_half(k) * sin)
+
+    return q_embed, k_embed
+
+def rotate_half(x):
+    """Rotates half the hidden dims of the input."""
+    x1 = x[..., :x.shape[-1] // 2]
+    x2 = x[..., x.shape[-1] // 2:]
+    return torch.cat((-x2, x1), dim=-1)
+
+class LlamaRMSNorm(nn.Module):
+    def __init__(self, hidden_size, eps=1e-5):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+        self.eps = eps
+
+    def forward(self, x):
+        variance = x.pow(2).mean(-1, keepdim=True)
+        x = x * torch.rsqrt(variance + self.eps)
+        return self.weight * x
+
+class LlamaRotaryEmbedding(nn.Module):
+    def __init__(self, dim, max_position_embeddings=2048, base=10000):
+        super().__init__()
+        inv_freq = 1.0 / (base ** (torch.arange(0, dim//4, dtype=torch.float32) / dim))
+        self.register_buffer("inv_freq", inv_freq)
+        self.register_buffer("cos_cached", None, persistent=False)
+        self.register_buffer("sin_cached", None, persistent=False)
+        self.max_position_embeddings = max_position_embeddings
+
+    def forward(self, x, seq_len):
+        if self.cos_cached is not None and self.cos_cached.size(1) >= seq_len:
+            return self.cos_cached[:, :seq_len, :], self.sin_cached[:, :seq_len, :]
+
+        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
+        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
+        emb = torch.cat((freqs, freqs), dim=-1)
+
+        cos = torch.cos(emb)[None, :, :]
+        sin = torch.sin(emb)[None, :, :]
+
+        self.cos_cached = cos
+        self.sin_cached = sin
+
+        return cos, sin
+
+class LlamaAttention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = 64  # Fixed head dimension to match saved model
+
+        self.q_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
+        self.k_proj = nn.Linear(config.hidden_size, 64, bias=False)  # Single head dimension
+        self.v_proj = nn.Linear(config.hidden_size, 64, bias=False)  # Single head dimension
+        self.o_proj = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
+
+    def forward(self, hidden_states, rotary_emb=None):
+        bsz, q_len, _ = hidden_states.size()
+
+        q = self.q_proj(hidden_states)
+        k = self.k_proj(hidden_states)
+        v = self.v_proj(hidden_states)
+
+        # Split q into heads before applying rotary embeddings
+        q = q.view(bsz, q_len, self.num_heads, -1)  # -1 will be 64
+        k = k.view(bsz, q_len, 1, -1)  # Keep k as single head
+        v = v.view(bsz, q_len, 1, -1)  # Keep v as single head
+
+        # Apply rotary embeddings if provided
+        if rotary_emb is not None:
+            position_ids = torch.arange(q_len, device=q.device)
+            cos, sin = rotary_emb(v, q_len)
+            # Split q and k in half for rotation
+            q1, q2 = q[..., :32], q[..., 32:]
+            k1, k2 = k[..., :32], k[..., 32:]
+            # Apply rotation to first half
+            q_embed = torch.cat([
+                q1 * cos.unsqueeze(2) - q2 * sin.unsqueeze(2),
+                q2 * cos.unsqueeze(2) + q1 * sin.unsqueeze(2)
+            ], dim=-1)
+            k_embed = torch.cat([
+                k1 * cos.unsqueeze(2) - k2 * sin.unsqueeze(2),
+                k2 * cos.unsqueeze(2) + k1 * sin.unsqueeze(2)
+            ], dim=-1)
+            q, k = q_embed, k_embed
+
+        # Expand k and v to match number of heads
+        k = k.expand(-1, -1, self.num_heads, -1)
+        v = v.expand(-1, -1, self.num_heads, -1)
+
+        # Scaled dot-product attention
+        attn_weights = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)
+        attn_weights = F.softmax(attn_weights, dim=-1)
+
+        attn_output = torch.matmul(attn_weights, v)
+        attn_output = attn_output.view(bsz, q_len, self.hidden_size)
+        attn_output = self.o_proj(attn_output)
+
+        return attn_output
+
+class LlamaMLP(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)
+        self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)
+        self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False)
+        self.act_fn = nn.SiLU()
+
+    def forward(self, x):
+        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
+
+class LlamaDecoderLayer(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.self_attn = LlamaAttention(config)
+        self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.mlp = LlamaMLP(config)
+
+    def forward(self, hidden_states, rotary_emb=None):  # rotary_emb is passed through to attention
+        residual = hidden_states
+        hidden_states = self.input_layernorm(hidden_states)
+        hidden_states = self.self_attn(hidden_states, rotary_emb=rotary_emb)
+        hidden_states = residual + hidden_states
+
+        residual = hidden_states
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+        hidden_states = residual + hidden_states
+
+        return hidden_states
+
+class LlamaModel(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
+        self.rotary_emb = LlamaRotaryEmbedding(dim=64)  # This will create inv_freq of size 16
+        self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+        self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+    def forward(self, input_ids):
+        hidden_states = self.embed_tokens(input_ids)
+
+        for layer in self.layers:
+            hidden_states = layer(hidden_states, rotary_emb=self.rotary_emb)
+
+        hidden_states = self.norm(hidden_states)
+        return hidden_states
+
+class LlamaForCausalLM(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.model = LlamaModel(config)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+    def forward(self, input_ids):
+        hidden_states = self.model(input_ids)
+        logits = self.lm_head(hidden_states)
+        return logits, None
```
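A quick way to sanity-check this architecture is to instantiate it with the repo's config and run a dummy forward pass. A minimal sketch (run from the project root, assuming the src package is importable; no checkpoint needed, weights are random):

```python
import torch
from src.config.model_config import GPTConfig
from src.models.gpt import LlamaForCausalLM

config = GPTConfig()
model = LlamaForCausalLM(config)
model.eval()

# Batch of 2 sequences, 16 tokens each, with ids inside vocab_size
input_ids = torch.randint(0, config.vocab_size, (2, 16))

with torch.no_grad():
    logits, _ = model(input_ids)  # forward returns (logits, None)

print(logits.shape)  # torch.Size([2, 16, 32000]) -> one logit row per position
print(sum(p.numel() for p in model.parameters()))  # total parameter count
```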
src/utils/device_utils.py ADDED

```diff
@@ -0,0 +1,10 @@
+import torch
+
+def get_device():
+    """
+    Determine the available device for computation.
+    Returns a CUDA device if available, otherwise the CPU.
+    """
+    if torch.cuda.is_available():
+        return torch.device('cuda')
+    return torch.device('cpu')
```
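app.py uses this helper both as the `map_location` for `torch.load` and as the target device for the model. A short usage sketch:

```python
import torch
from src.utils.device_utils import get_device

device = get_device()
print(device)  # cuda or cpu, depending on the machine

# Tensors and modules are moved explicitly, as load_model() does with the model
x = torch.zeros(4, device=device)
```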