vuminhtue committed
Commit 45c4371 · verified · 1 Parent(s): 0d05af9

Upload 4 files

Files changed (4)
  1. Qwen3_model.py +445 -0
  2. README.md +191 -12
  3. app.py +316 -0
  4. requirements.txt +18 -0
Qwen3_model.py ADDED
@@ -0,0 +1,445 @@
+ """
+ Qwen3 Model Implementation
+ This file contains the complete Qwen3 model architecture and helper functions
+ """
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+
+
+ # ============================================================================
+ # Helper Functions for Text Generation
+ # ============================================================================
+
+ def text_to_token_ids(text, tokenizer):
+     """
+     Convert text to token IDs using the tokenizer
+
+     Parameters:
+     -----------
+     text : str
+         Input text to tokenize
+     tokenizer : tiktoken tokenizer
+         The tokenizer to use (e.g., tiktoken.get_encoding("gpt2"))
+
+     Returns:
+     --------
+     torch.Tensor : Token IDs as a tensor with shape [1, num_tokens]
+     """
+     encoded = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
+     encoded_tensor = torch.tensor(encoded).unsqueeze(0)  # Add batch dimension
+     return encoded_tensor
+
+
+ def token_ids_to_text(token_ids, tokenizer):
+     """
+     Convert token IDs back to text
+
+     Parameters:
+     -----------
+     token_ids : torch.Tensor
+         Token IDs with shape [batch_size, num_tokens]
+     tokenizer : tiktoken tokenizer
+         The tokenizer to use
+
+     Returns:
+     --------
+     str : Decoded text
+     """
+     flat = token_ids.squeeze(0)  # Remove batch dimension
+     return tokenizer.decode(flat.tolist())
+
+
+ def generate_text_simple(model, idx, max_new_tokens, context_size, temperature=1.0, top_k=None):
+     """
+     Generate text using the model
+
+     This function generates text one token at a time by:
+     1. Getting the model's predictions for the next token
+     2. Applying temperature to control randomness
+     3. Optionally using top-k sampling to limit choices
+     4. Sampling the next token and adding it to the sequence
+
+     Parameters:
+     -----------
+     model : Qwen3Model
+         The trained Qwen3 model
+     idx : torch.Tensor
+         Starting token IDs with shape [batch_size, sequence_length]
+     max_new_tokens : int
+         How many new tokens to generate
+     context_size : int
+         Maximum context length the model can handle
+     temperature : float
+         Controls randomness (lower = more predictable, higher = more random)
+         - temperature < 1.0: More focused/deterministic
+         - temperature = 1.0: Normal sampling
+         - temperature > 1.0: More random/creative
+     top_k : int or None
+         If set, only sample from the top k most likely tokens
+
+     Returns:
+     --------
+     torch.Tensor : Token IDs including both input and generated tokens
+     """
+     model.eval()  # Set model to evaluation mode
+
+     # Generate tokens one at a time
+     for _ in range(max_new_tokens):
+         # Crop context if it exceeds the model's maximum context size
+         idx_cond = idx if idx.size(1) <= context_size else idx[:, -context_size:]
+
+         # Get model predictions
+         with torch.no_grad():
+             logits, _ = model(idx_cond)
+
+         # Focus only on the last time step (the next token prediction)
+         logits = logits[:, -1, :]  # Shape: [batch_size, vocab_size]
+
+         # Apply temperature scaling
+         # Lower temperature makes the model more confident in top choices
+         # Higher temperature makes the distribution more uniform (more random)
+         logits = logits / temperature
+
+         # Optional: Apply top-k filtering
+         # This limits sampling to only the k most likely tokens
+         if top_k is not None:
+             v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+             logits[logits < v[:, [-1]]] = float('-inf')
+
+         # Convert logits to probabilities
+         probs = F.softmax(logits, dim=-1)
+
+         # Sample the next token
+         idx_next = torch.multinomial(probs, num_samples=1)
+
+         # Append sampled token to the sequence
+         idx = torch.cat((idx, idx_next), dim=1)
+
+     return idx
+
+
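A quick usage sketch for the helpers above (illustrative only — it assumes a trained `Qwen3Model` instance named `model` is already in memory; the prompt and sampling settings are arbitrary):

```python
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
idx = text_to_token_ids("Once upon a time", tokenizer)

out = generate_text_simple(
    model=model,            # a trained Qwen3Model instance (assumed)
    idx=idx,
    max_new_tokens=30,
    context_size=40_960,    # should match the model's context_length
    temperature=0.8,
    top_k=50,
)
print(token_ids_to_text(out, tokenizer))
```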
+ # ============================================================================
+ # Model Architecture Components
+ # ============================================================================
+
+ class RMSNorm(nn.Module):
+     """
+     Root Mean Square Layer Normalization
+
+     RMSNorm is simpler and more efficient than LayerNorm.
+     Instead of normalizing using mean and variance, it only uses the root mean square.
+     """
+     def __init__(self, emb_dim, eps=1e-6, bias=False, qwen3_compatible=True):
+         super().__init__()
+         self.eps = eps
+         self.qwen3_compatible = qwen3_compatible
+         self.scale = nn.Parameter(torch.ones(emb_dim))
+         self.shift = nn.Parameter(torch.zeros(emb_dim)) if bias else None
+
+     def forward(self, x):
+         input_dtype = x.dtype
+
+         if self.qwen3_compatible:
+             x = x.to(torch.float32)
+
+         # Calculate variance using mean of squares
+         variance = x.pow(2).mean(dim=-1, keepdim=True)
+
+         # Normalize
+         norm_x = x * torch.rsqrt(variance + self.eps)
+         norm_x = norm_x * self.scale
+
+         if self.shift is not None:
+             norm_x = norm_x + self.shift
+
+         return norm_x.to(input_dtype)
+
+
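The forward pass above implements RMSNorm(x) = scale · x / √(mean(x²) + eps). A minimal numerical check against that formula (values arbitrary):

```python
import torch

norm = RMSNorm(emb_dim=4)
x = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
manual = x / torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + 1e-6)
assert torch.allclose(norm(x), manual, atol=1e-5)  # scale starts at 1, so they match
```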
+ def compute_rope_params(head_dim, theta_base=10_000, context_length=4096, dtype=torch.float32):
+     """
+     Compute Rotary Position Embedding (RoPE) parameters
+
+     RoPE encodes position by rotating token embeddings.
+     This allows the model to understand relative positions between tokens.
+     """
+     assert head_dim % 2 == 0, "Head dimension must be even"
+
+     # Compute the inverse frequencies
+     inv_freq = 1.0 / (theta_base ** (torch.arange(0, head_dim, 2, dtype=dtype)[: (head_dim // 2)].float() / head_dim))
+
+     # Generate position indices
+     positions = torch.arange(context_length, dtype=dtype)
+
+     # Compute the angles
+     angles = positions[:, None] * inv_freq[None, :]
+
+     # Expand angles to match the head_dim
+     angles = torch.cat([angles, angles], dim=1)
+
+     # Precompute sine and cosine
+     cos = torch.cos(angles)
+     sin = torch.sin(angles)
+
+     return cos, sin
+
+
+ def apply_rope(x, cos, sin):
+     """
+     Apply Rotary Position Embedding to input tensor
+
+     This rotates the embeddings based on their position in the sequence.
+     """
+     batch_size, num_heads, seq_len, head_dim = x.shape
+     assert head_dim % 2 == 0, "Head dimension must be even"
+
+     # Split x into first half and second half
+     x1 = x[..., : head_dim // 2]
+     x2 = x[..., head_dim // 2 :]
+
+     # Adjust sin and cos shapes
+     cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0)
+     sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0)
+
+     # Apply the rotary transformation
+     rotated = torch.cat((-x2, x1), dim=-1)
+     x_rotated = (x * cos) + (rotated * sin)
+
+     return x_rotated.to(dtype=x.dtype)
+
+
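A minimal shape check for the two RoPE helpers (dimensions chosen arbitrarily):

```python
import torch

cos, sin = compute_rope_params(head_dim=64, context_length=128)
print(cos.shape)                       # torch.Size([128, 64])

q = torch.randn(2, 8, 16, 64)          # [batch, num_heads, seq_len, head_dim]
print(apply_rope(q, cos, sin).shape)   # torch.Size([2, 8, 16, 64]) -- shape preserved
```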
+ class GroupedQueryAttention(nn.Module):
+     """
+     Grouped Query Attention (GQA)
+
+     GQA is more efficient than standard multi-head attention.
+     It shares Key and Value projections across multiple Query heads,
+     reducing the number of parameters while maintaining performance.
+     """
+     def __init__(self, d_in, num_heads, num_kv_groups, head_dim=None, qk_norm=False, dtype=None):
+         super().__init__()
+         assert num_heads % num_kv_groups == 0, "num_heads must be divisible by num_kv_groups"
+
+         self.num_heads = num_heads
+         self.num_kv_groups = num_kv_groups
+         self.group_size = num_heads // num_kv_groups
+
+         if head_dim is None:
+             assert d_in % num_heads == 0, "`d_in` must be divisible by `num_heads` if `head_dim` is not set"
+             head_dim = d_in // num_heads
+
+         self.head_dim = head_dim
+         self.d_out = num_heads * head_dim
+
+         self.W_query = nn.Linear(d_in, self.d_out, bias=False, dtype=dtype)
+         self.W_key = nn.Linear(d_in, num_kv_groups * head_dim, bias=False, dtype=dtype)
+         self.W_value = nn.Linear(d_in, num_kv_groups * head_dim, bias=False, dtype=dtype)
+
+         self.out_proj = nn.Linear(self.d_out, d_in, bias=False, dtype=dtype)
+
+         if qk_norm:
+             self.q_norm = RMSNorm(head_dim, eps=1e-6)
+             self.k_norm = RMSNorm(head_dim, eps=1e-6)
+         else:
+             self.q_norm = self.k_norm = None
+
+     def forward(self, x, mask, cos, sin):
+         b, num_tokens, _ = x.shape
+
+         # Apply projections
+         queries = self.W_query(x)
+         keys = self.W_key(x)
+         values = self.W_value(x)
+
+         # Reshape
+         queries = queries.view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
+         keys = keys.view(b, num_tokens, self.num_kv_groups, self.head_dim).transpose(1, 2)
+         values = values.view(b, num_tokens, self.num_kv_groups, self.head_dim).transpose(1, 2)
+
+         # Optional normalization
+         if self.q_norm:
+             queries = self.q_norm(queries)
+         if self.k_norm:
+             keys = self.k_norm(keys)
+
+         # Apply RoPE
+         queries = apply_rope(queries, cos, sin)
+         keys = apply_rope(keys, cos, sin)
+
+         # Expand K and V to match number of heads
+         keys = keys.repeat_interleave(self.group_size, dim=1)
+         values = values.repeat_interleave(self.group_size, dim=1)
+
+         # Attention
+         attn_scores = queries @ keys.transpose(2, 3)
+         attn_scores = attn_scores.masked_fill(mask, -torch.inf)
+         attn_weights = torch.softmax(attn_scores / self.head_dim**0.5, dim=-1)
+
+         context = (attn_weights @ values).transpose(1, 2).reshape(b, num_tokens, self.d_out)
+         return self.out_proj(context)
+
+
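With the configuration used in this repo (16 query heads sharing 8 KV groups), the key/value projections hold half as many parameters as the query projection. A quick sketch confirming the shapes:

```python
attn = GroupedQueryAttention(d_in=1024, num_heads=16, num_kv_groups=8, head_dim=128, qk_norm=True)
print(attn.W_query.weight.shape)  # torch.Size([2048, 1024])  (16 heads x 128)
print(attn.W_key.weight.shape)    # torch.Size([1024, 1024])  ( 8 groups x 128)
```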
+ class FeedForward(nn.Module):
+     """
+     Feed-Forward Network used in transformer blocks
+
+     This is a gated (SwiGLU-style) network: two parallel linear projections
+     are combined with a SiLU gate, then projected back down.
+     The hidden dimension is typically larger than the embedding dimension,
+     allowing the model to learn complex patterns.
+     """
+     def __init__(self, cfg):
+         super().__init__()
+         self.fc1 = nn.Linear(cfg["emb_dim"], cfg["hidden_dim"], dtype=cfg["dtype"], bias=False)
+         self.fc2 = nn.Linear(cfg["emb_dim"], cfg["hidden_dim"], dtype=cfg["dtype"], bias=False)
+         self.fc3 = nn.Linear(cfg["hidden_dim"], cfg["emb_dim"], dtype=cfg["dtype"], bias=False)
+
+     def forward(self, x):
+         x_fc1 = self.fc1(x)
+         x_fc2 = self.fc2(x)
+         x = nn.functional.silu(x_fc1) * x_fc2
+         return self.fc3(x)
+
+
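In equation form, the forward pass above computes the SwiGLU variant FFN(x) = W3 · (SiLU(W1 x) ⊙ W2 x), where fc1 and fc2 are the two parallel up-projections and fc3 projects back to the embedding dimension.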
+ class TransformerBlock(nn.Module):
+     """
+     A single Transformer Block
+
+     Each block consists of:
+     1. Grouped Query Attention for processing relationships between tokens
+     2. Feed-Forward Network for processing each token independently
+     3. Residual connections and normalization for stable training
+     """
+     def __init__(self, cfg):
+         super().__init__()
+         self.att = GroupedQueryAttention(
+             d_in=cfg["emb_dim"],
+             num_heads=cfg["n_heads"],
+             head_dim=cfg["head_dim"],
+             num_kv_groups=cfg["n_kv_groups"],
+             qk_norm=cfg["qk_norm"],
+             dtype=cfg["dtype"]
+         )
+         self.ff = FeedForward(cfg)
+         self.norm1 = RMSNorm(cfg["emb_dim"], eps=1e-6)
+         self.norm2 = RMSNorm(cfg["emb_dim"], eps=1e-6)
+
+     def forward(self, x, mask, cos, sin):
+         # Attention block with residual connection
+         shortcut = x
+         x = self.norm1(x)
+         x = self.att(x, mask, cos, sin)
+         x = x + shortcut
+
+         # Feed-forward block with residual connection
+         shortcut = x
+         x = self.norm2(x)
+         x = self.ff(x)
+         x = x + shortcut
+
+         return x
+
+
+ class Qwen3Model(nn.Module):
+     """
+     Complete Qwen3 Language Model
+
+     This model can:
+     1. Take token IDs as input
+     2. Process them through multiple transformer layers
+     3. Output predictions for the next token
+     4. Generate new text autoregressively
+     """
+     def __init__(self, cfg):
+         super().__init__()
+
+         # Token embedding layer
+         self.tok_emb = nn.Embedding(cfg["vocab_size"], cfg["emb_dim"], dtype=cfg["dtype"])
+
+         # Stack of transformer blocks
+         self.trf_blocks = nn.ModuleList(
+             [TransformerBlock(cfg) for _ in range(cfg["n_layers"])]
+         )
+
+         # Final normalization and output projection
+         self.final_norm = RMSNorm(cfg["emb_dim"])
+         self.out_head = nn.Linear(cfg["emb_dim"], cfg["vocab_size"], bias=False, dtype=cfg["dtype"])
+
+         # Precompute RoPE parameters
+         if cfg["head_dim"] is None:
+             head_dim = cfg["emb_dim"] // cfg["n_heads"]
+         else:
+             head_dim = cfg["head_dim"]
+
+         cos, sin = compute_rope_params(
+             head_dim=head_dim,
+             theta_base=cfg["rope_base"],
+             context_length=cfg["context_length"]
+         )
+         self.register_buffer("cos", cos, persistent=False)
+         self.register_buffer("sin", sin, persistent=False)
+         self.cfg = cfg
+
+     def forward(self, in_idx, targets=None):
+         """
+         Forward pass through the model
+
+         Parameters:
+         -----------
+         in_idx : torch.Tensor
+             Input token IDs with shape [batch_size, sequence_length]
+         targets : torch.Tensor or None
+             Target token IDs for computing loss (used during training)
+
+         Returns:
+         --------
+         logits : torch.Tensor
+             Predictions for next tokens with shape [batch_size, sequence_length, vocab_size]
+         loss : torch.Tensor or None
+             Cross-entropy loss if targets are provided, otherwise None
+         """
+         # Get token embeddings
+         tok_embeds = self.tok_emb(in_idx)
+         x = tok_embeds
+
+         # Create causal mask (prevents looking at future tokens)
+         num_tokens = x.shape[1]
+         mask = torch.triu(torch.ones(num_tokens, num_tokens, device=x.device, dtype=torch.bool), diagonal=1)
+
+         # Pass through all transformer blocks
+         for block in self.trf_blocks:
+             x = block(x, mask, self.cos, self.sin)
+
+         # Final normalization and projection to vocabulary
+         x = self.final_norm(x)
+         logits = self.out_head(x.to(self.cfg["dtype"]))
+
+         # Compute loss if targets are provided
+         loss = None
+         if targets is not None:
+             loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))
+
+         return logits, loss
+
+     @torch.no_grad()
+     def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
+         """
+         Generate new tokens autoregressively
+
+         This is a convenience method that wraps the generation logic.
+         For more details, see the generate_text_simple function.
+         """
+         for _ in range(max_new_tokens):
+             ctx_len = self.cfg["context_length"]
+             idx_cond = idx if idx.size(1) <= ctx_len else idx[:, -ctx_len:]
+             logits, _ = self(idx_cond)
+             logits = logits[:, -1, :] / temperature
+             if top_k is not None:
+                 v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                 logits[logits < v[:, [-1]]] = float("-inf")
+             probs = F.softmax(logits, dim=-1)
+             idx_next = torch.multinomial(probs, num_samples=1)
+             idx = torch.cat((idx, idx_next), dim=1)
+         return idx
+
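A minimal smoke test of the full model, using a deliberately tiny configuration (these values are illustrative, not the Qwen3 0.6B settings) so it runs in seconds on CPU:

```python
import torch

tiny_cfg = {
    "vocab_size": 256, "context_length": 64, "emb_dim": 32,
    "n_heads": 4, "n_layers": 2, "hidden_dim": 64, "head_dim": 8,
    "qk_norm": True, "n_kv_groups": 2, "rope_base": 10_000.0,
    "dtype": torch.float32,
}
model = Qwen3Model(tiny_cfg)
tokens = torch.randint(0, 256, (1, 10))
logits, loss = model(tokens, targets=tokens)
print(logits.shape, loss)  # torch.Size([1, 10, 256]) and a scalar loss
```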
README.md CHANGED
@@ -1,14 +1,193 @@
  ---
- title: Qwen3 Sentence Completion
- emoji: 👁
- colorFrom: yellow
- colorTo: green
- sdk: gradio
- sdk_version: 5.49.1
- app_file: app.py
- pinned: false
- license: apache-2.0
- short_description: Sentence completion task, trained from Qwen3 0.6B model
- ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Qwen3 Text Generator
+
+ A text generation application using the Qwen3 0.6B model trained on the TinyStories dataset.
+
+ ## 🚀 Quick Start
+
+ ### Running Locally
+
+ 1. Make sure you have the required files:
+    - `app.py` - The Gradio interface
+    - `Qwen3_model.py` - The model architecture
+    - `Qwen3_200k_model_params.pt` - Your trained model weights
+    - `requirements.txt` - Python dependencies
+
+ 2. Install dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ 3. Run the app:
+    ```bash
+    python app.py
+    ```
+
+ 4. Open your browser to the URL shown (usually http://127.0.0.1:7860)
+
+ ## 📤 Deploying to HuggingFace Spaces
+
+ ### Step 1: Prepare Your Files
+
+ You need these files in your repository:
+ - `app.py` - Main application
+ - `Qwen3_model.py` - Model architecture
+ - `Qwen3_200k_model_params.pt` - Your trained model weights
+ - `requirements.txt` - Dependencies
+ - `README.md` - This file
+
+ ### Step 2: Create a HuggingFace Space
+
+ 1. Go to https://huggingface.co/new-space
+ 2. Fill in the details:
+    - **Space name**: Choose a name (e.g., "qwen3-text-generator")
+    - **License**: Select your preferred license
+    - **Select the SDK**: Choose **Gradio**
+    - **Space hardware**: Start with "CPU basic" (free)
+ 3. Click "Create Space"
+
+ ### Step 3: Upload Your Files
+
+ You have two options:
+
+ #### Option A: Using Git (Recommended)
+
+ ```bash
+ # Clone your new space
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
+ cd YOUR_SPACE_NAME
+
+ # Copy your files
+ cp /path/to/app.py .
+ cp /path/to/Qwen3_model.py .
+ cp /path/to/Qwen3_200k_model_params.pt .
+ cp /path/to/requirements.txt .
+ cp /path/to/README.md .
+
+ # Commit and push
+ git add .
+ git commit -m "Initial commit: Add Qwen3 text generator"
+ git push
+ ```
+
+ #### Option B: Using the Web Interface
+
+ 1. On your Space page, click "Files" → "Add file" → "Upload files"
+ 2. Drag and drop or select all your files
+ 3. Click "Commit to main"
+
+ ### Step 4: Wait for Build
+
+ - HuggingFace will automatically build and deploy your app
+ - This may take 5-10 minutes
+ - You'll see build logs in the "App" tab
+
+ ### Step 5: Test Your App
+
+ Once the build is complete, your app will be live at:
+ `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
+
+ ## 🎮 How to Use the App
+
+ 1. **Enter Starting Text**: Type the beginning of your story (e.g., "Once upon a time")
+
+ 2. **Adjust Max New Tokens**:
+    - Controls how much text to generate
+    - 10-50: Short continuation
+    - 50-100: Medium paragraph
+    - 100-200: Long passage
+
+ 3. **Adjust Temperature**:
+    - 0.1-0.7: More predictable, focused text
+    - 0.8-1.0: Balanced creativity
+    - 1.1-2.0: Very creative, more random
+
+ 4. **Click Generate**: Watch as the model continues your story!
+
+ ## 📊 Model Information
+
+ - **Architecture**: Qwen3 0.6B
+ - **Parameters**: 596 million unique parameters
+ - **Training Data**: TinyStories dataset
+ - **Best For**: Simple narratives, children's stories, everyday situations
+
+ ## 🔧 Troubleshooting
+
+ ### Model File Too Large
+
+ If your model file (`Qwen3_200k_model_params.pt`) is larger than 100MB, you'll need to use Git LFS:
+
+ ```bash
+ # Install Git LFS
+ git lfs install
+
+ # Track large files
+ git lfs track "*.pt"
+
+ # Add and commit
+ git add .gitattributes
+ git add Qwen3_200k_model_params.pt
+ git commit -m "Add model with LFS"
+ git push
+ ```
+
+ ### Out of Memory Error
+
+ If you get memory errors:
+ 1. Go to your Space settings
+ 2. Upgrade to a better hardware tier (may require payment)
+ 3. Or optimize your model file size
+
+ ### App Not Loading
+
+ 1. Check the build logs in the "App" tab
+ 2. Make sure all files are uploaded correctly
+ 3. Verify `requirements.txt` has all necessary packages
+ 4. Check that file names match exactly (case-sensitive)
+
+ ## 💡 Tips for Better Results
+
+ 1. **Good Prompts**: Start with clear, simple sentences
+    - ✅ "Once upon a time, there was a little girl"
+    - ❌ "Explain quantum physics"
+
+ 2. **Temperature Selection** (see the sketch after this list):
+    - Use lower temperature (0.5-0.7) for coherent stories
+    - Use higher temperature (1.0-1.5) for creative variety
+
+ 3. **Token Length**:
+    - Start with 30-50 tokens to see the style
+    - Increase if you want longer passages
+
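To see what temperature actually does, here is a small illustrative sketch (not part of the app) showing how dividing logits by the temperature reshapes the sampling distribution:

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.1])
for t in (0.5, 1.0, 1.5):
    print(t, F.softmax(logits / t, dim=-1))
# Lower t sharpens the distribution (more predictable);
# higher t flattens it (more random).
```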
+ ## 📝 File Structure
+
+ ```
+ .
+ ├── app.py                      # Main Gradio application
+ ├── Qwen3_model.py              # Model architecture and helpers
+ ├── Qwen3_200k_model_params.pt  # Trained model weights
+ ├── requirements.txt            # Python dependencies
+ └── README.md                   # This file
+ ```
+
+ ## 🤝 Contributing
+
+ Feel free to:
+ - Report issues
+ - Suggest improvements
+ - Share your generated stories!
+
+ ## 📜 License
+
+ This project uses the Qwen3 architecture. Please check the license for your specific use case.
+
+ ## 🙏 Acknowledgments
+
+ - Qwen3 architecture from Alibaba Cloud
+ - Training approach inspired by "LLMs from Scratch"
+ - TinyStories dataset for training data
+
  ---

+ **Enjoy generating creative stories! 📚✨**
+
app.py ADDED
@@ -0,0 +1,316 @@
+ """
+ Qwen3 Text Generation App for Hugging Face Spaces
+
+ This app allows you to generate text using a trained Qwen3 model.
+ You can control:
+ - The starting text (prompt)
+ - How many new words to generate (max_new_tokens)
+ - How creative the output should be (temperature)
+ """
+
+ import gradio as gr
+ import torch
+ import tiktoken
+ from pathlib import Path
+ from huggingface_hub import hf_hub_download
+
+ # Import our Qwen3 model
+ from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
+
+
+ class TextGenerator:
+     """
+     A simple class to load the model and generate text
+
+     This makes it easy to:
+     1. Load the trained model once at startup
+     2. Generate text multiple times without reloading
+     """
+
+     def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
+         """
+         Initialize the text generator
+
+         Parameters:
+         -----------
+         repo_id : str
+             HuggingFace repository ID to download the model from
+             Default: "vuminhtue/qwen3_sentiment_tinystories"
+         """
+         print("🚀 Loading Qwen3 model from HuggingFace...")
+         print(f"   Repository: {repo_id}")
+
+         # Configuration for Qwen3 0.6B model
+         # These settings define the architecture of the model
+         self.config = {
+             "vocab_size": 151_936,     # Number of different tokens the model knows
+             "context_length": 40_960,  # Maximum length of text it can process
+             "emb_dim": 1024,           # Size of the embedding vectors
+             "n_heads": 16,             # Number of attention heads
+             "n_layers": 28,            # Number of transformer layers
+             "hidden_dim": 3072,        # Size of the feed-forward network
+             "head_dim": 128,           # Size of each attention head
+             "qk_norm": True,           # Whether to normalize queries and keys
+             "n_kv_groups": 8,          # Number of key-value groups
+             "rope_base": 1_000_000.0,  # Base for rotary position encoding
+             "dtype": torch.bfloat16,   # Data type for model weights
+         }
+
+         # Detect if we have a GPU available
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         print(f"   Using device: {self.device}")
+
+         # Load the tokenizer (converts text to numbers and back)
+         # We use GPT-2's tokenizer which works well for English text
+         self.tokenizer = tiktoken.get_encoding("gpt2")
+         print("   ✓ Tokenizer loaded")
+
+         # Download the model file from HuggingFace
+         # This will cache the file locally, so it only downloads once
+         print("   📥 Downloading model from HuggingFace (this may take a moment)...")
+         try:
+             model_path = hf_hub_download(
+                 repo_id=repo_id,
+                 filename="Qwen3_200k_model_params.pt",
+                 repo_type="model"
+             )
+             print(f"   ✓ Model downloaded to: {model_path}")
+         except Exception as e:
+             print(f"   ❌ Error downloading model: {e}")
+             raise
+
+         # Create the model with our configuration
+         self.model = Qwen3Model(self.config)
+
+         # Load the trained weights from the downloaded file
+         print("   ⚙️ Loading model weights...")
+         self.model.load_state_dict(
+             torch.load(
+                 model_path,
+                 map_location=torch.device(self.device),
+                 weights_only=True
+             )
+         )
+
+         # Move model to the appropriate device (CPU or GPU)
+         self.model = self.model.to(self.device)
+
+         # Set to evaluation mode (disables training-specific features)
+         self.model.eval()
+
+         print("   ✓ Model loaded successfully!")
+         print("✅ Ready to generate text!\n")
+
+     def generate(self, prompt, max_new_tokens=50, temperature=1.0):
+         """
+         Generate text based on a prompt
+
+         Parameters:
+         -----------
+         prompt : str
+             The starting text (what you want the model to continue)
+         max_new_tokens : int
+             How many new tokens (roughly words) to generate
+         temperature : float
+             Controls creativity:
+             - Lower (0.1-0.7): More predictable, focused
+             - Medium (0.8-1.0): Balanced
+             - Higher (1.1-2.0): More creative, random
+
+         Returns:
+         --------
+         str : The generated text (including the original prompt)
+         """
+         try:
+             # Convert the text prompt to token IDs (numbers)
+             input_ids = text_to_token_ids(prompt, self.tokenizer)
+             input_ids = input_ids.to(self.device)
+
+             # Generate new tokens
+             output_ids = generate_text_simple(
+                 model=self.model,
+                 idx=input_ids,
+                 max_new_tokens=max_new_tokens,
+                 context_size=self.config["context_length"],
+                 temperature=temperature
+             )
+
+             # Convert the token IDs back to text
+             generated_text = token_ids_to_text(output_ids, self.tokenizer)
+
+             return generated_text
+
+         except Exception as e:
+             return f"❌ Error generating text: {str(e)}"
+
+
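Outside of Gradio, the class can be exercised directly from a Python shell (a sketch; the first call downloads and loads the checkpoint, which takes a while):

```python
gen = TextGenerator()
print(gen.generate("Once upon a time", max_new_tokens=30, temperature=0.8))
```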
+ # Initialize the generator once when the app starts
+ print("="*70)
+ print("INITIALIZING TEXT GENERATION APP")
+ print("="*70)
+ generator = TextGenerator()
+
+
+ def generate_text_interface(prompt, max_new_tokens, temperature):
+     """
+     Interface function for Gradio
+
+     This function:
+     1. Takes inputs from the user interface
+     2. Calls our generator
+     3. Returns the result to display
+     """
+     # Check if prompt is empty
+     if not prompt or len(prompt.strip()) == 0:
+         return "⚠️ Please enter some text to start with!"
+
+     # Limit max tokens to prevent very long generation times
+     max_new_tokens = min(max_new_tokens, 200)
+
+     # Generate text
+     result = generator.generate(prompt, max_new_tokens, temperature)
+
+     return result
+
+
+ # Create the Gradio interface
+ # This defines what the web app looks like and how it behaves
+ with gr.Blocks(title="Qwen3 Text Generator", theme=gr.themes.Soft()) as demo:
+
+     # Header
+     gr.Markdown(
+         """
+         # 🤖 Qwen3 Text Generator
+
+         Generate creative stories and text using a Qwen3 model trained on TinyStories!
+
+         ### How to use:
+         1. **Enter your starting text** (e.g., "Once upon a time")
+         2. **Adjust the sliders** to control the output
+         3. **Click Generate** to create text
+         """
+     )
+
+     # Main content area
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Input section
+             gr.Markdown("### 📝 Input")
+
+             prompt_input = gr.Textbox(
+                 label="Starting Text (Prompt)",
+                 placeholder="Once upon a time...",
+                 lines=3,
+                 info="Enter the text you want the model to continue"
+             )
+
+             # Control sliders
+             gr.Markdown("### ⚙️ Generation Settings")
+
+             max_tokens_slider = gr.Slider(
+                 minimum=10,
+                 maximum=200,
+                 value=50,
+                 step=10,
+                 label="Max New Tokens",
+                 info="How many new tokens to generate (roughly = number of words)"
+             )
+
+             temperature_slider = gr.Slider(
+                 minimum=0.1,
+                 maximum=2.0,
+                 value=1.0,
+                 step=0.1,
+                 label="Temperature",
+                 info="Lower = more predictable, Higher = more creative"
+             )
+
+             # Generate button
+             generate_btn = gr.Button(
+                 "✨ Generate Text",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             # Output section
+             gr.Markdown("### 📖 Generated Text")
+
+             output_text = gr.Textbox(
+                 label="Result",
+                 lines=15,
+                 interactive=False,
+                 show_copy_button=True
+             )
+
+     # Example prompts to try
+     gr.Markdown("### 💡 Try these examples:")
+     gr.Examples(
+         examples=[
+             ["Once upon a time", 50, 0.8],
+             ["There was a little girl named", 60, 1.0],
+             ["In a magical forest", 70, 1.2],
+             ["A brave knight", 50, 0.7],
+             ["The sun was shining and", 60, 0.9],
+         ],
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         label="Click any example to try it"
+     )
+
+     # Information section
+     gr.Markdown(
+         """
+         ---
+         ### 📊 About This Model
+
+         - **Model**: Qwen3 0.6B (596M parameters)
+         - **Training Data**: TinyStories dataset (children's stories)
+         - **Architecture**: 28 transformer layers with Grouped Query Attention
+         - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)
+
+         ### 🎯 Understanding the Parameters
+
+         **Max New Tokens:**
+         - Controls the length of generated text
+         - One token ≈ one word (roughly)
+         - More tokens = longer output = slower generation
+
+         **Temperature:**
+         - `0.1 - 0.7`: Safe, predictable, focused responses
+         - `0.8 - 1.0`: Balanced creativity and coherence
+         - `1.1 - 2.0`: Very creative but may be less coherent
+
+         ### ⚠️ Note
+
+         This model was trained on children's stories, so it works best for:
+         - Simple, clear narratives
+         - Stories about everyday situations
+         - Children's vocabulary and themes
+
+         ---
+         *Built with Qwen3 architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
+         """
+     )
+
+     # Connect the button to the generation function
+     generate_btn.click(
+         fn=generate_text_interface,
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         outputs=output_text
+     )
+
+     # Also allow pressing Enter in the text box to generate
+     prompt_input.submit(
+         fn=generate_text_interface,
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         outputs=output_text
+     )
+
+
+ # Launch the app
+ if __name__ == "__main__":
+     print("\n" + "="*70)
+     print("LAUNCHING GRADIO APP")
+     print("="*70)
+     demo.launch()
+
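Once deployed, the Space can also be called programmatically with the `gradio_client` package. A sketch — the Space id below is a placeholder for wherever this app is hosted, and `fn_index=0` assumes the button's click handler is the first registered event, which matches this file:

```python
from gradio_client import Client

client = Client("YOUR_USERNAME/YOUR_SPACE_NAME")  # placeholder Space id
result = client.predict("Once upon a time", 50, 0.8, fn_index=0)
print(result)
```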
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ # Requirements for Hugging Face Spaces
+ # This file lists all dependencies needed to run your Qwen3 text generator
+
+ # Core ML libraries
+ torch>=2.0.0
+ tiktoken>=0.5.0
+ numpy>=1.24.0
+
+ # HuggingFace Hub for downloading models
+ huggingface_hub>=0.16.0
+
+ # Gradio for web interface
+ gradio>=4.0.0
+
+ # Optional but recommended
+ scikit-learn>=1.3.0
+ joblib>=1.3.0
+ pandas>=2.0.0