# app.py — Qwen3:0.6B text generation demo (HF Space: vuminhtue, commit 31dc810)
"""
Qwen3:0.6B Text Generation App for Hugging Face Spaces
This app allows you to generate text using a trained Qwen3:0.6B model with the TinyStories dataset.
You can control:
- The starting text (prompt)
- How many new words to generate (max_new_tokens)
- How creative the output should be (temperature)
"""
import gradio as gr
import torch
import tiktoken
from pathlib import Path
from huggingface_hub import hf_hub_download
# Import our Qwen3 model
from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
class TextGenerator:
    """
    Load the trained Qwen3:0.6B model once and generate text on demand.

    All expensive work (weight download, state-dict loading, device
    placement) happens in ``__init__`` so that ``generate`` can be called
    repeatedly without reloading anything.
    """
    def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
        """
        Download the trained weights from HuggingFace and prepare the model.

        Parameters
        -----------
        repo_id : str
            HuggingFace repository ID to download the model from
            Default: "vuminhtue/qwen3_sentiment_tinystories"

        Raises
        ------
        Exception
            Re-raised if the weight file cannot be downloaded.
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f" Repository: {repo_id}")
        # Architecture hyper-parameters for the Qwen3 0.6B checkpoint.
        # These must match the configuration the weights were trained with.
        self.config = {
            "vocab_size": 151_936,     # Number of different tokens the model knows
            "context_length": 40_960,  # Maximum length of text it can process
            "emb_dim": 1024,           # Size of the embedding vectors
            "n_heads": 16,             # Number of attention heads
            "n_layers": 28,            # Number of transformer layers
            "hidden_dim": 3072,        # Size of the feed-forward network
            "head_dim": 128,           # Size of each attention head
            "qk_norm": True,           # Whether to normalize queries and keys
            "n_kv_groups": 8,          # Number of key-value groups (GQA)
            "rope_base": 1_000_000.0,  # Base for rotary position encoding
            "dtype": torch.bfloat16,   # Data type for model weights
        }
        # Prefer a GPU when one is available; fall back to CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Using device: {self.device}")
        # Tokenizer (text <-> token IDs). GPT-2's BPE encoding is used here;
        # presumably it is what the checkpoint was trained with — note it is
        # NOT Qwen3's native tokenizer (vocab_size above is Qwen3's).
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print(" ✓ Tokenizer loaded")
        # Download the weight file; hf_hub_download caches locally, so the
        # network transfer only happens on the first run.
        print(" 📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model"
            )
            print(f" ✓ Model downloaded to: {model_path}")
        except Exception as e:
            print(f" ❌ Error downloading model: {e}")
            raise
        # Build the model skeleton, then fill it with the trained weights.
        self.model = Qwen3Model(self.config)
        print(" ⚙️ Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True  # safe loading: tensors only, no pickled code
            )
        )
        # Move model to the appropriate device (CPU or GPU).
        self.model = self.model.to(self.device)
        # Evaluation mode disables dropout and other train-only behavior.
        self.model.eval()
        print(" ✓ Model loaded successfully!")
        print("✅ Ready to generate text!\n")
    def generate(self, prompt, max_new_tokens=50, temperature=1.0):
        """
        Generate a continuation of ``prompt``.

        Parameters
        -----------
        prompt : str
            The starting text (what you want the model to continue)
        max_new_tokens : int
            How many new tokens (roughly words) to generate
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): More predictable, focused
            - Medium (0.8-1.0): Balanced
            - Higher (1.1-2.0): More creative, random

        Returns
        --------
        str : The generated text (including the original prompt), or an
              error message string if generation fails.
        """
        try:
            # Encode the prompt and move it to the model's device.
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)
            # Autoregressively sample new tokens.
            output_ids = generate_text_simple(
                model=self.model,
                idx=input_ids,
                max_new_tokens=max_new_tokens,
                context_size=self.config["context_length"],
                temperature=temperature
            )
            # Decode back to a human-readable string.
            return token_ids_to_text(output_ids, self.tokenizer)
        except Exception as e:
            # Surface the failure to the UI instead of crashing the app.
            return f"❌ Error generating text: {str(e)}"
# Build the generator once at import time so every request reuses the
# already-loaded model instead of reloading it per call.
print("=" * 70)
print("INITIALIZING TEXT GENERATION APP")
print("=" * 70)
generator = TextGenerator()
def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Gradio callback: validate the inputs and run text generation.

    Parameters
    -----------
    prompt : str
        Starting text from the UI textbox.
    max_new_tokens : int or float
        Requested number of new tokens (Gradio sliders may deliver floats).
    temperature : float
        Sampling temperature from the UI slider.

    Returns
    --------
    str : The generated text, or a warning message when the prompt is empty.
    """
    # Reject empty / whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        return "⚠️ Please enter some text to start with!"
    # Clamp to a sane integer range: the 200 cap prevents very long
    # generation times, and int() guards against float slider values.
    max_new_tokens = max(1, min(int(max_new_tokens), 200))
    # Generate text with the module-level, already-loaded generator.
    return generator.generate(prompt, max_new_tokens, temperature)
# Create the Gradio interface.
# This defines what the web app looks like and how it behaves.
with gr.Blocks(title="Qwen3:0.6B Text Generator", theme=gr.themes.Soft()) as demo:
    # Header with usage instructions
    gr.Markdown(
        """
        # 🤖 Qwen3:0.6B Text Generator
        Generate creative stories and text using a Qwen3:0.6B model trained on TinyStories!
        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )
    # Main content area: inputs on the left, output on the right
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )
            # Control sliders
            gr.Markdown("### ⚙️ Generation Settings")
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly = number of words)"
            )
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, Higher = more creative"
            )
            # Generate button
            generate_btn = gr.Button(
                "✨ Generate Text",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            # Output section
            gr.Markdown("### 📖 Generated Text")
            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )
    # Example prompts to try: (prompt, max_new_tokens, temperature)
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )
    # Information section
    gr.Markdown(
        """
        ---
        ### 📊 About This Model
        - **Model**: Qwen3:0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)
        ### 🎯 Understanding the Parameters
        **Max New Tokens:**
        - Controls the length of generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation
        **Temperature:**
        - `0.1 - 0.7`: Safe, predictable, focused responses
        - `0.8 - 1.0`: Balanced creativity and coherence
        - `1.1 - 2.0`: Very creative but may be less coherent
        ### ⚠️ Note
        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes
        ---
        *Built with Qwen3:0.6B architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )
    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
    # Also allow pressing Enter in the text box to generate
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
# Launch the web server only when this file is executed as a script
# (Hugging Face Spaces runs it directly).
if __name__ == "__main__":
    banner = "=" * 70
    print("\n" + banner)
    print("LAUNCHING GRADIO APP")
    print(banner)
    demo.launch()