""" Qwen3:0.6B Text Generation App for Hugging Face Spaces This app allows you to generate text using a trained Qwen3:0.6B model with TinyStories dataset . You can control: - The starting text (prompt) - How many new words to generate (max_new_tokens) - How creative the output should be (temperature) """ import gradio as gr import torch import tiktoken from pathlib import Path from huggingface_hub import hf_hub_download # Import our Qwen3 model from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text class TextGenerator: """ A simple class to load the model and generate text This makes it easy to: 1. Load the trained model once at startup 2. Generate text multiple times without reloading """ def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"): """ Initialize the text generator Parameters: ----------- repo_id : str HuggingFace repository ID to download the model from Default: "vuminhtue/qwen3_sentiment_tinystories" """ print("🚀 Loading Qwen3 model from HuggingFace...") print(f" Repository: {repo_id}") # Configuration for Qwen3 0.6B model # These settings define the architecture of the model self.config = { "vocab_size": 151_936, # Number of different tokens the model knows "context_length": 40_960, # Maximum length of text it can process "emb_dim": 1024, # Size of the embedding vectors "n_heads": 16, # Number of attention heads "n_layers": 28, # Number of transformer layers "hidden_dim": 3072, # Size of the feed-forward network "head_dim": 128, # Size of each attention head "qk_norm": True, # Whether to normalize queries and keys "n_kv_groups": 8, # Number of key-value groups "rope_base": 1_000_000.0, # Base for rotary position encoding "dtype": torch.bfloat16, # Data type for model weights } # Detect if we have a GPU available self.device = "cuda" if torch.cuda.is_available() else "cpu" print(f" Using device: {self.device}") # Load the tokenizer (converts text to numbers and back) # We use GPT-2's tokenizer which works well for English text self.tokenizer = tiktoken.get_encoding("gpt2") print(" ✓ Tokenizer loaded") # Download the model file from HuggingFace # This will cache the file locally, so it only downloads once print(" 📥 Downloading model from HuggingFace (this may take a moment)...") try: model_path = hf_hub_download( repo_id=repo_id, filename="Qwen3_200k_model_params.pt", repo_type="model" ) print(f" ✓ Model downloaded to: {model_path}") except Exception as e: print(f" ❌ Error downloading model: {e}") raise # Create the model with our configuration self.model = Qwen3Model(self.config) # Load the trained weights from the downloaded file print(" ⚙️ Loading model weights...") self.model.load_state_dict( torch.load( model_path, map_location=torch.device(self.device), weights_only=True ) ) # Move model to the appropriate device (CPU or GPU) self.model = self.model.to(self.device) # Set to evaluation mode (disables training-specific features) self.model.eval() print(" ✓ Model loaded successfully!") print("✅ Ready to generate text!\n") def generate(self, prompt, max_new_tokens=50, temperature=1.0): """ Generate text based on a prompt Parameters: ----------- prompt : str The starting text (what you want the model to continue) max_new_tokens : int How many new tokens (roughly words) to generate temperature : float Controls creativity: - Lower (0.1-0.7): More predictable, focused - Medium (0.8-1.0): Balanced - Higher (1.1-2.0): More creative, random Returns: -------- str : The generated text (including the original prompt) """ try: # Convert the text prompt to token IDs (numbers) input_ids = text_to_token_ids(prompt, self.tokenizer) input_ids = input_ids.to(self.device) # Generate new tokens output_ids = generate_text_simple( model=self.model, idx=input_ids, max_new_tokens=max_new_tokens, context_size=self.config["context_length"], temperature=temperature ) # Convert the token IDs back to text generated_text = token_ids_to_text(output_ids, self.tokenizer) return generated_text except Exception as e: return f"❌ Error generating text: {str(e)}" # Initialize the generator once when the app starts print("="*70) print("INITIALIZING TEXT GENERATION APP") print("="*70) generator = TextGenerator() def generate_text_interface(prompt, max_new_tokens, temperature): """ Interface function for Gradio This function: 1. Takes inputs from the user interface 2. Calls our generator 3. Returns the result to display """ # Check if prompt is empty if not prompt or len(prompt.strip()) == 0: return "⚠️ Please enter some text to start with!" # Limit max tokens to prevent very long generation times max_new_tokens = min(max_new_tokens, 200) # Generate text result = generator.generate(prompt, max_new_tokens, temperature) return result # Create the Gradio interface # This defines what the web app looks like and how it behaves with gr.Blocks(title="Qwen3:0.6B Text Generator", theme=gr.themes.Soft()) as demo: # Header gr.Markdown( """ # 🤖 Qwen3:0.6B Text Generator Generate creative stories and text using a Qwen3:0.6B model trained on TinyStories! ### How to use: 1. **Enter your starting text** (e.g., "Once upon a time") 2. **Adjust the sliders** to control the output 3. **Click Generate** to create text """ ) # Main content area with gr.Row(): with gr.Column(scale=1): # Input section gr.Markdown("### 📝 Input") prompt_input = gr.Textbox( label="Starting Text (Prompt)", placeholder="Once upon a time...", lines=3, info="Enter the text you want the model to continue" ) # Control sliders gr.Markdown("### ⚙️ Generation Settings") max_tokens_slider = gr.Slider( minimum=10, maximum=200, value=50, step=10, label="Max New Tokens", info="How many new tokens to generate (roughly = number of words)" ) temperature_slider = gr.Slider( minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature", info="Lower = more predictable, Higher = more creative" ) # Generate button generate_btn = gr.Button( "✨ Generate Text", variant="primary", size="lg" ) with gr.Column(scale=1): # Output section gr.Markdown("### 📖 Generated Text") output_text = gr.Textbox( label="Result", lines=15, interactive=False, show_copy_button=True ) # Example prompts to try gr.Markdown("### 💡 Try these examples:") gr.Examples( examples=[ ["Once upon a time", 50, 0.8], ["There was a little girl named", 60, 1.0], ["In a magical forest", 70, 1.2], ["A brave knight", 50, 0.7], ["The sun was shining and", 60, 0.9], ], inputs=[prompt_input, max_tokens_slider, temperature_slider], label="Click any example to try it" ) # Information section gr.Markdown( """ --- ### 📊 About This Model - **Model**: Qwen3:0.6B (596M parameters) - **Training Data**: TinyStories dataset (children's stories) - **Architecture**: 28 transformer layers with Grouped Query Attention - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories) ### 🎯 Understanding the Parameters **Max New Tokens:** - Controls the length of generated text - One token ≈ one word (roughly) - More tokens = longer output = slower generation **Temperature:** - `0.1 - 0.7`: Safe, predictable, focused responses - `0.8 - 1.0`: Balanced creativity and coherence - `1.1 - 2.0`: Very creative but may be less coherent ### ⚠️ Note This model was trained on children's stories, so it works best for: - Simple, clear narratives - Stories about everyday situations - Children's vocabulary and themes --- *Built with Qwen3:0.6B architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace* """ ) # Connect the button to the generation function generate_btn.click( fn=generate_text_interface, inputs=[prompt_input, max_tokens_slider, temperature_slider], outputs=output_text ) # Also allow pressing Enter in the text box to generate prompt_input.submit( fn=generate_text_interface, inputs=[prompt_input, max_tokens_slider, temperature_slider], outputs=output_text ) # Launch the app if __name__ == "__main__": print("\n" + "="*70) print("LAUNCHING GRADIO APP") print("="*70) demo.launch()