| """ | |
| Qwen3:0.6B Text Generation App for Hugging Face Spaces | |
| This app allows you to generate text using a trained Qwen3:0.6B model with TinyStories dataset . | |
| You can control: | |
| - The starting text (prompt) | |
| - How many new words to generate (max_new_tokens) | |
| - How creative the output should be (temperature) | |
| """ | |

import gradio as gr
import torch
import tiktoken
from pathlib import Path
from huggingface_hub import hf_hub_download

# Import our Qwen3 model
from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text


class TextGenerator:
    """
    A simple class to load the model and generate text.

    This makes it easy to:
    1. Load the trained model once at startup
    2. Generate text multiple times without reloading
    """

    def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
        """
        Initialize the text generator.

        Parameters
        ----------
        repo_id : str
            HuggingFace repository ID to download the model from.
            Default: "vuminhtue/qwen3_sentiment_tinystories"
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f"   Repository: {repo_id}")

        # Configuration for the Qwen3 0.6B model
        # These settings define the architecture of the model
        self.config = {
            "vocab_size": 151_936,     # Number of different tokens the model knows
            "context_length": 40_960,  # Maximum length of text it can process
            "emb_dim": 1024,           # Size of the embedding vectors
            "n_heads": 16,             # Number of attention heads
            "n_layers": 28,            # Number of transformer layers
            "hidden_dim": 3072,        # Size of the feed-forward network
            "head_dim": 128,           # Size of each attention head
            "qk_norm": True,           # Whether to normalize queries and keys
            "n_kv_groups": 8,          # Number of key-value groups
            "rope_base": 1_000_000.0,  # Base for rotary position encoding
            "dtype": torch.bfloat16,   # Data type for model weights
        }
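
        # With these settings, grouped-query attention shares each of the
        # 8 key/value groups across 16 / 8 = 2 query heads, which shrinks the
        # KV cache compared to full multi-head attention; the exact behavior
        # depends on the Qwen3Model implementation in Qwen3_model.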

        # Detect whether a GPU is available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"   Using device: {self.device}")

        # Load the tokenizer (converts text to token IDs and back)
        # We use GPT-2's tokenizer, which works well for English text
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print("   ✅ Tokenizer loaded")

        # Download the model file from HuggingFace
        # The file is cached locally, so it only downloads once
        print("   📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model"
            )
            print(f"   ✅ Model downloaded to: {model_path}")
        except Exception as e:
            print(f"   ❌ Error downloading model: {e}")
            raise

        # Create the model with our configuration
        self.model = Qwen3Model(self.config)

        # Load the trained weights from the downloaded file
        print("   ⚙️ Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True
            )
        )

        # Move the model to the appropriate device (CPU or GPU)
        self.model = self.model.to(self.device)

        # Set evaluation mode (disables training-specific behavior such as dropout)
        self.model.eval()

        print("   ✅ Model loaded successfully!")
        print("✅ Ready to generate text!\n")

    def generate(self, prompt, max_new_tokens=50, temperature=1.0):
        """
        Generate text based on a prompt.

        Parameters
        ----------
        prompt : str
            The starting text (what you want the model to continue)
        max_new_tokens : int
            How many new tokens (roughly words) to generate
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): more predictable, focused
            - Medium (0.8-1.0): balanced
            - Higher (1.1-2.0): more creative, more random

        Returns
        -------
        str
            The generated text (including the original prompt)
        """
        try:
            # Convert the text prompt to token IDs (numbers)
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)

            # Generate new tokens
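            # Temperature typically rescales the next-token logits before
            # sampling, roughly probs = softmax(logits / temperature); the
            # exact sampling behavior lives in generate_text_simple.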
            output_ids = generate_text_simple(
                model=self.model,
                idx=input_ids,
                max_new_tokens=max_new_tokens,
                context_size=self.config["context_length"],
                temperature=temperature
            )

            # Convert the token IDs back to text
            generated_text = token_ids_to_text(output_ids, self.tokenizer)
            return generated_text
        except Exception as e:
            return f"❌ Error generating text: {str(e)}"

# Initialize the generator once when the app starts
print("=" * 70)
print("INITIALIZING TEXT GENERATION APP")
print("=" * 70)
generator = TextGenerator()


def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Interface function for Gradio.

    This function:
    1. Takes inputs from the user interface
    2. Calls our generator
    3. Returns the result to display
    """
    # Check whether the prompt is empty
    if not prompt or len(prompt.strip()) == 0:
        return "⚠️ Please enter some text to start with!"

    # Limit max tokens to prevent very long generation times
    max_new_tokens = min(max_new_tokens, 200)

    # Generate text
    result = generator.generate(prompt, max_new_tokens, temperature)
    return result


# Create the Gradio interface
# This defines what the web app looks like and how it behaves
with gr.Blocks(title="Qwen3:0.6B Text Generator", theme=gr.themes.Soft()) as demo:
    # Header
    gr.Markdown(
        """
        # 🤖 Qwen3:0.6B Text Generator

        Generate creative stories and text using a Qwen3:0.6B model trained on TinyStories!

        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )

    # Main content area
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )

            # Control sliders
            gr.Markdown("### ⚙️ Generation Settings")
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly the number of words)"
            )
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, higher = more creative"
            )

            # Generate button
            generate_btn = gr.Button(
                "✨ Generate Text",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            # Output section
            gr.Markdown("### 📖 Generated Text")
            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )

    # Example prompts to try
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )

    # Information section
    gr.Markdown(
        """
        ---
        ### 📚 About This Model
        - **Model**: Qwen3:0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)

        ### 🎯 Understanding the Parameters

        **Max New Tokens:**
        - Controls the length of the generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation

        **Temperature:**
        - `0.1 - 0.7`: safe, predictable, focused responses
        - `0.8 - 1.0`: balanced creativity and coherence
        - `1.1 - 2.0`: very creative but may be less coherent

        ### ⚠️ Note
        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes

        ---
        *Built with the Qwen3:0.6B architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )

    # Also allow pressing Enter in the text box to generate
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )

# Launch the app
if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("LAUNCHING GRADIO APP")
    print("=" * 70)
    demo.launch()