vuminhtue commited on
Commit
738bcf1
Β·
verified Β·
1 Parent(s): b23c630

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -0
app.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Qwen3 Text Generation App for Hugging Face Spaces
3
+
4
+ This app allows you to generate text using a trained Qwen3 model.
5
+ You can control:
6
+ - The starting text (prompt)
7
+ - How many new words to generate (max_new_tokens)
8
+ - How creative the output should be (temperature)
9
+ """
10
+
11
+ import gradio as gr
12
+ import torch
13
+ import tiktoken
14
+ from pathlib import Path
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ # Import our Qwen3 model
18
+ from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
19
+
20
+
21
class TextGenerator:
    """
    Load the trained Qwen3 model once and generate text on demand.

    Keeping the model as long-lived instance state means the slow parts
    (checkpoint download, weight loading) happen a single time at startup,
    after which ``generate`` can be called repeatedly with no reload.
    """

    def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
        """
        Download the checkpoint from HuggingFace and build the model.

        Parameters
        ----------
        repo_id : str
            HuggingFace repository ID to download the model from.
            Default: "vuminhtue/qwen3_sentiment_tinystories"

        Raises
        ------
        Exception
            Re-raised if the checkpoint download fails.
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f"   Repository: {repo_id}")

        # Architecture configuration for the Qwen3 0.6B model.
        # These settings must match the checkpoint being loaded.
        self.config = {
            "vocab_size": 151_936,      # Number of different tokens the model knows
            "context_length": 40_960,   # Maximum length of text it can process
            "emb_dim": 1024,            # Size of the embedding vectors
            "n_heads": 16,              # Number of attention heads
            "n_layers": 28,             # Number of transformer layers
            "hidden_dim": 3072,         # Size of the feed-forward network
            "head_dim": 128,            # Size of each attention head
            "qk_norm": True,            # Whether to normalize queries and keys
            "n_kv_groups": 8,           # Number of key-value groups (GQA)
            "rope_base": 1_000_000.0,   # Base for rotary position encoding
            "dtype": torch.bfloat16,    # Data type for model weights
        }

        # Detect if we have a GPU available; fall back to CPU otherwise.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"   Using device: {self.device}")

        # Tokenizer (converts text to token IDs and back).
        # NOTE(review): this is the GPT-2 BPE tokenizer (~50k tokens) while
        # vocab_size above is the Qwen vocabulary size; assumed intentional
        # because the model was trained with this pairing — confirm upstream.
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print("   ✓ Tokenizer loaded")

        # Download the model file from HuggingFace.
        # hf_hub_download caches locally, so it only downloads once.
        print("   📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model",
            )
            print(f"   ✓ Model downloaded to: {model_path}")
        except Exception as e:
            print(f"   ❌ Error downloading model: {e}")
            raise

        # Create the model with our configuration.
        self.model = Qwen3Model(self.config)

        # Load the trained weights from the downloaded file.
        # weights_only=True avoids unpickling arbitrary objects.
        print("   ⚙️ Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True,
            )
        )

        # Move model to the appropriate device (CPU or GPU).
        self.model = self.model.to(self.device)

        # Evaluation mode: disables dropout and other train-time behavior.
        self.model.eval()

        print("   ✓ Model loaded successfully!")
        print("✅ Ready to generate text!\n")

    def generate(self, prompt, max_new_tokens=50, temperature=1.0):
        """
        Generate text continuing *prompt*.

        Parameters
        ----------
        prompt : str
            The starting text (what you want the model to continue).
        max_new_tokens : int
            How many new tokens (roughly words) to generate.
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): more predictable, focused
            - Medium (0.8-1.0): balanced
            - Higher (1.1-2.0): more creative, random

        Returns
        -------
        str
            The generated text (including the original prompt), or an
            error message string if generation failed.
        """
        try:
            # Convert the text prompt to token IDs on the model's device.
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)

            # FIX: run generation under no_grad — this is pure inference,
            # and tracking gradients would only waste memory and time.
            with torch.no_grad():
                output_ids = generate_text_simple(
                    model=self.model,
                    idx=input_ids,
                    max_new_tokens=max_new_tokens,
                    context_size=self.config["context_length"],
                    temperature=temperature,
                )

            # Convert the token IDs back to text.
            return token_ids_to_text(output_ids, self.tokenizer)

        except Exception as e:
            # Return the error as text so the UI shows it instead of crashing.
            return f"❌ Error generating text: {str(e)}"
146
+
147
# Build the generator a single time at startup so that every request
# served afterwards reuses the already-loaded model.
_RULE = "=" * 70
print(_RULE)
print("INITIALIZING TEXT GENERATION APP")
print(_RULE)

generator = TextGenerator()
153
+
154
def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Gradio callback: validate the inputs, then delegate to the generator.

    Parameters
    ----------
    prompt : str
        Starting text from the textbox; may be empty or whitespace.
    max_new_tokens : int | float
        Slider value — Gradio may deliver this as a float.
    temperature : float
        Sampling temperature from the slider.

    Returns
    -------
    str
        The generated text, or a warning message for an empty prompt.
    """
    # Guard: reject empty or whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        return "⚠️ Please enter some text to start with!"

    # FIX: coerce to int (sliders can hand back floats) and clamp to
    # [1, 200] to prevent very long generation times or a zero request.
    max_new_tokens = max(1, min(int(max_new_tokens), 200))

    # Generate text.
    return generator.generate(prompt, max_new_tokens, temperature)
175
+
176
# Create the Gradio interface.
# This defines what the web app looks like and how it behaves.
# NOTE: statement order inside this `with` block IS the rendered layout —
# components appear on the page in the order they are created here.
with gr.Blocks(title="Qwen3 Text Generator", theme=gr.themes.Soft()) as demo:

    # Header banner shown at the top of the page.
    gr.Markdown(
        """
        # 🤖 Qwen3 Text Generator

        Generate creative stories and text using a Qwen3 model trained on TinyStories!

        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )

    # Main content area: inputs on the left, output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")

            # Free-text prompt the model will continue.
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )

            # Control sliders for generation parameters.
            gr.Markdown("### ⚙️ Generation Settings")

            # Length control; the callback additionally caps this at 200.
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly = number of words)"
            )

            # Sampling temperature: lower = focused, higher = creative.
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, Higher = more creative"
            )

            # Primary action button; wired to the callback below.
            generate_btn = gr.Button(
                "✨ Generate Text",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            # Output section: read-only textbox receiving the generation result.
            gr.Markdown("### 📖 Generated Text")

            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )

    # Example prompts: clicking one fills the three inputs above.
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )

    # Static information section rendered below the examples.
    gr.Markdown(
        """
        ---
        ### 📊 About This Model

        - **Model**: Qwen3 0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)

        ### 🎯 Understanding the Parameters

        **Max New Tokens:**
        - Controls the length of generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation

        **Temperature:**
        - `0.1 - 0.7`: Safe, predictable, focused responses
        - `0.8 - 1.0`: Balanced creativity and coherence
        - `1.1 - 2.0`: Very creative but may be less coherent

        ### ⚠️ Note

        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes

        ---
        *Built with Qwen3 architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )

    # Connect the button to the generation function.
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )

    # Also allow pressing Enter in the text box to generate.
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
309
+
310
# Start the Gradio web server only when executed as a script (not on import).
if __name__ == "__main__":
    banner = "=" * 70
    print(f"\n{banner}\nLAUNCHING GRADIO APP\n{banner}")
    demo.launch()