# app.py — Qwen3:0.6B text generation demo (HF Space: vuminhtue, commit 31dc810)
"""
Qwen3:0.6B Text Generation App for Hugging Face Spaces
This app allows you to generate text using a trained Qwen3:0.6B model with the TinyStories dataset.
You can control:
- The starting text (prompt)
- How many new words to generate (max_new_tokens)
- How creative the output should be (temperature)
"""
import gradio as gr
import torch
import tiktoken
from pathlib import Path
from huggingface_hub import hf_hub_download
# Import our Qwen3 model
from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
class TextGenerator:
    """
    Load the trained Qwen3:0.6B model once and generate text on demand.

    All expensive work (weight download, state-dict loading, device
    placement) happens in ``__init__`` so that ``generate`` can be called
    repeatedly without reloading anything.
    """
    def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
        """
        Download the trained weights from HuggingFace and prepare the model.

        Parameters
        -----------
        repo_id : str
            HuggingFace repository ID to download the model from
            Default: "vuminhtue/qwen3_sentiment_tinystories"

        Raises
        ------
        Exception
            Re-raised if the weight file cannot be downloaded.
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f" Repository: {repo_id}")
        # Architecture hyper-parameters for the Qwen3 0.6B checkpoint.
        # These must match the configuration the weights were trained with.
        self.config = {
            "vocab_size": 151_936,     # Number of different tokens the model knows
            "context_length": 40_960,  # Maximum length of text it can process
            "emb_dim": 1024,           # Size of the embedding vectors
            "n_heads": 16,             # Number of attention heads
            "n_layers": 28,            # Number of transformer layers
            "hidden_dim": 3072,        # Size of the feed-forward network
            "head_dim": 128,           # Size of each attention head
            "qk_norm": True,           # Whether to normalize queries and keys
            "n_kv_groups": 8,          # Number of key-value groups (GQA)
            "rope_base": 1_000_000.0,  # Base for rotary position encoding
            "dtype": torch.bfloat16,   # Data type for model weights
        }
        # Prefer a GPU when one is available; fall back to CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Using device: {self.device}")
        # Tokenizer (text <-> token IDs). GPT-2's BPE encoding is used here;
        # presumably it is what the checkpoint was trained with — note it is
        # NOT Qwen3's native tokenizer (vocab_size above is Qwen3's).
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print(" ✓ Tokenizer loaded")
        # Download the weight file; hf_hub_download caches locally, so the
        # network transfer only happens on the first run.
        print(" 📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model"
            )
            print(f" ✓ Model downloaded to: {model_path}")
        except Exception as e:
            print(f" ❌ Error downloading model: {e}")
            raise
        # Build the model skeleton, then fill it with the trained weights.
        self.model = Qwen3Model(self.config)
        print(" ⚙️ Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True  # safe loading: tensors only, no pickled code
            )
        )
        # Move model to the appropriate device (CPU or GPU).
        self.model = self.model.to(self.device)
        # Evaluation mode disables dropout and other train-only behavior.
        self.model.eval()
        print(" ✓ Model loaded successfully!")
        print("✅ Ready to generate text!\n")
    def generate(self, prompt, max_new_tokens=50, temperature=1.0):
        """
        Generate a continuation of ``prompt``.

        Parameters
        -----------
        prompt : str
            The starting text (what you want the model to continue)
        max_new_tokens : int
            How many new tokens (roughly words) to generate
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): More predictable, focused
            - Medium (0.8-1.0): Balanced
            - Higher (1.1-2.0): More creative, random

        Returns
        --------
        str : The generated text (including the original prompt), or an
              error message string if generation fails.
        """
        try:
            # Encode the prompt and move it to the model's device.
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)
            # Autoregressively sample new tokens.
            output_ids = generate_text_simple(
                model=self.model,
                idx=input_ids,
                max_new_tokens=max_new_tokens,
                context_size=self.config["context_length"],
                temperature=temperature
            )
            # Decode back to a human-readable string.
            return token_ids_to_text(output_ids, self.tokenizer)
        except Exception as e:
            # Surface the failure to the UI instead of crashing the app.
            return f"❌ Error generating text: {str(e)}"
# Build the generator once at import time so every request reuses the
# already-loaded model instead of reloading it per call.
print("=" * 70)
print("INITIALIZING TEXT GENERATION APP")
print("=" * 70)
generator = TextGenerator()
def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Gradio callback: validate the inputs and run text generation.

    Parameters
    -----------
    prompt : str
        Starting text from the UI textbox.
    max_new_tokens : int or float
        Requested number of new tokens (Gradio sliders may deliver floats).
    temperature : float
        Sampling temperature from the UI slider.

    Returns
    --------
    str : The generated text, or a warning message when the prompt is empty.
    """
    # Reject empty / whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        return "⚠️ Please enter some text to start with!"
    # Clamp to a sane integer range: the 200 cap prevents very long
    # generation times, and int() guards against float slider values.
    max_new_tokens = max(1, min(int(max_new_tokens), 200))
    # Generate text with the module-level, already-loaded generator.
    return generator.generate(prompt, max_new_tokens, temperature)
# Create the Gradio interface.
# This defines what the web app looks like and how it behaves.
with gr.Blocks(title="Qwen3:0.6B Text Generator", theme=gr.themes.Soft()) as demo:
    # Header with usage instructions
    gr.Markdown(
        """
        # 🤖 Qwen3:0.6B Text Generator
        Generate creative stories and text using a Qwen3:0.6B model trained on TinyStories!
        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )
    # Main content area: inputs on the left, output on the right
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )
            # Control sliders
            gr.Markdown("### ⚙️ Generation Settings")
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly = number of words)"
            )
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, Higher = more creative"
            )
            # Generate button
            generate_btn = gr.Button(
                "✨ Generate Text",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            # Output section
            gr.Markdown("### 📖 Generated Text")
            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )
    # Example prompts to try: (prompt, max_new_tokens, temperature)
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )
    # Information section
    gr.Markdown(
        """
        ---
        ### 📊 About This Model
        - **Model**: Qwen3:0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)
        ### 🎯 Understanding the Parameters
        **Max New Tokens:**
        - Controls the length of generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation
        **Temperature:**
        - `0.1 - 0.7`: Safe, predictable, focused responses
        - `0.8 - 1.0`: Balanced creativity and coherence
        - `1.1 - 2.0`: Very creative but may be less coherent
        ### ⚠️ Note
        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes
        ---
        *Built with Qwen3:0.6B architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )
    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
    # Also allow pressing Enter in the text box to generate
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
# Launch the web server only when this file is executed as a script
# (Hugging Face Spaces runs it directly).
if __name__ == "__main__":
    banner = "=" * 70
    print("\n" + banner)
    print("LAUNCHING GRADIO APP")
    print(banner)
    demo.launch()