vuminhtue commited on
Commit
738bcf1
Β·
verified Β·
1 Parent(s): b23c630

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -0
app.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Qwen3 Text Generation App for Hugging Face Spaces
3
+
4
+ This app allows you to generate text using a trained Qwen3 model.
5
+ You can control:
6
+ - The starting text (prompt)
7
+ - How many new words to generate (max_new_tokens)
8
+ - How creative the output should be (temperature)
9
+ """
10
+
11
+ import gradio as gr
12
+ import torch
13
+ import tiktoken
14
+ from pathlib import Path
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ # Import our Qwen3 model
18
+ from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
19
+
20
+
21
class TextGenerator:
    """
    Load the trained Qwen3 model once and generate text on demand.

    Keeping the model as long-lived instance state means the slow parts
    (checkpoint download, weight loading) happen a single time at startup,
    after which ``generate`` can be called repeatedly with no reload.
    """

    def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
        """
        Download the checkpoint from HuggingFace and build the model.

        Parameters
        ----------
        repo_id : str
            HuggingFace repository ID to download the model from.
            Default: "vuminhtue/qwen3_sentiment_tinystories"

        Raises
        ------
        Exception
            Re-raised if the checkpoint download fails.
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f"   Repository: {repo_id}")

        # Architecture configuration for the Qwen3 0.6B model.
        # These settings must match the checkpoint being loaded.
        self.config = {
            "vocab_size": 151_936,      # Number of different tokens the model knows
            "context_length": 40_960,   # Maximum length of text it can process
            "emb_dim": 1024,            # Size of the embedding vectors
            "n_heads": 16,              # Number of attention heads
            "n_layers": 28,             # Number of transformer layers
            "hidden_dim": 3072,         # Size of the feed-forward network
            "head_dim": 128,            # Size of each attention head
            "qk_norm": True,            # Whether to normalize queries and keys
            "n_kv_groups": 8,           # Number of key-value groups (GQA)
            "rope_base": 1_000_000.0,   # Base for rotary position encoding
            "dtype": torch.bfloat16,    # Data type for model weights
        }

        # Detect if we have a GPU available; fall back to CPU otherwise.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"   Using device: {self.device}")

        # Tokenizer (converts text to token IDs and back).
        # NOTE(review): this is the GPT-2 BPE tokenizer (~50k tokens) while
        # vocab_size above is the Qwen vocabulary size; assumed intentional
        # because the model was trained with this pairing — confirm upstream.
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print("   ✓ Tokenizer loaded")

        # Download the model file from HuggingFace.
        # hf_hub_download caches locally, so it only downloads once.
        print("   📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model",
            )
            print(f"   ✓ Model downloaded to: {model_path}")
        except Exception as e:
            print(f"   ❌ Error downloading model: {e}")
            raise

        # Create the model with our configuration.
        self.model = Qwen3Model(self.config)

        # Load the trained weights from the downloaded file.
        # weights_only=True avoids unpickling arbitrary objects.
        print("   ⚙️ Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True,
            )
        )

        # Move model to the appropriate device (CPU or GPU).
        self.model = self.model.to(self.device)

        # Evaluation mode: disables dropout and other train-time behavior.
        self.model.eval()

        print("   ✓ Model loaded successfully!")
        print("✅ Ready to generate text!\n")

    def generate(self, prompt, max_new_tokens=50, temperature=1.0):
        """
        Generate text continuing *prompt*.

        Parameters
        ----------
        prompt : str
            The starting text (what you want the model to continue).
        max_new_tokens : int
            How many new tokens (roughly words) to generate.
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): more predictable, focused
            - Medium (0.8-1.0): balanced
            - Higher (1.1-2.0): more creative, random

        Returns
        -------
        str
            The generated text (including the original prompt), or an
            error message string if generation failed.
        """
        try:
            # Convert the text prompt to token IDs on the model's device.
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)

            # FIX: run generation under no_grad — this is pure inference,
            # and tracking gradients would only waste memory and time.
            with torch.no_grad():
                output_ids = generate_text_simple(
                    model=self.model,
                    idx=input_ids,
                    max_new_tokens=max_new_tokens,
                    context_size=self.config["context_length"],
                    temperature=temperature,
                )

            # Convert the token IDs back to text.
            return token_ids_to_text(output_ids, self.tokenizer)

        except Exception as e:
            # Return the error as text so the UI shows it instead of crashing.
            return f"❌ Error generating text: {str(e)}"
146
+
147
# Build the generator a single time at startup so that every request
# served afterwards reuses the already-loaded model.
_RULE = "=" * 70
print(_RULE)
print("INITIALIZING TEXT GENERATION APP")
print(_RULE)

generator = TextGenerator()
153
+
154
def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Gradio callback: validate the inputs, then delegate to the generator.

    Parameters
    ----------
    prompt : str
        Starting text from the textbox; may be empty or whitespace.
    max_new_tokens : int | float
        Slider value — Gradio may deliver this as a float.
    temperature : float
        Sampling temperature from the slider.

    Returns
    -------
    str
        The generated text, or a warning message for an empty prompt.
    """
    # Guard: reject empty or whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        return "⚠️ Please enter some text to start with!"

    # FIX: coerce to int (sliders can hand back floats) and clamp to
    # [1, 200] to prevent very long generation times or a zero request.
    max_new_tokens = max(1, min(int(max_new_tokens), 200))

    # Generate text.
    return generator.generate(prompt, max_new_tokens, temperature)
175
+
176
# Create the Gradio interface.
# This defines what the web app looks like and how it behaves.
# NOTE: statement order inside this `with` block IS the rendered layout —
# components appear on the page in the order they are created here.
with gr.Blocks(title="Qwen3 Text Generator", theme=gr.themes.Soft()) as demo:

    # Header banner shown at the top of the page.
    gr.Markdown(
        """
        # 🤖 Qwen3 Text Generator

        Generate creative stories and text using a Qwen3 model trained on TinyStories!

        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )

    # Main content area: inputs on the left, output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")

            # Free-text prompt the model will continue.
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )

            # Control sliders for generation parameters.
            gr.Markdown("### ⚙️ Generation Settings")

            # Length control; the callback additionally caps this at 200.
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly = number of words)"
            )

            # Sampling temperature: lower = focused, higher = creative.
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, Higher = more creative"
            )

            # Primary action button; wired to the callback below.
            generate_btn = gr.Button(
                "✨ Generate Text",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            # Output section: read-only textbox receiving the generation result.
            gr.Markdown("### 📖 Generated Text")

            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )

    # Example prompts: clicking one fills the three inputs above.
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )

    # Static information section rendered below the examples.
    gr.Markdown(
        """
        ---
        ### 📊 About This Model

        - **Model**: Qwen3 0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)

        ### 🎯 Understanding the Parameters

        **Max New Tokens:**
        - Controls the length of generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation

        **Temperature:**
        - `0.1 - 0.7`: Safe, predictable, focused responses
        - `0.8 - 1.0`: Balanced creativity and coherence
        - `1.1 - 2.0`: Very creative but may be less coherent

        ### ⚠️ Note

        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes

        ---
        *Built with Qwen3 architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )

    # Connect the button to the generation function.
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )

    # Also allow pressing Enter in the text box to generate.
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
309
+
310
# Start the Gradio web server only when executed as a script (not on import).
if __name__ == "__main__":
    banner = "=" * 70
    print(f"\n{banner}\nLAUNCHING GRADIO APP\n{banner}")
    demo.launch()