vuminhtue committed
Commit 45c4371 · verified · 1 Parent(s): 0d05af9

Upload 4 files

Files changed (4)
  1. Qwen3_model.py +445 -0
  2. README.md +191 -12
  3. app.py +316 -0
  4. requirements.txt +18 -0
Qwen3_model.py ADDED
@@ -0,0 +1,445 @@
+ """
+ Qwen3 Model Implementation
+ This file contains the complete Qwen3 model architecture and helper functions
+ """
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+
+
+ # ============================================================================
+ # Helper Functions for Text Generation
+ # ============================================================================
+
+ def text_to_token_ids(text, tokenizer):
+     """
+     Convert text to token IDs using the tokenizer
+
+     Parameters:
+     -----------
+     text : str
+         Input text to tokenize
+     tokenizer : tiktoken tokenizer
+         The tokenizer to use (e.g., tiktoken.get_encoding("gpt2"))
+
+     Returns:
+     --------
+     torch.Tensor : Token IDs as a tensor with shape [1, num_tokens]
+     """
+     encoded = tokenizer.encode(text, allowed_special={'<|endoftext|>'})
+     encoded_tensor = torch.tensor(encoded).unsqueeze(0)  # Add batch dimension
+     return encoded_tensor
+
+
+ def token_ids_to_text(token_ids, tokenizer):
+     """
+     Convert token IDs back to text
+
+     Parameters:
+     -----------
+     token_ids : torch.Tensor
+         Token IDs with shape [batch_size, num_tokens]
+     tokenizer : tiktoken tokenizer
+         The tokenizer to use
+
+     Returns:
+     --------
+     str : Decoded text
+     """
+     flat = token_ids.squeeze(0)  # Remove batch dimension
+     return tokenizer.decode(flat.tolist())
+
+
+ def generate_text_simple(model, idx, max_new_tokens, context_size, temperature=1.0, top_k=None):
+     """
+     Generate text using the model
+
+     This function generates text one token at a time by:
+     1. Getting the model's predictions for the next token
+     2. Applying temperature to control randomness
+     3. Optionally using top-k sampling to limit choices
+     4. Sampling the next token and adding it to the sequence
+
+     Parameters:
+     -----------
+     model : Qwen3Model
+         The trained Qwen3 model
+     idx : torch.Tensor
+         Starting token IDs with shape [batch_size, sequence_length]
+     max_new_tokens : int
+         How many new tokens to generate
+     context_size : int
+         Maximum context length the model can handle
+     temperature : float
+         Controls randomness (lower = more predictable, higher = more random)
+         - temperature < 1.0: More focused/deterministic
+         - temperature = 1.0: Normal sampling
+         - temperature > 1.0: More random/creative
+     top_k : int or None
+         If set, only sample from the top k most likely tokens
+
+     Returns:
+     --------
+     torch.Tensor : Token IDs including both input and generated tokens
+     """
+     model.eval()  # Set model to evaluation mode
+
+     # Generate tokens one at a time
+     for _ in range(max_new_tokens):
+         # Crop context if it exceeds the model's maximum context size
+         idx_cond = idx if idx.size(1) <= context_size else idx[:, -context_size:]
+
+         # Get model predictions
+         with torch.no_grad():
+             logits, _ = model(idx_cond)
+
+         # Focus only on the last time step (the next token prediction)
+         logits = logits[:, -1, :]  # Shape: [batch_size, vocab_size]
+
+         # Apply temperature scaling
+         # Lower temperature makes the model more confident in top choices
+         # Higher temperature makes the distribution more uniform (more random)
+         logits = logits / temperature
+
+         # Optional: Apply top-k filtering
+         # This limits sampling to only the k most likely tokens
+         if top_k is not None:
+             v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+             logits[logits < v[:, [-1]]] = float('-inf')
+
+         # Convert logits to probabilities
+         probs = F.softmax(logits, dim=-1)
+
+         # Sample the next token
+         idx_next = torch.multinomial(probs, num_samples=1)
+
+         # Append sampled token to the sequence
+         idx = torch.cat((idx, idx_next), dim=1)
+
+     return idx
+
+
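A quick usage sketch for the helpers above (illustrative only — it assumes a trained `Qwen3Model` instance named `model` is already in memory; the prompt and sampling settings are arbitrary):

```python
import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
idx = text_to_token_ids("Once upon a time", tokenizer)

out = generate_text_simple(
    model=model,            # a trained Qwen3Model instance (assumed)
    idx=idx,
    max_new_tokens=30,
    context_size=40_960,    # should match the model's context_length
    temperature=0.8,
    top_k=50,
)
print(token_ids_to_text(out, tokenizer))
```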
+ # ============================================================================
+ # Model Architecture Components
+ # ============================================================================
+
+ class RMSNorm(nn.Module):
+     """
+     Root Mean Square Layer Normalization
+
+     RMSNorm is simpler and more efficient than LayerNorm.
+     Instead of normalizing using mean and variance, it only uses the root mean square.
+     """
+     def __init__(self, emb_dim, eps=1e-6, bias=False, qwen3_compatible=True):
+         super().__init__()
+         self.eps = eps
+         self.qwen3_compatible = qwen3_compatible
+         self.scale = nn.Parameter(torch.ones(emb_dim))
+         self.shift = nn.Parameter(torch.zeros(emb_dim)) if bias else None
+
+     def forward(self, x):
+         input_dtype = x.dtype
+
+         if self.qwen3_compatible:
+             x = x.to(torch.float32)
+
+         # Calculate variance using mean of squares
+         variance = x.pow(2).mean(dim=-1, keepdim=True)
+
+         # Normalize
+         norm_x = x * torch.rsqrt(variance + self.eps)
+         norm_x = norm_x * self.scale
+
+         if self.shift is not None:
+             norm_x = norm_x + self.shift
+
+         return norm_x.to(input_dtype)
+
+
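The forward pass above implements RMSNorm(x) = scale · x / √(mean(x²) + eps). A minimal numerical check against that formula (values arbitrary):

```python
import torch

norm = RMSNorm(emb_dim=4)
x = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
manual = x / torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + 1e-6)
assert torch.allclose(norm(x), manual, atol=1e-5)  # scale starts at 1, so they match
```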
+ def compute_rope_params(head_dim, theta_base=10_000, context_length=4096, dtype=torch.float32):
+     """
+     Compute Rotary Position Embedding (RoPE) parameters
+
+     RoPE encodes position by rotating token embeddings.
+     This allows the model to understand relative positions between tokens.
+     """
+     assert head_dim % 2 == 0, "Head dimension must be even"
+
+     # Compute the inverse frequencies
+     inv_freq = 1.0 / (theta_base ** (torch.arange(0, head_dim, 2, dtype=dtype)[: (head_dim // 2)].float() / head_dim))
+
+     # Generate position indices
+     positions = torch.arange(context_length, dtype=dtype)
+
+     # Compute the angles
+     angles = positions[:, None] * inv_freq[None, :]
+
+     # Expand angles to match the head_dim
+     angles = torch.cat([angles, angles], dim=1)
+
+     # Precompute sine and cosine
+     cos = torch.cos(angles)
+     sin = torch.sin(angles)
+
+     return cos, sin
+
+
+ def apply_rope(x, cos, sin):
+     """
+     Apply Rotary Position Embedding to input tensor
+
+     This rotates the embeddings based on their position in the sequence.
+     """
+     batch_size, num_heads, seq_len, head_dim = x.shape
+     assert head_dim % 2 == 0, "Head dimension must be even"
+
+     # Split x into first half and second half
+     x1 = x[..., : head_dim // 2]
+     x2 = x[..., head_dim // 2 :]
+
+     # Adjust sin and cos shapes
+     cos = cos[:seq_len, :].unsqueeze(0).unsqueeze(0)
+     sin = sin[:seq_len, :].unsqueeze(0).unsqueeze(0)
+
+     # Apply the rotary transformation
+     rotated = torch.cat((-x2, x1), dim=-1)
+     x_rotated = (x * cos) + (rotated * sin)
+
+     return x_rotated.to(dtype=x.dtype)
+
+
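A minimal shape check for the two RoPE helpers (dimensions chosen arbitrarily):

```python
import torch

cos, sin = compute_rope_params(head_dim=64, context_length=128)
print(cos.shape)                       # torch.Size([128, 64])

q = torch.randn(2, 8, 16, 64)          # [batch, num_heads, seq_len, head_dim]
print(apply_rope(q, cos, sin).shape)   # torch.Size([2, 8, 16, 64]) -- shape preserved
```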
+ class GroupedQueryAttention(nn.Module):
+     """
+     Grouped Query Attention (GQA)
+
+     GQA is more efficient than standard multi-head attention.
+     It shares Key and Value projections across multiple Query heads,
+     reducing the number of parameters while maintaining performance.
+     """
+     def __init__(self, d_in, num_heads, num_kv_groups, head_dim=None, qk_norm=False, dtype=None):
+         super().__init__()
+         assert num_heads % num_kv_groups == 0, "num_heads must be divisible by num_kv_groups"
+
+         self.num_heads = num_heads
+         self.num_kv_groups = num_kv_groups
+         self.group_size = num_heads // num_kv_groups
+
+         if head_dim is None:
+             assert d_in % num_heads == 0, "`d_in` must be divisible by `num_heads` if `head_dim` is not set"
+             head_dim = d_in // num_heads
+
+         self.head_dim = head_dim
+         self.d_out = num_heads * head_dim
+
+         self.W_query = nn.Linear(d_in, self.d_out, bias=False, dtype=dtype)
+         self.W_key = nn.Linear(d_in, num_kv_groups * head_dim, bias=False, dtype=dtype)
+         self.W_value = nn.Linear(d_in, num_kv_groups * head_dim, bias=False, dtype=dtype)
+
+         self.out_proj = nn.Linear(self.d_out, d_in, bias=False, dtype=dtype)
+
+         if qk_norm:
+             self.q_norm = RMSNorm(head_dim, eps=1e-6)
+             self.k_norm = RMSNorm(head_dim, eps=1e-6)
+         else:
+             self.q_norm = self.k_norm = None
+
+     def forward(self, x, mask, cos, sin):
+         b, num_tokens, _ = x.shape
+
+         # Apply projections
+         queries = self.W_query(x)
+         keys = self.W_key(x)
+         values = self.W_value(x)
+
+         # Reshape
+         queries = queries.view(b, num_tokens, self.num_heads, self.head_dim).transpose(1, 2)
+         keys = keys.view(b, num_tokens, self.num_kv_groups, self.head_dim).transpose(1, 2)
+         values = values.view(b, num_tokens, self.num_kv_groups, self.head_dim).transpose(1, 2)
+
+         # Optional normalization
+         if self.q_norm:
+             queries = self.q_norm(queries)
+         if self.k_norm:
+             keys = self.k_norm(keys)
+
+         # Apply RoPE
+         queries = apply_rope(queries, cos, sin)
+         keys = apply_rope(keys, cos, sin)
+
+         # Expand K and V to match number of heads
+         keys = keys.repeat_interleave(self.group_size, dim=1)
+         values = values.repeat_interleave(self.group_size, dim=1)
+
+         # Attention
+         attn_scores = queries @ keys.transpose(2, 3)
+         attn_scores = attn_scores.masked_fill(mask, -torch.inf)
+         attn_weights = torch.softmax(attn_scores / self.head_dim**0.5, dim=-1)
+
+         context = (attn_weights @ values).transpose(1, 2).reshape(b, num_tokens, self.d_out)
+         return self.out_proj(context)
+
+
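With the configuration used in this repo (16 query heads sharing 8 KV groups), the key/value projections hold half as many parameters as the query projection. A quick sketch confirming the shapes:

```python
attn = GroupedQueryAttention(d_in=1024, num_heads=16, num_kv_groups=8, head_dim=128, qk_norm=True)
print(attn.W_query.weight.shape)  # torch.Size([2048, 1024])  (16 heads x 128)
print(attn.W_key.weight.shape)    # torch.Size([1024, 1024])  ( 8 groups x 128)
```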
+ class FeedForward(nn.Module):
+     """
+     Feed-Forward Network used in transformer blocks
+
+     This is a gated (SwiGLU-style) network: two parallel linear projections
+     are combined with a SiLU gate, then projected back down.
+     The hidden dimension is typically larger than the embedding dimension,
+     allowing the model to learn complex patterns.
+     """
+     def __init__(self, cfg):
+         super().__init__()
+         self.fc1 = nn.Linear(cfg["emb_dim"], cfg["hidden_dim"], dtype=cfg["dtype"], bias=False)
+         self.fc2 = nn.Linear(cfg["emb_dim"], cfg["hidden_dim"], dtype=cfg["dtype"], bias=False)
+         self.fc3 = nn.Linear(cfg["hidden_dim"], cfg["emb_dim"], dtype=cfg["dtype"], bias=False)
+
+     def forward(self, x):
+         x_fc1 = self.fc1(x)
+         x_fc2 = self.fc2(x)
+         x = nn.functional.silu(x_fc1) * x_fc2
+         return self.fc3(x)
+
+
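In equation form, the forward pass above computes the SwiGLU variant FFN(x) = W3 · (SiLU(W1 x) ⊙ W2 x), where fc1 and fc2 are the two parallel up-projections and fc3 projects back to the embedding dimension.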
+ class TransformerBlock(nn.Module):
+     """
+     A single Transformer Block
+
+     Each block consists of:
+     1. Grouped Query Attention for processing relationships between tokens
+     2. Feed-Forward Network for processing each token independently
+     3. Residual connections and normalization for stable training
+     """
+     def __init__(self, cfg):
+         super().__init__()
+         self.att = GroupedQueryAttention(
+             d_in=cfg["emb_dim"],
+             num_heads=cfg["n_heads"],
+             head_dim=cfg["head_dim"],
+             num_kv_groups=cfg["n_kv_groups"],
+             qk_norm=cfg["qk_norm"],
+             dtype=cfg["dtype"]
+         )
+         self.ff = FeedForward(cfg)
+         self.norm1 = RMSNorm(cfg["emb_dim"], eps=1e-6)
+         self.norm2 = RMSNorm(cfg["emb_dim"], eps=1e-6)
+
+     def forward(self, x, mask, cos, sin):
+         # Attention block with residual connection
+         shortcut = x
+         x = self.norm1(x)
+         x = self.att(x, mask, cos, sin)
+         x = x + shortcut
+
+         # Feed-forward block with residual connection
+         shortcut = x
+         x = self.norm2(x)
+         x = self.ff(x)
+         x = x + shortcut
+
+         return x
+
+
+ class Qwen3Model(nn.Module):
+     """
+     Complete Qwen3 Language Model
+
+     This model can:
+     1. Take token IDs as input
+     2. Process them through multiple transformer layers
+     3. Output predictions for the next token
+     4. Generate new text autoregressively
+     """
+     def __init__(self, cfg):
+         super().__init__()
+
+         # Token embedding layer
+         self.tok_emb = nn.Embedding(cfg["vocab_size"], cfg["emb_dim"], dtype=cfg["dtype"])
+
+         # Stack of transformer blocks
+         self.trf_blocks = nn.ModuleList(
+             [TransformerBlock(cfg) for _ in range(cfg["n_layers"])]
+         )
+
+         # Final normalization and output projection
+         self.final_norm = RMSNorm(cfg["emb_dim"])
+         self.out_head = nn.Linear(cfg["emb_dim"], cfg["vocab_size"], bias=False, dtype=cfg["dtype"])
+
+         # Precompute RoPE parameters
+         if cfg["head_dim"] is None:
+             head_dim = cfg["emb_dim"] // cfg["n_heads"]
+         else:
+             head_dim = cfg["head_dim"]
+
+         cos, sin = compute_rope_params(
+             head_dim=head_dim,
+             theta_base=cfg["rope_base"],
+             context_length=cfg["context_length"]
+         )
+         self.register_buffer("cos", cos, persistent=False)
+         self.register_buffer("sin", sin, persistent=False)
+         self.cfg = cfg
+
+     def forward(self, in_idx, targets=None):
+         """
+         Forward pass through the model
+
+         Parameters:
+         -----------
+         in_idx : torch.Tensor
+             Input token IDs with shape [batch_size, sequence_length]
+         targets : torch.Tensor or None
+             Target token IDs for computing loss (used during training)
+
+         Returns:
+         --------
+         logits : torch.Tensor
+             Predictions for next tokens with shape [batch_size, sequence_length, vocab_size]
+         loss : torch.Tensor or None
+             Cross-entropy loss if targets are provided, otherwise None
+         """
+         # Get token embeddings
+         tok_embeds = self.tok_emb(in_idx)
+         x = tok_embeds
+
+         # Create causal mask (prevents looking at future tokens)
+         num_tokens = x.shape[1]
+         mask = torch.triu(torch.ones(num_tokens, num_tokens, device=x.device, dtype=torch.bool), diagonal=1)
+
+         # Pass through all transformer blocks
+         for block in self.trf_blocks:
+             x = block(x, mask, self.cos, self.sin)
+
+         # Final normalization and projection to vocabulary
+         x = self.final_norm(x)
+         logits = self.out_head(x.to(self.cfg["dtype"]))
+
+         # Compute loss if targets are provided
+         loss = None
+         if targets is not None:
+             loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))
+
+         return logits, loss
+
+     @torch.no_grad()
+     def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
+         """
+         Generate new tokens autoregressively
+
+         This is a convenience method that wraps the generation logic.
+         For more details, see the generate_text_simple function.
+         """
+         for _ in range(max_new_tokens):
+             ctx_len = self.cfg["context_length"]
+             idx_cond = idx if idx.size(1) <= ctx_len else idx[:, -ctx_len:]
+             logits, _ = self(idx_cond)
+             logits = logits[:, -1, :] / temperature
+             if top_k is not None:
+                 v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                 logits[logits < v[:, [-1]]] = float("-inf")
+             probs = F.softmax(logits, dim=-1)
+             idx_next = torch.multinomial(probs, num_samples=1)
+             idx = torch.cat((idx, idx_next), dim=1)
+         return idx
+
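A minimal smoke test of the full model, using a deliberately tiny configuration (these values are illustrative, not the Qwen3 0.6B settings) so it runs in seconds on CPU:

```python
import torch

tiny_cfg = {
    "vocab_size": 256, "context_length": 64, "emb_dim": 32,
    "n_heads": 4, "n_layers": 2, "hidden_dim": 64, "head_dim": 8,
    "qk_norm": True, "n_kv_groups": 2, "rope_base": 10_000.0,
    "dtype": torch.float32,
}
model = Qwen3Model(tiny_cfg)
tokens = torch.randint(0, 256, (1, 10))
logits, loss = model(tokens, targets=tokens)
print(logits.shape, loss)  # torch.Size([1, 10, 256]) and a scalar loss
```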
README.md CHANGED
@@ -1,14 +1,193 @@
  ---
- title: Qwen3 Sentence Completion
- emoji: 👁
- colorFrom: yellow
- colorTo: green
- sdk: gradio
- sdk_version: 5.49.1
- app_file: app.py
- pinned: false
- license: apache-2.0
- short_description: Sentence completion task, trained from Qwen3 0.6B model
- ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Qwen3 Text Generator
+
+ A text generation application using the Qwen3 0.6B model trained on the TinyStories dataset.
+
+ ## 🚀 Quick Start
+
+ ### Running Locally
+
+ 1. Make sure you have the required files:
+    - `app.py` - The Gradio interface
+    - `Qwen3_model.py` - The model architecture
+    - `Qwen3_200k_model_params.pt` - Your trained model weights
+    - `requirements.txt` - Python dependencies
+
+ 2. Install dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ 3. Run the app:
+    ```bash
+    python app.py
+    ```
+
+ 4. Open your browser to the URL shown (usually http://127.0.0.1:7860)
+
+ ## 📤 Deploying to HuggingFace Spaces
+
+ ### Step 1: Prepare Your Files
+
+ You need these files in your repository:
+ - `app.py` - Main application
+ - `Qwen3_model.py` - Model architecture
+ - `Qwen3_200k_model_params.pt` - Your trained model weights
+ - `requirements.txt` - Dependencies
+ - `README.md` - This file
+
+ ### Step 2: Create a HuggingFace Space
+
+ 1. Go to https://huggingface.co/new-space
+ 2. Fill in the details:
+    - **Space name**: Choose a name (e.g., "qwen3-text-generator")
+    - **License**: Select your preferred license
+    - **Select the SDK**: Choose **Gradio**
+    - **Space hardware**: Start with "CPU basic" (free)
+ 3. Click "Create Space"
+
+ ### Step 3: Upload Your Files
+
+ You have two options:
+
+ #### Option A: Using Git (Recommended)
+
+ ```bash
+ # Clone your new space
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
+ cd YOUR_SPACE_NAME
+
+ # Copy your files
+ cp /path/to/app.py .
+ cp /path/to/Qwen3_model.py .
+ cp /path/to/Qwen3_200k_model_params.pt .
+ cp /path/to/requirements.txt .
+ cp /path/to/README.md .
+
+ # Commit and push
+ git add .
+ git commit -m "Initial commit: Add Qwen3 text generator"
+ git push
+ ```
+
+ #### Option B: Using the Web Interface
+
+ 1. On your Space page, click "Files" → "Add file" → "Upload files"
+ 2. Drag and drop or select all your files
+ 3. Click "Commit to main"
+
+ ### Step 4: Wait for Build
+
+ - HuggingFace will automatically build and deploy your app
+ - This may take 5-10 minutes
+ - You'll see build logs in the "App" tab
+
+ ### Step 5: Test Your App
+
+ Once the build is complete, your app will be live at:
+ `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
+
+ ## 🎮 How to Use the App
+
+ 1. **Enter Starting Text**: Type the beginning of your story (e.g., "Once upon a time")
+
+ 2. **Adjust Max New Tokens**:
+    - Controls how much text to generate
+    - 10-50: Short continuation
+    - 50-100: Medium paragraph
+    - 100-200: Long passage
+
+ 3. **Adjust Temperature**:
+    - 0.1-0.7: More predictable, focused text
+    - 0.8-1.0: Balanced creativity
+    - 1.1-2.0: Very creative, more random
+
+ 4. **Click Generate**: Watch as the model continues your story!
+
+ ## 📊 Model Information
+
+ - **Architecture**: Qwen3 0.6B
+ - **Parameters**: 596 million unique parameters
+ - **Training Data**: TinyStories dataset
+ - **Best For**: Simple narratives, children's stories, everyday situations
+
+ ## 🔧 Troubleshooting
+
+ ### Model File Too Large
+
+ If your model file (`Qwen3_200k_model_params.pt`) is larger than 100MB, you'll need to use Git LFS:
+
+ ```bash
+ # Install Git LFS
+ git lfs install
+
+ # Track large files
+ git lfs track "*.pt"
+
+ # Add and commit
+ git add .gitattributes
+ git add Qwen3_200k_model_params.pt
+ git commit -m "Add model with LFS"
+ git push
+ ```
+
+ ### Out of Memory Error
+
+ If you get memory errors:
+ 1. Go to your Space settings
+ 2. Upgrade to a better hardware tier (may require payment)
+ 3. Or optimize your model file size
+
+ ### App Not Loading
+
+ 1. Check the build logs in the "App" tab
+ 2. Make sure all files are uploaded correctly
+ 3. Verify `requirements.txt` has all necessary packages
+ 4. Check that file names match exactly (case-sensitive)
+
+ ## 💡 Tips for Better Results
+
+ 1. **Good Prompts**: Start with clear, simple sentences
+    - ✅ "Once upon a time, there was a little girl"
+    - ❌ "Explain quantum physics"
+
+ 2. **Temperature Selection** (see the sketch after this list):
+    - Use lower temperature (0.5-0.7) for coherent stories
+    - Use higher temperature (1.0-1.5) for creative variety
+
+ 3. **Token Length**:
+    - Start with 30-50 tokens to see the style
+    - Increase if you want longer passages
+
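To see what temperature actually does, here is a small illustrative sketch (not part of the app) showing how dividing logits by the temperature reshapes the sampling distribution:

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.1])
for t in (0.5, 1.0, 1.5):
    print(t, F.softmax(logits / t, dim=-1))
# Lower t sharpens the distribution (more predictable);
# higher t flattens it (more random).
```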
+ ## 📝 File Structure
+
+ ```
+ .
+ ├── app.py                      # Main Gradio application
+ ├── Qwen3_model.py              # Model architecture and helpers
+ ├── Qwen3_200k_model_params.pt  # Trained model weights
+ ├── requirements.txt            # Python dependencies
+ └── README.md                   # This file
+ ```
+
+ ## 🤝 Contributing
+
+ Feel free to:
+ - Report issues
+ - Suggest improvements
+ - Share your generated stories!
+
+ ## 📜 License
+
+ This project uses the Qwen3 architecture. Please check the license for your specific use case.
+
+ ## 🙏 Acknowledgments
+
+ - Qwen3 architecture from Alibaba Cloud
+ - Training approach inspired by "LLMs from Scratch"
+ - TinyStories dataset for training data
+
  ---

+ **Enjoy generating creative stories! 📚✨**
+
app.py ADDED
@@ -0,0 +1,316 @@
+ """
+ Qwen3 Text Generation App for Hugging Face Spaces
+
+ This app allows you to generate text using a trained Qwen3 model.
+ You can control:
+ - The starting text (prompt)
+ - How many new words to generate (max_new_tokens)
+ - How creative the output should be (temperature)
+ """
+
+ import gradio as gr
+ import torch
+ import tiktoken
+ from pathlib import Path
+ from huggingface_hub import hf_hub_download
+
+ # Import our Qwen3 model
+ from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text
+
+
+ class TextGenerator:
+     """
+     A simple class to load the model and generate text
+
+     This makes it easy to:
+     1. Load the trained model once at startup
+     2. Generate text multiple times without reloading
+     """
+
+     def __init__(self, repo_id="vuminhtue/qwen3_sentiment_tinystories"):
+         """
+         Initialize the text generator
+
+         Parameters:
+         -----------
+         repo_id : str
+             HuggingFace repository ID to download the model from
+             Default: "vuminhtue/qwen3_sentiment_tinystories"
+         """
+         print("🚀 Loading Qwen3 model from HuggingFace...")
+         print(f"   Repository: {repo_id}")
+
+         # Configuration for Qwen3 0.6B model
+         # These settings define the architecture of the model
+         self.config = {
+             "vocab_size": 151_936,     # Number of different tokens the model knows
+             "context_length": 40_960,  # Maximum length of text it can process
+             "emb_dim": 1024,           # Size of the embedding vectors
+             "n_heads": 16,             # Number of attention heads
+             "n_layers": 28,            # Number of transformer layers
+             "hidden_dim": 3072,        # Size of the feed-forward network
+             "head_dim": 128,           # Size of each attention head
+             "qk_norm": True,           # Whether to normalize queries and keys
+             "n_kv_groups": 8,          # Number of key-value groups
+             "rope_base": 1_000_000.0,  # Base for rotary position encoding
+             "dtype": torch.bfloat16,   # Data type for model weights
+         }
+
+         # Detect if we have a GPU available
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         print(f"   Using device: {self.device}")
+
+         # Load the tokenizer (converts text to numbers and back)
+         # We use GPT-2's tokenizer which works well for English text
+         self.tokenizer = tiktoken.get_encoding("gpt2")
+         print("   ✓ Tokenizer loaded")
+
+         # Download the model file from HuggingFace
+         # This will cache the file locally, so it only downloads once
+         print("   📥 Downloading model from HuggingFace (this may take a moment)...")
+         try:
+             model_path = hf_hub_download(
+                 repo_id=repo_id,
+                 filename="Qwen3_200k_model_params.pt",
+                 repo_type="model"
+             )
+             print(f"   ✓ Model downloaded to: {model_path}")
+         except Exception as e:
+             print(f"   ❌ Error downloading model: {e}")
+             raise
+
+         # Create the model with our configuration
+         self.model = Qwen3Model(self.config)
+
+         # Load the trained weights from the downloaded file
+         print("   ⚙️ Loading model weights...")
+         self.model.load_state_dict(
+             torch.load(
+                 model_path,
+                 map_location=torch.device(self.device),
+                 weights_only=True
+             )
+         )
+
+         # Move model to the appropriate device (CPU or GPU)
+         self.model = self.model.to(self.device)
+
+         # Set to evaluation mode (disables training-specific features)
+         self.model.eval()
+
+         print("   ✓ Model loaded successfully!")
+         print("✅ Ready to generate text!\n")
+
+     def generate(self, prompt, max_new_tokens=50, temperature=1.0):
+         """
+         Generate text based on a prompt
+
+         Parameters:
+         -----------
+         prompt : str
+             The starting text (what you want the model to continue)
+         max_new_tokens : int
+             How many new tokens (roughly words) to generate
+         temperature : float
+             Controls creativity:
+             - Lower (0.1-0.7): More predictable, focused
+             - Medium (0.8-1.0): Balanced
+             - Higher (1.1-2.0): More creative, random
+
+         Returns:
+         --------
+         str : The generated text (including the original prompt)
+         """
+         try:
+             # Convert the text prompt to token IDs (numbers)
+             input_ids = text_to_token_ids(prompt, self.tokenizer)
+             input_ids = input_ids.to(self.device)
+
+             # Generate new tokens
+             output_ids = generate_text_simple(
+                 model=self.model,
+                 idx=input_ids,
+                 max_new_tokens=max_new_tokens,
+                 context_size=self.config["context_length"],
+                 temperature=temperature
+             )
+
+             # Convert the token IDs back to text
+             generated_text = token_ids_to_text(output_ids, self.tokenizer)
+
+             return generated_text
+
+         except Exception as e:
+             return f"❌ Error generating text: {str(e)}"
+
+
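Outside of Gradio, the class can be exercised directly from a Python shell (a sketch; the first call downloads and loads the checkpoint, which takes a while):

```python
gen = TextGenerator()
print(gen.generate("Once upon a time", max_new_tokens=30, temperature=0.8))
```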
+ # Initialize the generator once when the app starts
+ print("="*70)
+ print("INITIALIZING TEXT GENERATION APP")
+ print("="*70)
+ generator = TextGenerator()
+
+
+ def generate_text_interface(prompt, max_new_tokens, temperature):
+     """
+     Interface function for Gradio
+
+     This function:
+     1. Takes inputs from the user interface
+     2. Calls our generator
+     3. Returns the result to display
+     """
+     # Check if prompt is empty
+     if not prompt or len(prompt.strip()) == 0:
+         return "⚠️ Please enter some text to start with!"
+
+     # Limit max tokens to prevent very long generation times
+     max_new_tokens = min(max_new_tokens, 200)
+
+     # Generate text
+     result = generator.generate(prompt, max_new_tokens, temperature)
+
+     return result
+
+
+ # Create the Gradio interface
+ # This defines what the web app looks like and how it behaves
+ with gr.Blocks(title="Qwen3 Text Generator", theme=gr.themes.Soft()) as demo:
+
+     # Header
+     gr.Markdown(
+         """
+         # 🤖 Qwen3 Text Generator
+
+         Generate creative stories and text using a Qwen3 model trained on TinyStories!
+
+         ### How to use:
+         1. **Enter your starting text** (e.g., "Once upon a time")
+         2. **Adjust the sliders** to control the output
+         3. **Click Generate** to create text
+         """
+     )
+
+     # Main content area
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Input section
+             gr.Markdown("### 📝 Input")
+
+             prompt_input = gr.Textbox(
+                 label="Starting Text (Prompt)",
+                 placeholder="Once upon a time...",
+                 lines=3,
+                 info="Enter the text you want the model to continue"
+             )
+
+             # Control sliders
+             gr.Markdown("### ⚙️ Generation Settings")
+
+             max_tokens_slider = gr.Slider(
+                 minimum=10,
+                 maximum=200,
+                 value=50,
+                 step=10,
+                 label="Max New Tokens",
+                 info="How many new tokens to generate (roughly = number of words)"
+             )
+
+             temperature_slider = gr.Slider(
+                 minimum=0.1,
+                 maximum=2.0,
+                 value=1.0,
+                 step=0.1,
+                 label="Temperature",
+                 info="Lower = more predictable, Higher = more creative"
+             )
+
+             # Generate button
+             generate_btn = gr.Button(
+                 "✨ Generate Text",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             # Output section
+             gr.Markdown("### 📖 Generated Text")
+
+             output_text = gr.Textbox(
+                 label="Result",
+                 lines=15,
+                 interactive=False,
+                 show_copy_button=True
+             )
+
+     # Example prompts to try
+     gr.Markdown("### 💡 Try these examples:")
+     gr.Examples(
+         examples=[
+             ["Once upon a time", 50, 0.8],
+             ["There was a little girl named", 60, 1.0],
+             ["In a magical forest", 70, 1.2],
+             ["A brave knight", 50, 0.7],
+             ["The sun was shining and", 60, 0.9],
+         ],
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         label="Click any example to try it"
+     )
+
+     # Information section
+     gr.Markdown(
+         """
+         ---
+         ### 📊 About This Model
+
+         - **Model**: Qwen3 0.6B (596M parameters)
+         - **Training Data**: TinyStories dataset (children's stories)
+         - **Architecture**: 28 transformer layers with Grouped Query Attention
+         - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)
+
+         ### 🎯 Understanding the Parameters
+
+         **Max New Tokens:**
+         - Controls the length of generated text
+         - One token ≈ one word (roughly)
+         - More tokens = longer output = slower generation
+
+         **Temperature:**
+         - `0.1 - 0.7`: Safe, predictable, focused responses
+         - `0.8 - 1.0`: Balanced creativity and coherence
+         - `1.1 - 2.0`: Very creative but may be less coherent
+
+         ### ⚠️ Note
+
+         This model was trained on children's stories, so it works best for:
+         - Simple, clear narratives
+         - Stories about everyday situations
+         - Children's vocabulary and themes
+
+         ---
+         *Built with Qwen3 architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
+         """
+     )
+
+     # Connect the button to the generation function
+     generate_btn.click(
+         fn=generate_text_interface,
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         outputs=output_text
+     )
+
+     # Also allow pressing Enter in the text box to generate
+     prompt_input.submit(
+         fn=generate_text_interface,
+         inputs=[prompt_input, max_tokens_slider, temperature_slider],
+         outputs=output_text
+     )
+
+
+ # Launch the app
+ if __name__ == "__main__":
+     print("\n" + "="*70)
+     print("LAUNCHING GRADIO APP")
+     print("="*70)
+     demo.launch()
+
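Once deployed, the Space can also be called programmatically with the `gradio_client` package. A sketch — the Space id below is a placeholder for wherever this app is hosted, and `fn_index=0` assumes the button's click handler is the first registered event, which matches this file:

```python
from gradio_client import Client

client = Client("YOUR_USERNAME/YOUR_SPACE_NAME")  # placeholder Space id
result = client.predict("Once upon a time", 50, 0.8, fn_index=0)
print(result)
```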
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ # Requirements for Hugging Face Spaces
+ # This file lists all dependencies needed to run your Qwen3 text generator
+
+ # Core ML libraries
+ torch>=2.0.0
+ tiktoken>=0.5.0
+ numpy>=1.24.0
+
+ # HuggingFace Hub for downloading models
+ huggingface_hub>=0.16.0
+
+ # Gradio for web interface
+ gradio>=4.0.0
+
+ # Optional but recommended
+ scikit-learn>=1.3.0
+ joblib>=1.3.0
+ pandas>=2.0.0