Gogs committed on
Commit
d702978
·
1 Parent(s): b27edcc

✨ Professional Gradio UI with comparison table and clean design

Browse files
Files changed (1) hide show
  1. app.py +431 -184
app.py CHANGED
@@ -3,24 +3,46 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
  # ============================================================================
6
- # 🌸 YUUKI - Mobile-Trained Code Generator
 
7
  # ============================================================================
8
 
9
- print("🌸 Loading Yuuki model...")
10
- print("This may take a minute on first load...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- try:
13
- model = AutoModelForCausalLM.from_pretrained(
14
- "OpceanAI/Yuuki-best",
15
- torch_dtype=torch.float32,
16
- low_cpu_mem_usage=True
17
- )
18
- tokenizer = AutoTokenizer.from_pretrained("OpceanAI/Yuuki-best")
19
- print("✅ Model loaded successfully!")
20
- except Exception as e:
21
- print(f"❌ Error loading model: {e}")
22
- model = None
23
- tokenizer = None
24
 
25
  # ============================================================================
26
  # Generation Function
@@ -28,186 +50,331 @@ except Exception as e:
28
 
29
  def generate_code(
30
  prompt: str,
31
- max_length: int = 100,
32
  temperature: float = 0.7,
33
- top_p: float = 0.9
 
 
34
  ) -> str:
35
  """Generate code completion using Yuuki."""
36
 
37
- if model is None or tokenizer is None:
38
- return "❌ Model failed to load. Please refresh the page."
 
39
 
40
- if not prompt.strip():
41
- return "⚠️ Please enter a code prompt."
42
 
43
  try:
44
- inputs = tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
45
 
46
  with torch.no_grad():
47
  outputs = model.generate(
48
  **inputs,
49
- max_length=max_length,
50
  temperature=temperature,
51
  top_p=top_p,
 
 
52
  do_sample=True,
53
- pad_token_id=tokenizer.eos_token_id,
54
- eos_token_id=tokenizer.eos_token_id
 
55
  )
56
 
57
- generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
58
- return generated
59
 
60
  except Exception as e:
61
- return f"Generation error: {str(e)}"
 
62
 
63
  # ============================================================================
64
- # Examples
65
  # ============================================================================
66
 
67
- examples = [
68
- # Agda (best language)
69
- ["module Main where", 100, 0.7, 0.9],
70
- ["open import Data.Nat", 80, 0.7, 0.9],
71
- ["data Bool : Set where", 80, 0.7, 0.9],
72
 
73
- # C (limited but improving)
74
- ["int main() {", 80, 0.7, 0.9],
75
- ["#include <stdio.h>", 60, 0.7, 0.9],
76
 
77
- # Python (weak due to dataset ordering)
78
- ["def hello():", 60, 0.8, 0.9],
79
- ["import numpy as np", 60, 0.7, 0.9],
 
 
 
80
  ]
81
 
 
82
  # ============================================================================
83
- # Custom CSS
84
  # ============================================================================
85
 
86
- custom_css = """
87
- #title {
 
 
 
 
 
 
 
88
  text-align: center;
89
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
90
- -webkit-background-clip: text;
91
- -webkit-text-fill-color: transparent;
92
- font-size: 3em;
93
- font-weight: bold;
94
- margin-bottom: 0.5em;
95
  }
96
 
97
- #subtitle {
98
  text-align: center;
99
- font-size: 1.3em;
100
- color: #666;
101
- margin-bottom: 1em;
 
 
 
 
 
 
 
 
 
102
  }
103
 
104
- #warning-box {
105
- background: linear-gradient(135deg, #fff3cd 0%, #ffe8a1 100%);
106
- border-left: 4px solid #ffc107;
107
- border-radius: 8px;
108
- padding: 20px;
109
- margin: 20px 0;
110
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
111
  }
112
 
113
- #stats-box {
114
- background: linear-gradient(135deg, #e7f3ff 0%, #cfe7ff 100%);
115
- border-left: 4px solid #2196F3;
116
- border-radius: 8px;
117
- padding: 20px;
118
- margin: 20px 0;
119
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
120
  }
121
 
122
- #achievement-box {
123
- background: linear-gradient(135deg, #f0e8ff 0%, #e1d4ff 100%);
124
- border-left: 4px solid #9c27b0;
125
- border-radius: 8px;
126
- padding: 20px;
127
- margin: 20px 0;
128
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  }
130
 
131
- .gr-button-primary {
132
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  border: none !important;
134
- font-weight: bold !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  }
136
 
137
- footer {
138
- margin-top: 40px;
139
- padding-top: 20px;
140
- border-top: 1px solid #ddd;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  }
142
  """
143
 
 
144
  # ============================================================================
145
  # Gradio Interface
146
  # ============================================================================
147
 
148
- with gr.Blocks(css=custom_css, title="🌸 Yuuki - Mobile-Trained Code Generator", theme=gr.themes.Soft()) as demo:
 
 
 
 
149
 
150
  # Header
151
- gr.Markdown("<h1 id='title'>🌸 Yuuki</h1>")
152
- gr.Markdown("<p id='subtitle'>First LLM Trained Entirely on Mobile CPU | Zero-Budget ML Research</p>")
153
-
154
- # Warning Box
155
- gr.Markdown("""
156
- <div id='warning-box'>
157
- <h3 style='margin-top:0; color:#856404;'>⚠️ Experimental Research Model</h3>
158
- <p style='margin-bottom:0;'>
159
- Yuuki was trained on a <strong>smartphone CPU</strong> with <strong>$0 budget</strong>.
160
- This is a <strong>proof-of-concept</strong> demonstrating mobile training feasibility,
161
- not a production-ready code generator.
162
- </p>
163
- <br>
164
- <p style='margin-bottom:0;'>
165
- <strong>Best at:</strong> Agda (55/100) •
166
- <strong>Limited:</strong> C (20/100), Assembly (15/100) •
167
- <strong>Weak:</strong> Python (8/100)
168
- </p>
169
- </div>
170
  """)
171
 
172
- # Stats Box
173
- gr.Markdown("""
174
- <div id='stats-box'>
175
- <h3 style='margin-top:0; color:#0d47a1;'>📊 Training Statistics</h3>
176
- <p style='margin-bottom:5px;'><strong>Hardware:</strong> Snapdragon 685 (CPU only) | <strong>Steps:</strong> 2,000 / 37,500 (5.3%)</p>
177
- <p style='margin-bottom:5px;'><strong>Training Time:</strong> ~50 hours continuous | <strong>Speed:</strong> ~86 sec/step</p>
178
- <p style='margin-bottom:5px;'><strong>Loss:</strong> 1.94 | <strong>Cost:</strong> $0.00 | <strong>Quality:</strong> 24.6/100 average</p>
179
- <p style='margin-bottom:0;'><strong>Status:</strong> Best checkpoint from early training | <strong>Full v0.1:</strong> Coming March 2026</p>
180
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
181
  """)
182
 
183
- # Achievement Box
184
- gr.Markdown("""
185
- <div id='achievement-box'>
186
- <h3 style='margin-top:0; color:#6a1b9a;'>🏆 Community Validation</h3>
187
- <p style='margin-bottom:5px;'>✅ <strong>Followed by Gradio team member</strong> - recognized for unique approach</p>
188
- <p style='margin-bottom:5px;'>✅ <strong>Liked by mradermacher</strong> - quantization expert validated concept</p>
189
- <p style='margin-bottom:0;'>✅ <strong>5+ downloads</strong> - early adopters supporting mobile ML training</p>
190
- </div>
 
191
  """)
192
 
193
  # Main Interface
194
  with gr.Row():
195
  with gr.Column(scale=1):
196
  prompt_input = gr.Textbox(
197
- label="💻 Code Prompt",
198
  placeholder="module Main where",
199
- lines=3,
200
- info="Try Agda for best results!"
201
  )
202
 
203
- with gr.Accordion("⚙️ Advanced Settings", open=False):
204
- max_length = gr.Slider(
205
  minimum=20,
206
- maximum=200,
207
  value=100,
208
  step=10,
209
- label="Max Length",
210
- info="Maximum tokens to generate"
211
  )
212
  temperature = gr.Slider(
213
  minimum=0.1,
@@ -215,89 +382,169 @@ with gr.Blocks(css=custom_css, title="🌸 Yuuki - Mobile-Trained Code Generator
215
  value=0.7,
216
  step=0.1,
217
  label="Temperature",
218
- info="Higher = more creative, lower = more conservative"
219
  )
220
  top_p = gr.Slider(
221
  minimum=0.1,
222
  maximum=1.0,
223
  value=0.9,
224
  step=0.05,
225
- label="Top P",
226
- info="Nucleus sampling parameter"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  )
228
 
229
- generate_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
 
 
 
 
 
230
 
231
  with gr.Column(scale=1):
232
  output = gr.Textbox(
233
- label="📝 Generated Code",
234
- lines=15,
235
- show_copy_button=True
 
236
  )
237
 
238
- # Examples Section
239
- gr.Markdown("### 💡 Try These Examples:")
240
  gr.Examples(
241
- examples=examples,
242
- inputs=[prompt_input, max_length, temperature, top_p],
243
  outputs=output,
244
  fn=generate_code,
245
  cache_examples=False,
246
  label="Click any example to try it"
247
  )
248
 
249
- # Generate button action
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  generate_btn.click(
251
  fn=generate_code,
252
- inputs=[prompt_input, max_length, temperature, top_p],
253
  outputs=output
254
  )
255
 
256
- # Footer
257
- gr.Markdown("""
258
- <footer>
259
-
260
- ### 🌟 About This Project
261
-
262
- **Yuuki proves that LLM training is accessible** even with zero budget and consumer hardware.
263
-
264
- **Why this matters:**
265
- - 🎓 **Students** without GPU access can experiment with ML training
266
- - 🌍 **Democratizes** ML research globally - barriers are mindset, not money
267
- - 📱 **Explores** edge ML training possibilities on mobile devices
268
- - 🔬 **Documents** complete training journey including failures and recoveries
269
-
270
- **Training Journey Highlights:**
271
- - Step 1,292: Early peak (loss 1.70, quality 31/100)
272
- - Step 1,600: Mode collapse (loss 2.41) 💀
273
- - Step 1,900: Recovery begins (loss 1.76)
274
- - **Step 2,000: Current best** (loss 1.94, quality 24.6/100) ⭐
275
- - Steps 2,100-2,500: Bad data zone (<11/100 quality)
276
-
277
- **Key Finding:** Dataset quality matters more than loss value. Some checkpoints with excellent
278
- loss (1.71) had terrible quality (7/100) due to corrupted training data.
279
-
280
- ---
281
-
282
- ### 🔗 Links
283
-
284
- - 🤗 [Yuuki-best Model](https://huggingface.co/OpceanAI/Yuuki-best) - This checkpoint (recommended)
285
- - 📜 [Original Yuuki](https://huggingface.co/OpceanAI/Yuuki) - First upload (historical)
286
- - ⏳ Yuuki v0.1 Complete - Coming March 2026 (2 full epochs)
287
- - 📄 Research Paper - Coming soon
288
- - 💻 [Training Code](https://github.com/YuuKi-OS/yuuki-training)
289
-
290
- ---
291
-
292
- <p align="center">
293
- <i>Built with patience, a phone, and zero budget</i><br>
294
- <b>🌸 Proving the barrier to AI is mindset, not money</b><br><br>
295
- Made with ❤️ | Powered by <a href="https://gradio.app">Gradio</a> & <a href="https://huggingface.co">HuggingFace</a>
296
- </p>
297
-
298
- </footer>
299
- """)
300
 
 
301
  # Launch
 
 
302
  if __name__ == "__main__":
303
- demo.launch()
 
 
 
 
 
 
 
 
3
  import torch
4
 
5
  # ============================================================================
6
+ # YUUKI - Mobile-Trained Code Generator
7
+ # First LLM Trained Entirely on a Smartphone
8
  # ============================================================================
9
 
10
# Module-level model state shared by load_model() and generate_code().
MODEL_ID = "OpceanAI/Yuuki-best"
MODEL_LOADED = False
model = None
tokenizer = None


def load_model():
    """Load the Yuuki tokenizer and model into the module-level globals.

    A successful load is remembered in ``MODEL_LOADED``, so later calls
    return immediately. The tokenizer and model are first loaded into local
    variables and only published to the globals once *both* are ready, so a
    failure part-way through never leaves a tokenizer without a model.

    Returns:
        bool: True if the model and tokenizer are ready to use, False if
        loading failed (the error is printed, not raised).
    """
    global model, tokenizer, MODEL_LOADED

    if MODEL_LOADED:
        return True

    try:
        print(f"Loading Yuuki model from {MODEL_ID}...")

        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        # NOTE(review): trust_remote_code=True allows Python code shipped in
        # the model repository to run at load time. Kept because the
        # checkpoint may depend on it -- confirm it is required and that the
        # repository is trusted.
        loaded_model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )

        # Ensure a pad token exists; fall back to the EOS token.
        if loaded_tokenizer.pad_token is None:
            loaded_tokenizer.pad_token = loaded_tokenizer.eos_token

    except Exception as e:
        # Top-level boundary: report the failure and let the caller decide.
        print(f"Error loading model: {e}")
        return False

    # Publish the state only after everything above succeeded.
    tokenizer = loaded_tokenizer
    model = loaded_model
    MODEL_LOADED = True
    print("Model loaded successfully!")
    return True
45
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # ============================================================================
48
  # Generation Function
 
50
 
51
def generate_code(
    prompt: str,
    max_new_tokens: int = 100,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.1
) -> str:
    """Generate a code completion for ``prompt`` using Yuuki.

    Args:
        prompt: Text to complete. ``None``, empty, or whitespace-only
            prompts are rejected with a message.
        max_new_tokens: Passed to ``model.generate`` as ``max_new_tokens``.
        temperature: Passed to ``model.generate`` as ``temperature``.
        top_p: Passed to ``model.generate`` as ``top_p``.
        top_k: Passed to ``model.generate`` as ``top_k``.
        repetition_penalty: Passed to ``model.generate`` as
            ``repetition_penalty``.

    Returns:
        The decoded first output sequence, or a human-readable message when
        the prompt is empty, the model cannot be loaded, or generation
        raises. This function does not raise; errors are returned as text.
    """
    # Validate first so an empty submission never triggers a model load.
    if not prompt or not prompt.strip():
        return "Please enter a code prompt to generate."

    # Lazy-load the model on first use.
    if not MODEL_LOADED and not load_model():
        return "Error: Model failed to load. Please try refreshing the page."

    try:
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512
        )

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # Defensive coercion: values coming from UI widgets may be
                # floats (e.g. 100.0) where an integer count is expected.
                max_new_tokens=int(max_new_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                top_k=int(top_k),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=1
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # Surface the failure in the output box instead of crashing the UI.
        return f"Generation error: {e}"
95
+
96
 
97
  # ============================================================================
98
+ # Examples by Language Quality
99
  # ============================================================================
100
 
101
+ EXAMPLES = [
102
+ # Agda - Best performance (55/100)
103
+ ["module Main where", 120, 0.7, 0.9, 50, 1.1],
104
+ ["open import Data.Nat", 100, 0.7, 0.9, 50, 1.1],
105
+ ["data Bool : Set where", 100, 0.7, 0.9, 50, 1.1],
106
 
107
+ # C - Limited (20/100)
108
+ ["int main() {", 100, 0.7, 0.9, 50, 1.1],
109
+ ["#include <stdio.h>", 80, 0.7, 0.9, 50, 1.1],
110
 
111
+ # Assembly - Basic (15/100)
112
+ ["mov eax,", 60, 0.8, 0.9, 50, 1.1],
113
+
114
+ # Python - Weak due to dataset order (8/100)
115
+ ["def hello():", 80, 0.8, 0.9, 50, 1.2],
116
+ ["import numpy as np", 60, 0.7, 0.9, 50, 1.1],
117
  ]
118
 
119
+
120
  # ============================================================================
121
+ # Custom CSS - Clean Modern Design
122
  # ============================================================================
123
 
124
+ CUSTOM_CSS = """
125
+ /* Main container */
126
+ .gradio-container {
127
+ max-width: 1200px !important;
128
+ margin: auto !important;
129
+ }
130
+
131
+ /* Header styling */
132
+ .header-title {
133
  text-align: center;
134
+ font-size: 2.5rem;
135
+ font-weight: 700;
136
+ color: #1a1a2e;
137
+ margin-bottom: 0.25rem;
138
+ letter-spacing: -0.02em;
 
139
  }
140
 
141
+ .header-subtitle {
142
  text-align: center;
143
+ font-size: 1.1rem;
144
+ color: #64748b;
145
+ margin-bottom: 1.5rem;
146
+ }
147
+
148
+ /* Info cards */
149
+ .info-card {
150
+ background: #f8fafc;
151
+ border: 1px solid #e2e8f0;
152
+ border-radius: 12px;
153
+ padding: 1.25rem;
154
+ margin-bottom: 1rem;
155
  }
156
 
157
+ .info-card.warning {
158
+ background: #fffbeb;
159
+ border-color: #fcd34d;
160
+ border-left: 4px solid #f59e0b;
 
 
 
161
  }
162
 
163
+ .info-card.stats {
164
+ background: #f0f9ff;
165
+ border-color: #bae6fd;
166
+ border-left: 4px solid #0ea5e9;
 
 
 
167
  }
168
 
169
+ .info-card.achievement {
170
+ background: #faf5ff;
171
+ border-color: #e9d5ff;
172
+ border-left: 4px solid #a855f7;
173
+ }
174
+
175
+ .info-card h3 {
176
+ margin: 0 0 0.75rem 0;
177
+ font-size: 1rem;
178
+ font-weight: 600;
179
+ }
180
+
181
+ .info-card.warning h3 { color: #92400e; }
182
+ .info-card.stats h3 { color: #0369a1; }
183
+ .info-card.achievement h3 { color: #7c3aed; }
184
+
185
+ .info-card p {
186
+ margin: 0.25rem 0;
187
+ font-size: 0.9rem;
188
+ color: #475569;
189
+ line-height: 1.5;
190
  }
191
 
192
+ /* Score badges */
193
+ .score-row {
194
+ display: flex;
195
+ gap: 1rem;
196
+ flex-wrap: wrap;
197
+ margin-top: 0.75rem;
198
+ }
199
+
200
+ .score-badge {
201
+ display: inline-flex;
202
+ align-items: center;
203
+ gap: 0.5rem;
204
+ padding: 0.375rem 0.75rem;
205
+ border-radius: 9999px;
206
+ font-size: 0.8rem;
207
+ font-weight: 500;
208
+ }
209
+
210
+ .score-badge.good {
211
+ background: #dcfce7;
212
+ color: #166534;
213
+ }
214
+
215
+ .score-badge.medium {
216
+ background: #fef3c7;
217
+ color: #92400e;
218
+ }
219
+
220
+ .score-badge.weak {
221
+ background: #fee2e2;
222
+ color: #991b1b;
223
+ }
224
+
225
+ /* Primary button */
226
+ .primary-btn {
227
+ background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%) !important;
228
  border: none !important;
229
+ color: white !important;
230
+ font-weight: 600 !important;
231
+ transition: all 0.2s ease !important;
232
+ }
233
+
234
+ .primary-btn:hover {
235
+ transform: translateY(-1px) !important;
236
+ box-shadow: 0 4px 12px rgba(59, 130, 246, 0.4) !important;
237
+ }
238
+
239
+ /* Comparison table */
240
+ .comparison-table {
241
+ width: 100%;
242
+ border-collapse: collapse;
243
+ margin: 1rem 0;
244
+ font-size: 0.875rem;
245
  }
246
 
247
+ .comparison-table th,
248
+ .comparison-table td {
249
+ padding: 0.75rem;
250
+ text-align: left;
251
+ border-bottom: 1px solid #e2e8f0;
252
+ }
253
+
254
+ .comparison-table th {
255
+ background: #f1f5f9;
256
+ font-weight: 600;
257
+ color: #334155;
258
+ }
259
+
260
+ .comparison-table tr:hover {
261
+ background: #f8fafc;
262
+ }
263
+
264
+ /* Footer */
265
+ .footer {
266
+ margin-top: 2rem;
267
+ padding-top: 1.5rem;
268
+ border-top: 1px solid #e2e8f0;
269
+ text-align: center;
270
+ color: #64748b;
271
+ font-size: 0.875rem;
272
+ }
273
+
274
+ .footer a {
275
+ color: #3b82f6;
276
+ text-decoration: none;
277
+ }
278
+
279
+ .footer a:hover {
280
+ text-decoration: underline;
281
+ }
282
+
283
+ /* Links row */
284
+ .links-row {
285
+ display: flex;
286
+ justify-content: center;
287
+ gap: 1.5rem;
288
+ flex-wrap: wrap;
289
+ margin: 1rem 0;
290
+ }
291
+
292
+ .link-item {
293
+ color: #3b82f6;
294
+ text-decoration: none;
295
+ font-weight: 500;
296
+ font-size: 0.9rem;
297
+ }
298
+
299
+ /* Accordion styling */
300
+ .accordion {
301
+ border: 1px solid #e2e8f0 !important;
302
+ border-radius: 8px !important;
303
+ margin-top: 0.5rem !important;
304
  }
305
  """
306
 
307
+
308
  # ============================================================================
309
  # Gradio Interface
310
  # ============================================================================
311
 
312
+ with gr.Blocks(
313
+ css=CUSTOM_CSS,
314
+ title="Yuuki - Mobile-Trained Code Generator",
315
+ theme=gr.themes.Soft()
316
+ ) as demo:
317
 
318
  # Header
319
+ gr.HTML("""
320
+ <div class="header-title">Yuuki</div>
321
+ <div class="header-subtitle">
322
+ First LLM Trained Entirely on a Smartphone | Zero-Budget ML Research
323
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  """)
325
 
326
+ # Disclaimer Card
327
+ gr.HTML("""
328
+ <div class="info-card warning">
329
+ <h3>Experimental Research Model</h3>
330
+ <p>
331
+ Yuuki is the <strong>best model available at this moment</strong>.
332
+ The full <strong>v0.1</strong> release is coming soon once published,
333
+ plans for <strong>v0.2</strong> will begin.
334
+ </p>
335
+ <p style="margin-top: 0.5rem;">
336
+ This model is being trained <strong>entirely on a smartphone CPU</strong> by a
337
+ <strong>single person</strong>. A research paper exploring mobile LLM training
338
+ will be published soon.
339
+ </p>
340
+ <div class="score-row">
341
+ <span class="score-badge good">Agda: 55/100</span>
342
+ <span class="score-badge medium">C: 20/100</span>
343
+ <span class="score-badge medium">Assembly: 15/100</span>
344
+ <span class="score-badge weak">Python: 8/100</span>
345
+ </div>
346
+ </div>
347
  """)
348
 
349
+ # Stats Card
350
+ gr.HTML("""
351
+ <div class="info-card stats">
352
+ <h3>Training Statistics</h3>
353
+ <p><strong>Hardware:</strong> Snapdragon 685 (CPU only) | <strong>Model Size:</strong> 988 MB</p>
354
+ <p><strong>Progress:</strong> 2,000 / 37,500 steps (5.3%) | <strong>Speed:</strong> ~86 sec/step</p>
355
+ <p><strong>Loss:</strong> 1.69 - 2.31 | <strong>Cost:</strong> $0.00 | <strong>Average Quality:</strong> 24.6/100</p>
356
+ <p><strong>Improvement:</strong> +146% quality gain from checkpoint 1400 to 2000</p>
357
+ </div>
358
  """)
359
 
360
  # Main Interface
361
  with gr.Row():
362
  with gr.Column(scale=1):
363
  prompt_input = gr.Textbox(
364
+ label="Code Prompt",
365
  placeholder="module Main where",
366
+ lines=4,
367
+ info="Try Agda prompts for best results"
368
  )
369
 
370
+ with gr.Accordion("Advanced Settings", open=False):
371
+ max_new_tokens = gr.Slider(
372
  minimum=20,
373
+ maximum=256,
374
  value=100,
375
  step=10,
376
+ label="Max New Tokens",
377
+ info="Number of tokens to generate"
378
  )
379
  temperature = gr.Slider(
380
  minimum=0.1,
 
382
  value=0.7,
383
  step=0.1,
384
  label="Temperature",
385
+ info="Higher = more creative, lower = more focused"
386
  )
387
  top_p = gr.Slider(
388
  minimum=0.1,
389
  maximum=1.0,
390
  value=0.9,
391
  step=0.05,
392
+ label="Top P (Nucleus Sampling)",
393
+ info="Cumulative probability threshold"
394
+ )
395
+ top_k = gr.Slider(
396
+ minimum=1,
397
+ maximum=100,
398
+ value=50,
399
+ step=5,
400
+ label="Top K",
401
+ info="Number of top tokens to consider"
402
+ )
403
+ repetition_penalty = gr.Slider(
404
+ minimum=1.0,
405
+ maximum=2.0,
406
+ value=1.1,
407
+ step=0.05,
408
+ label="Repetition Penalty",
409
+ info="Penalize repeated tokens"
410
  )
411
 
412
+ generate_btn = gr.Button(
413
+ "Generate Code",
414
+ variant="primary",
415
+ size="lg",
416
+ elem_classes=["primary-btn"]
417
+ )
418
 
419
  with gr.Column(scale=1):
420
  output = gr.Textbox(
421
+ label="Generated Code",
422
+ lines=16,
423
+ show_copy_button=True,
424
+ info="Output will appear here"
425
  )
426
 
427
+ # Examples
428
+ gr.Markdown("### Examples")
429
  gr.Examples(
430
+ examples=EXAMPLES,
431
+ inputs=[prompt_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
432
  outputs=output,
433
  fn=generate_code,
434
  cache_examples=False,
435
  label="Click any example to try it"
436
  )
437
 
438
+ # Comparison Section
439
+ with gr.Accordion("Checkpoint Comparison (1400 vs 2000)", open=False):
440
+ gr.HTML("""
441
+ <table class="comparison-table">
442
+ <thead>
443
+ <tr>
444
+ <th>Metric</th>
445
+ <th>Checkpoint 1400</th>
446
+ <th>Checkpoint 2000</th>
447
+ </tr>
448
+ </thead>
449
+ <tbody>
450
+ <tr>
451
+ <td>Training Progress</td>
452
+ <td>1,400 / 37,500 (3.7%)</td>
453
+ <td>2,000 / 37,500 (5.3%)</td>
454
+ </tr>
455
+ <tr>
456
+ <td>Average Loss</td>
457
+ <td>1.70 - 2.23</td>
458
+ <td>1.69 - 2.31</td>
459
+ </tr>
460
+ <tr>
461
+ <td>Training Speed</td>
462
+ <td>~100 sec/step</td>
463
+ <td>~86 sec/step</td>
464
+ </tr>
465
+ <tr>
466
+ <td>Agda Score</td>
467
+ <td>20/100</td>
468
+ <td><strong>55/100</strong></td>
469
+ </tr>
470
+ <tr>
471
+ <td>C Score</td>
472
+ <td>8/100</td>
473
+ <td><strong>20/100</strong></td>
474
+ </tr>
475
+ <tr>
476
+ <td>Assembly Score</td>
477
+ <td>2/100</td>
478
+ <td><strong>15/100</strong></td>
479
+ </tr>
480
+ <tr>
481
+ <td>Average Quality</td>
482
+ <td>~10/100</td>
483
+ <td><strong>24.6/100 (+146%)</strong></td>
484
+ </tr>
485
+ </tbody>
486
+ </table>
487
+ """)
488
+
489
+ # Why This Matters
490
+ with gr.Accordion("Why This Project Matters", open=False):
491
+ gr.Markdown("""
492
+ **Yuuki proves that LLM training is accessible** even with zero budget and consumer hardware.
493
+
494
+ - **Students** without GPU access can experiment with ML training
495
+ - **Democratizes** ML research globally — barriers are mindset, not money
496
+ - **Explores** edge ML training possibilities on mobile devices
497
+ - **Documents** complete training journey including failures and recoveries
498
+
499
+ **Key Finding:** Dataset quality matters more than loss value. Checkpoint-2700 achieved
500
+ the lowest loss (1.62) but scored 12% worse in quality than checkpoint-2000, proving
501
+ that loss alone is unreliable when training data varies.
502
+ """)
503
+
504
+ # Footer
505
+ gr.HTML("""
506
+ <div class="footer">
507
+ <div class="links-row">
508
+ <a href="https://huggingface.co/OpceanAI/Yuuki-best" target="_blank">Model Card</a>
509
+ <a href="https://huggingface.co/OpceanAI/Yuuki" target="_blank">Original Yuuki</a>
510
+ <a href="https://github.com/YuuKi-OS/yuuki-training" target="_blank">Training Code</a>
511
+ </div>
512
+ <p style="margin-top: 1rem;">
513
+ Built with patience, a phone, and zero budget.<br>
514
+ <strong>Proving the barrier to AI is mindset, not money.</strong>
515
+ </p>
516
+ <p style="margin-top: 0.5rem; font-size: 0.8rem;">
517
+ Licensed under Apache 2.0 | Powered by
518
+ <a href="https://gradio.app" target="_blank">Gradio</a> &
519
+ <a href="https://huggingface.co" target="_blank">Hugging Face</a>
520
+ </p>
521
+ </div>
522
+ """)
523
+
524
+ # Event handlers
525
  generate_btn.click(
526
  fn=generate_code,
527
+ inputs=[prompt_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
528
  outputs=output
529
  )
530
 
531
+ prompt_input.submit(
532
+ fn=generate_code,
533
+ inputs=[prompt_input, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
534
+ outputs=output
535
+ )
536
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
538
+ # ============================================================================
539
  # Launch
540
+ # ============================================================================
541
+
542
  if __name__ == "__main__":
543
+ # Preload model on startup
544
+ load_model()
545
+
546
+ demo.launch(
547
+ share=False,
548
+ show_error=True,
549
+ show_api=False
550
+ )