Imakandi-Labs committed on
Commit
028db64
·
verified ·
1 Parent(s): 9b16fa9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -69
app.py CHANGED
@@ -1,131 +1,322 @@
1
  #!/usr/bin/env python3
2
  """
3
- YarnGPT Fine-tuning UI - Gradio App
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
6
  import gradio as gr
7
  import torch
8
  import threading
9
- from train import train_yarngpt
 
10
 
11
  # Global state
12
- training_status = {"progress": 0, "message": "Ready to train", "running": False}
 
 
 
 
 
13
 
14
 
15
- def update_progress(progress, message):
16
  """Update training progress."""
17
- global training_status
18
- training_status["progress"] = progress
19
- training_status["message"] = message
 
 
 
 
 
 
 
20
 
 
 
 
 
 
 
 
 
21
 
22
- def start_training(dataset_id, output_repo, epochs, batch_size, learning_rate, lora_r):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """Start training in background thread."""
24
- global training_status
25
 
26
- if training_status["running"]:
27
- return "Training already in progress!"
28
 
29
- training_status["running"] = True
30
- training_status["progress"] = 0
31
- training_status["message"] = "Starting..."
 
32
 
33
  def train_thread():
34
- global training_status
35
  try:
36
- result = train_yarngpt(
37
- dataset_id=dataset_id,
38
- output_repo=output_repo,
39
  epochs=int(epochs),
40
  batch_size=int(batch_size),
41
  learning_rate=float(learning_rate),
42
  lora_r=int(lora_r),
43
- progress_callback=update_progress,
 
 
 
 
44
  )
45
- training_status["message"] = f"Complete! Model at: {result}"
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- training_status["message"] = f"Error: {str(e)}"
 
 
 
 
48
  finally:
49
- training_status["running"] = False
50
 
51
  thread = threading.Thread(target=train_thread)
52
  thread.start()
53
 
54
- return "Training started! Check progress below."
55
 
56
 
57
- def get_status():
58
- """Get current training status."""
59
- return f"{training_status['message']} ({training_status['progress']*100:.1f}%)"
 
 
 
 
 
 
 
 
60
 
61
 
62
- def get_progress():
63
- """Get progress value."""
64
- return training_status["progress"]
65
 
66
 
67
- # Check GPU
68
- device_info = f"GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "CPU only (slow!)"
 
 
 
 
 
69
 
70
- # Gradio UI
71
- with gr.Blocks(title="YarnGPT Fine-tuning") as demo:
72
- gr.Markdown(f"""
73
- # YarnGPT Fine-tuning Trainer
74
 
75
- Fine-tune YarnGPT2 on Nigerian speech data using LoRA.
 
 
 
 
 
76
 
77
- **Device:** {device_info}
78
  """)
79
 
80
  with gr.Row():
81
- with gr.Column():
82
- dataset_input = gr.Textbox(
83
- label="Dataset ID",
84
- value="Imakandi-Labs/aasd-nigerian-tts-processed",
85
- info="HuggingFace dataset with text transcriptions"
 
 
 
 
 
 
 
 
 
 
 
 
86
  )
87
- output_repo_input = gr.Textbox(
88
- label="Output Model Repo",
89
- value="Imakandi-Labs/YarnGPT2-Nigerian-Finetuned",
90
- info="Where to save the fine-tuned model"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- with gr.Column():
94
- epochs_input = gr.Slider(1, 10, value=3, step=1, label="Epochs")
95
- batch_size_input = gr.Slider(1, 16, value=4, step=1, label="Batch Size")
96
- lr_input = gr.Number(value=2e-4, label="Learning Rate")
97
- lora_r_input = gr.Slider(4, 64, value=16, step=4, label="LoRA Rank")
98
 
99
- train_btn = gr.Button("Start Training", variant="primary")
100
- status_output = gr.Textbox(label="Status", interactive=False)
101
- progress_bar = gr.Slider(0, 1, value=0, label="Progress", interactive=False)
 
 
 
 
 
102
 
103
- # Auto-refresh status
104
- refresh_btn = gr.Button("Refresh Status")
 
 
 
 
105
 
 
106
  train_btn.click(
107
- start_training,
108
- inputs=[dataset_input, output_repo_input, epochs_input, batch_size_input, lr_input, lora_r_input],
109
- outputs=status_output
 
 
 
 
110
  )
111
 
112
- refresh_btn.click(get_status, outputs=status_output)
113
- refresh_btn.click(get_progress, outputs=progress_bar)
 
 
114
 
115
  gr.Markdown("""
116
- ## Instructions
 
117
 
118
- 1. Click **Start Training** to begin fine-tuning
119
- 2. Click **Refresh Status** to check progress
120
- 3. Once complete, your model will be at the Output Model Repo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  ## After Training
123
 
124
- Update your TTS Space to use the fine-tuned model:
125
  ```python
126
- from peft import PeftModel
127
- model = PeftModel.from_pretrained(base_model, "Imakandi-Labs/YarnGPT2-Nigerian-Finetuned")
 
 
128
  ```
129
  """)
130
 
131
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ Morpheus-GPT v2 Training UI
4
+ ============================
5
+
6
+ HuggingFace Space for training Morpheus-GPT v2 - Nigerian Female TTS
7
+
8
+ Uses ALL available data:
9
+ - UbuntuFarms/morpheus-gpt-training (5000+ samples)
10
+ - ejiod/female-nigerian-tts
11
+ - benjaminogbonna/nigerian_common_voice_dataset
12
+ - MMS-TTS synthetic pronunciation data
13
+
14
+ Base: saheedniyi/YarnGPT2b
15
+ Output: UbuntuFarms/morpheus-gpt-v2
16
  """
17
 
18
  import gradio as gr
19
  import torch
20
  import threading
21
+ import time
22
+ from train_morpheus_gpt_v2 import TrainingConfig, prepare_training_data, train_model
23
 
24
  # Global state
25
+ training_state = {
26
+ "running": False,
27
+ "progress": 0.0,
28
+ "message": "Ready to train",
29
+ "log": [],
30
+ }
31
 
32
 
33
+ def update_progress(progress: float, message: str):
34
  """Update training progress."""
35
+ global training_state
36
+ training_state["progress"] = progress
37
+ training_state["message"] = message
38
+ training_state["log"].append(f"[{progress*100:.1f}%] {message}")
39
+
40
+
41
+ def get_system_info():
42
+ """Get system information."""
43
+ info = []
44
+ info.append(f"PyTorch: {torch.__version__}")
45
 
46
+ if torch.cuda.is_available():
47
+ info.append(f"GPU: {torch.cuda.get_device_name(0)}")
48
+ mem = torch.cuda.get_device_properties(0).total_memory / 1e9
49
+ info.append(f"GPU Memory: {mem:.1f} GB")
50
+ info.append("Status: Ready for training!")
51
+ else:
52
+ info.append("GPU: Not available")
53
+ info.append("Status: CPU only (training will be slow)")
54
 
55
+ return "\n".join(info)
56
+
57
+
58
+ def start_training(
59
+ output_repo: str,
60
+ epochs: int,
61
+ batch_size: int,
62
+ learning_rate: float,
63
+ lora_r: int,
64
+ use_morpheus_training: bool,
65
+ use_female_nigerian_tts: bool,
66
+ use_common_voice: bool,
67
+ use_mms_synthetic: bool,
68
+ female_only: bool,
69
+ ):
70
  """Start training in background thread."""
71
+ global training_state
72
 
73
+ if training_state["running"]:
74
+ return "Training already in progress!", get_progress_display()
75
 
76
+ training_state["running"] = True
77
+ training_state["progress"] = 0.0
78
+ training_state["message"] = "Starting..."
79
+ training_state["log"] = []
80
 
81
  def train_thread():
82
+ global training_state
83
  try:
84
+ config = TrainingConfig(
85
+ output_repo=output_repo or "UbuntuFarms/morpheus-gpt-v2",
 
86
  epochs=int(epochs),
87
  batch_size=int(batch_size),
88
  learning_rate=float(learning_rate),
89
  lora_r=int(lora_r),
90
+ use_morpheus_training=use_morpheus_training,
91
+ use_female_nigerian_tts=use_female_nigerian_tts,
92
+ use_common_voice=use_common_voice,
93
+ use_mms_synthetic=use_mms_synthetic,
94
+ female_only=female_only,
95
  )
96
+
97
+ # Prepare data
98
+ update_progress(0.1, "Loading training data...")
99
+ dataset = prepare_training_data(config)
100
+ update_progress(0.2, f"Loaded {len(dataset)} samples")
101
+
102
+ # Train
103
+ result = train_model(config, dataset, progress_callback=update_progress)
104
+
105
+ training_state["message"] = f"Complete! Model at: {result}"
106
+
107
  except Exception as e:
108
+ import traceback
109
+ traceback.print_exc()
110
+ training_state["message"] = f"Error: {str(e)}"
111
+ training_state["log"].append(f"ERROR: {str(e)}")
112
+
113
  finally:
114
+ training_state["running"] = False
115
 
116
  thread = threading.Thread(target=train_thread)
117
  thread.start()
118
 
119
+ return "Training started! Monitor progress below.", get_progress_display()
120
 
121
 
122
+ def get_progress_display():
123
+ """Get progress display string."""
124
+ global training_state
125
+ lines = [
126
+ f"Status: {training_state['message']}",
127
+ f"Progress: {training_state['progress']*100:.1f}%",
128
+ "",
129
+ "--- Log ---",
130
+ ]
131
+ lines.extend(training_state["log"][-20:]) # Last 20 log entries
132
+ return "\n".join(lines)
133
 
134
 
135
+ def refresh_progress():
136
+ """Refresh progress display."""
137
+ return get_progress_display(), training_state["progress"]
138
 
139
 
140
+ # ============================================================
141
+ # GRADIO UI
142
+ # ============================================================
143
+
144
+ with gr.Blocks(title="Morpheus-GPT v2 Trainer", theme=gr.themes.Soft()) as demo:
145
+ gr.Markdown("""
146
+ # 🎙️ Morpheus-GPT v2 Training
147
 
148
+ **Train a Nigerian Female TTS model using ALL available data:**
 
 
 
149
 
150
+ | Data Source | Description |
151
+ |-------------|-------------|
152
+ | UbuntuFarms/morpheus-gpt-training | 5000+ samples (female Yoruba, Hausa, Igbo) |
153
+ | ejiod/female-nigerian-tts | Female Nigerian voices |
154
+ | benjaminogbonna/nigerian_common_voice_dataset | Yoruba, Hausa, Igbo |
155
+ | MMS-TTS Synthetic | Pronunciation training data |
156
 
157
+ **Base Model:** saheedniyi/YarnGPT2b (Nigerian language optimized)
158
  """)
159
 
160
  with gr.Row():
161
+ with gr.Column(scale=1):
162
+ sys_info = gr.Textbox(
163
+ label="System Info",
164
+ value=get_system_info(),
165
+ lines=5,
166
+ interactive=False,
167
+ )
168
+
169
+ gr.Markdown("---")
170
+ gr.Markdown("## Training Configuration")
171
+
172
+ with gr.Row():
173
+ with gr.Column(scale=1):
174
+ output_repo = gr.Textbox(
175
+ label="Output Model Repository",
176
+ value="UbuntuFarms/morpheus-gpt-v2",
177
+ info="Where to save the trained model"
178
  )
179
+
180
+ with gr.Row():
181
+ epochs = gr.Slider(
182
+ label="Epochs",
183
+ minimum=1,
184
+ maximum=20,
185
+ value=5,
186
+ step=1,
187
+ )
188
+ batch_size = gr.Slider(
189
+ label="Batch Size",
190
+ minimum=1,
191
+ maximum=8,
192
+ value=2,
193
+ step=1,
194
+ )
195
+
196
+ with gr.Row():
197
+ learning_rate = gr.Number(
198
+ label="Learning Rate",
199
+ value=2e-4,
200
+ )
201
+ lora_r = gr.Slider(
202
+ label="LoRA Rank",
203
+ minimum=4,
204
+ maximum=64,
205
+ value=16,
206
+ step=4,
207
+ )
208
+
209
+ with gr.Column(scale=1):
210
+ gr.Markdown("### Data Sources")
211
+
212
+ use_morpheus_training = gr.Checkbox(
213
+ label="UbuntuFarms/morpheus-gpt-training (5000+ samples)",
214
+ value=True,
215
+ )
216
+ use_female_nigerian_tts = gr.Checkbox(
217
+ label="ejiod/female-nigerian-tts",
218
+ value=True,
219
  )
220
+ use_common_voice = gr.Checkbox(
221
+ label="Nigerian Common Voice (Yoruba, Hausa, Igbo)",
222
+ value=True,
223
+ )
224
+ use_mms_synthetic = gr.Checkbox(
225
+ label="MMS-TTS Synthetic Pronunciation Data",
226
+ value=True,
227
+ )
228
+ female_only = gr.Checkbox(
229
+ label="Filter for Female Voices Only",
230
+ value=True,
231
+ )
232
+
233
+ with gr.Row():
234
+ train_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")
235
+ refresh_btn = gr.Button("🔄 Refresh Progress", size="lg")
236
 
237
+ gr.Markdown("---")
238
+ gr.Markdown("## Training Progress")
 
 
 
239
 
240
+ with gr.Row():
241
+ progress_bar = gr.Slider(
242
+ label="Progress",
243
+ minimum=0,
244
+ maximum=1,
245
+ value=0,
246
+ interactive=False,
247
+ )
248
 
249
+ progress_display = gr.Textbox(
250
+ label="Training Log",
251
+ value="Ready to train. Configure options above and click 'Start Training'.",
252
+ lines=15,
253
+ interactive=False,
254
+ )
255
 
256
+ # Event handlers
257
  train_btn.click(
258
+ fn=start_training,
259
+ inputs=[
260
+ output_repo, epochs, batch_size, learning_rate, lora_r,
261
+ use_morpheus_training, use_female_nigerian_tts, use_common_voice,
262
+ use_mms_synthetic, female_only
263
+ ],
264
+ outputs=[progress_display, progress_display],
265
  )
266
 
267
+ refresh_btn.click(
268
+ fn=refresh_progress,
269
+ outputs=[progress_display, progress_bar],
270
+ )
271
 
272
  gr.Markdown("""
273
+ ---
274
+ ## How It Works
275
 
276
+ ```
277
+ ┌─────────────────────────────────────────────────────────────┐
278
+ │ DATA SOURCES │
279
+ │ • morpheus-gpt-training (5000+ female Nigerian samples) │
280
+ │ • female-nigerian-tts │
281
+ │ • Nigerian Common Voice (Yoruba, Hausa, Igbo) │
282
+ │ • MMS-TTS synthetic pronunciation │
283
+ └─────────────────────┬───────────────────────────────────────┘
284
+ │
285
+ ▼
286
+ ┌─────────────────────────────────────────────────────────────┐
287
+ │ COMBINED DATASET (Female Nigerian voices only) │
288
+ └─────────────────────┬───────────────────────────────────────┘
289
+ │
290
+ ▼
291
+ ┌─────────────────────────────────────────────────────────────┐
292
+ │ BASE MODEL: saheedniyi/YarnGPT2b │
293
+ │ (Already optimized for Nigerian languages) │
294
+ │ + │
295
+ │ LoRA Fine-tuning (efficient, GPU-friendly) │
296
+ └─────────────────────┬───────────────────────────────────────┘
297
+ │
298
+ ▼
299
+ ┌─────────────────────────────────────────────────────────────┐
300
+ │ OUTPUT: UbuntuFarms/morpheus-gpt-v2 │
301
+ │ • Clear pronunciation │
302
+ │ • Natural female voice │
303
+ │ • Nigerian languages (Yoruba, Hausa, Igbo, Pidgin) │
304
+ └─────────────────────────────────────────────────────────────┘
305
+ ```
306
 
307
  ## After Training
308
 
309
+ Use the trained model:
310
  ```python
311
+ from transformers import AutoModelForCausalLM, AutoTokenizer
312
+
313
+ model = AutoModelForCausalLM.from_pretrained("UbuntuFarms/morpheus-gpt-v2")
314
+ tokenizer = AutoTokenizer.from_pretrained("UbuntuFarms/morpheus-gpt-v2")
315
  ```
316
  """)
317
 
318
+
319
+ demo.queue()
320
+
321
+ if __name__ == "__main__":
322
+ demo.launch()