jkorstad committed on
Commit
bff77f3
·
1 Parent(s): d63ba06

Fix generation bug: correct the positional argument mapping for Quick Generate, main Generate, and Save/Load. Add *args unpacking for the 80 character-voice inputs (8 characters x 10 fields).

Browse files
Files changed (1) hide show
  1. app.py +114 -114
app.py CHANGED
@@ -6,7 +6,6 @@ file upload, chapter selection, segment previews, and project save/load.
6
 
7
  import os
8
  import json
9
- import base64
10
  from pathlib import Path
11
  from typing import Dict, List, Optional
12
 
@@ -147,24 +146,20 @@ input:focus, textarea:focus, select:focus {
147
  border-color: #334155 !important;
148
  }
149
 
150
- /* Fix checkbox and label contrast */
151
  input[type="checkbox"] + label,
152
  .checkbox-label,
153
  .gr-checkbox label {
154
  color: #f8fafc !important;
155
  }
156
 
157
- /* Fix list text in tips */
158
  li, .prose li, .gr-prose li {
159
  color: #cbd5e1 !important;
160
  }
161
 
162
- /* Ensure strong/bold text is visible */
163
  strong, b {
164
  color: #f8fafc !important;
165
  }
166
 
167
- /* Code inline styling */
168
  code {
169
  background: #334155 !important;
170
  color: #22d3ee !important;
@@ -172,7 +167,6 @@ code {
172
  border-radius: 4px !important;
173
  }
174
 
175
- /* Progress bar styling */
176
  progress {
177
  width: 100%;
178
  height: 8px;
@@ -188,7 +182,6 @@ progress::-webkit-progress-value {
188
  border-radius: 4px;
189
  }
190
 
191
- /* Segment list styling */
192
  .seg-item {
193
  background: #0f172a;
194
  border: 1px solid #334155;
@@ -214,10 +207,6 @@ progress::-webkit-progress-value {
214
  # ---------------------------------------------------------------------------
215
 
216
  _pipeline: Optional[AudiobookPipeline] = None
217
- _stored_text: str = ""
218
- _stored_chapters: List[Dict] = []
219
- _stored_segments_meta: List[Dict] = []
220
- _stored_segment_paths: List[str] = []
221
 
222
 
223
  def get_pipeline() -> AudiobookPipeline:
@@ -258,15 +247,13 @@ def handle_upload(file_obj) -> tuple:
258
  pipe = get_pipeline()
259
  text, fname = pipe.parse_upload(file_obj)
260
  text = pipe.processor.clean_text(text)
261
- global _stored_text, _stored_chapters
262
- _stored_text = text
263
- _stored_chapters = pipe.detect_chapters(text)
264
- ch_info = " | ".join([f"Ch{c['idx']+1}: {c['word_count']}w" for c in _stored_chapters[:5]])
265
- if len(_stored_chapters) > 5:
266
- ch_info += f" (+{len(_stored_chapters)-5} more)"
267
  wc = len(text.split())
268
  dur = estimate_duration(wc)
269
- return text, f"Loaded {fname} — {wc} words (~{dur}) | Chapters: {ch_info if _stored_chapters else '1 (auto)'}"
270
  except Exception as e:
271
  return "", f"Error: {e}"
272
 
@@ -280,11 +267,15 @@ def extract_chars(text: str, use_ai: bool) -> tuple:
280
  return chars, status
281
 
282
 
283
- def get_chapter_text(text: str, chapter_idx: int) -> str:
284
- if not text:
285
- return ""
286
- pipe = get_pipeline()
287
- return pipe.get_chapter_text(text, chapter_idx)
 
 
 
 
288
 
289
 
290
  # ---------------------------------------------------------------------------
@@ -295,11 +286,22 @@ def get_chapter_text(text: str, chapter_idx: int) -> str:
295
  def generate_audiobook_gpu(
296
  text,
297
  nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
298
- gen_temp, gen_seed, output_fmt,
299
- names, descs, modes, presets, audios, ref_texts, designs, instructs, langs, speeds,
300
  ):
301
  if not text or len(text.strip()) < 50:
302
- return None, None, "Error: Please provide at least 50 characters of story text.", ""
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  pipe = get_pipeline()
305
 
@@ -312,7 +314,7 @@ def generate_audiobook_gpu(
312
  design_desc=nar_design if nar_mode == "design" else None,
313
  instruct=nar_instruct,
314
  language=nar_lang,
315
- speed=float(nar_speed),
316
  )
317
 
318
  char_configs = {}
@@ -348,11 +350,7 @@ def generate_audiobook_gpu(
348
  temperature=gen_temp,
349
  seed=int(gen_seed),
350
  )
351
- global _stored_segment_paths, _stored_segments_meta
352
- _stored_segment_paths = seg_paths
353
- _stored_segments_meta = seg_meta
354
 
355
- # Build segment list HTML
356
  seg_html = "<div style='max-height: 300px; overflow-y: auto;'>"
357
  for s in seg_meta[:50]:
358
  tclass = "narration" if s['type'] == 'narration' else "dialogue"
@@ -361,7 +359,6 @@ def generate_audiobook_gpu(
361
  seg_html += f"<div style='text-align:center;color:#94a3b8;padding:0.5rem;'>... and {len(seg_meta)-50} more segments</div>"
362
  seg_html += "</div>"
363
 
364
- # Extra export
365
  extra_path = None
366
  if output_fmt == "wav":
367
  extra_path = output_path.replace(".mp3", ".wav")
@@ -390,7 +387,7 @@ def preview_narrator_gpu(mode, preset, audio, ref_text, design, instruct, lang,
390
  design_desc=design if mode == "design" else None,
391
  instruct=instruct,
392
  language=lang,
393
- speed=float(speed),
394
  )
395
  try:
396
  wav, sr = pipe.preview_voice(vc)
@@ -425,19 +422,74 @@ def preview_char_voice_gpu(name, mode, preset, audio, ref_text, design, instruct
425
  return None, f"Preview failed: {e}"
426
 
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  # ---------------------------------------------------------------------------
429
  # Project Save/Load
430
  # ---------------------------------------------------------------------------
431
 
432
- def do_save_project(text, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
433
- names, descs, modes, presets, audios, ref_texts, designs, instructs, langs, speeds,
434
- gen_temp, gen_seed):
 
 
 
 
 
 
 
 
 
 
 
 
435
  nar_cfg = VoiceConfig(
436
  name="Narrator", mode=nar_mode, preset=nar_preset if nar_mode == "preset" else None,
437
  ref_audio=nar_audio if nar_mode == "clone" and nar_audio else None,
438
  ref_text=nar_ref_text if nar_mode == "clone" else None,
439
  design_desc=nar_design if nar_mode == "design" else None,
440
- instruct=nar_instruct, language=nar_lang, speed=float(nar_speed),
 
441
  )
442
  char_configs = {}
443
  for i in range(8):
@@ -463,7 +515,6 @@ def do_load_project(json_str):
463
  nar = data["narrator"]
464
  chars = data.get("characters", {})
465
 
466
- # Build updates for narrator
467
  nar_updates = [
468
  gr.update(value=nar.mode),
469
  gr.update(value=nar.preset if nar.preset else "Ryan", visible=nar.mode=="preset"),
@@ -475,7 +526,6 @@ def do_load_project(json_str):
475
  gr.update(value=nar.speed),
476
  ]
477
 
478
- # Build updates for characters (up to 8)
479
  char_updates = []
480
  char_items = list(chars.items())[:8]
481
  for i in range(8):
@@ -493,6 +543,8 @@ def do_load_project(json_str):
493
  gr.update(value=c.instruct, visible=True),
494
  gr.update(value=c.language, visible=True),
495
  gr.update(value=c.speed, visible=True),
 
 
496
  ])
497
  else:
498
  char_updates.extend([
@@ -507,55 +559,16 @@ def do_load_project(json_str):
507
  gr.update(visible=False),
508
  gr.update(visible=False),
509
  gr.update(visible=False),
 
 
510
  ])
511
 
512
  text_sample = data.get("text_sample", "")
513
  return [text_sample] + nar_updates + char_updates + [f"Project loaded! {len(chars)} characters configured."]
514
- except Exception as e:
515
- return [""] + [gr.update()]*43 + [f"Error loading project: {e}"]
516
-
517
-
518
- # ---------------------------------------------------------------------------
519
- # Quick Generate
520
- # ---------------------------------------------------------------------------
521
-
522
- @spaces.GPU(duration=180)
523
- def quick_generate_gpu(text, narrator_preset, gen_temp, gen_seed, output_fmt):
524
- """One-click generation with all defaults."""
525
- if not text or len(text.strip()) < 50:
526
- return None, "Error: Text too short."
527
-
528
- pipe = get_pipeline()
529
- nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
530
- language="English", speed=1.0)
531
-
532
- def prog_cb(ratio: float, msg: str):
533
- print(f"[{ratio*100:.0f}%] {msg}")
534
-
535
- try:
536
- output_path, seg_paths, seg_meta = pipe.generate(
537
- text=text,
538
- narrator_config=nar_cfg,
539
- character_configs={},
540
- progress_callback=prog_cb,
541
- temperature=gen_temp,
542
- seed=int(gen_seed),
543
- )
544
-
545
- extra_path = None
546
- if output_fmt == "wav":
547
- extra_path = output_path.replace(".mp3", ".wav")
548
- from backend import save_audiobook
549
- save_audiobook(seg_paths, extra_path, fmt="wav")
550
- elif output_fmt == "zip":
551
- extra_path = pipe.export_segments_zip(seg_paths)
552
-
553
- final_path = extra_path if extra_path else output_path
554
- return final_path, f"Quick audiobook ready! {len(seg_meta)} segments."
555
  except Exception as e:
556
  import traceback
557
  traceback.print_exc()
558
- return None, f"Error: {str(e)}"
559
 
560
 
561
  # ---------------------------------------------------------------------------
@@ -637,7 +650,7 @@ def build_app():
637
  quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
638
  quick_status = gr.Textbox(show_label=False, interactive=False)
639
  gr.Markdown("---")
640
- gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text — perfect for articles, essays, and simple stories.")
641
 
642
  with gr.Row():
643
  chapter_selector = gr.Dropdown(
@@ -665,7 +678,6 @@ def build_app():
665
  outputs=[quick_audio, quick_status],
666
  )
667
 
668
- # Chapter detection
669
  def refresh_chapters(text):
670
  if not text:
671
  return gr.update(choices=["All"], value="All")
@@ -703,11 +715,11 @@ def build_app():
703
 
704
  with gr.Column(scale=2):
705
  gr.Markdown("## Character Voices")
706
- gr.Markdown("Configure up to 8 characters. Each can use Preset, Clone, or Design mode.")
707
 
708
  char_names, char_descs, char_modes, char_presets = [], [], [], []
709
  char_audios, char_ref_texts, char_designs, char_instructs, char_langs, char_speeds = [], [], [], [], [], []
710
- char_rows, char_preview_btns, char_preview_audios = [], [], []
711
 
712
  for i in range(8):
713
  visible_default = (i == 0)
@@ -727,12 +739,13 @@ def build_app():
727
  with gr.Row():
728
  cpv_btn = gr.Button("🔊 Preview", variant="secondary", visible=visible_default)
729
  cpv_audio = gr.Audio(label="Preview", interactive=False, visible=visible_default)
 
730
 
731
  cm.change(on_mode_change, inputs=cm, outputs=[cp, ca, crt, cdes])
732
  cpv_btn.click(
733
  preview_char_voice_gpu,
734
  inputs=[cn, cm, cp, ca, crt, cdes, cinstr, cl, cspd],
735
- outputs=[cpv_audio, cpv_btn], # reuse button for status
736
  )
737
 
738
  char_rows.append(row)
@@ -748,6 +761,7 @@ def build_app():
748
  char_speeds.append(cspd)
749
  char_preview_btns.append(cpv_btn)
750
  char_preview_audios.append(cpv_audio)
 
751
 
752
  # ==================== TAB 3: Generate ====================
753
  with gr.TabItem("⚡ Generate"):
@@ -833,6 +847,7 @@ def build_app():
833
  gr.update(value=chars[i].get("speed", 1.0), visible=True),
834
  gr.update(visible=True),
835
  gr.update(visible=True),
 
836
  ])
837
  else:
838
  updates.extend([
@@ -849,6 +864,7 @@ def build_app():
849
  gr.update(visible=False),
850
  gr.update(visible=False),
851
  gr.update(visible=False),
 
852
  ])
853
  return [status] + updates
854
 
@@ -856,7 +872,7 @@ def build_app():
856
  item for sublist in [
857
  [char_rows[i], char_names[i], char_descs[i], char_modes[i], char_presets[i],
858
  char_audios[i], char_ref_texts[i], char_designs[i], char_instructs[i], char_langs[i],
859
- char_speeds[i], char_preview_btns[i], char_preview_audios[i]]
860
  for i in range(8)
861
  ] for item in sublist
862
  ]
@@ -868,26 +884,16 @@ def build_app():
868
  char_audios + char_ref_texts + char_designs + char_instructs + char_langs + char_speeds
869
  )
870
 
871
- def get_text_for_gen(story_text, chapter_sel):
872
- if chapter_sel == "All" or not chapter_sel:
873
- return story_text
874
- # Extract chapter index
875
- try:
876
- idx = int(chapter_sel.split(":")[0].replace("Ch", "")) - 1
877
- return get_chapter_text(story_text, idx)
878
- except:
879
- return story_text
880
-
881
- def wrapped_generate(story_text, chapter_sel, *args):
882
- text = get_text_for_gen(story_text, chapter_sel)
883
- return generate_audiobook_gpu(text, *args)
884
-
885
  gen_inputs = [
886
  story_input, chapter_selector,
887
  nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
888
  gen_temp, gen_seed, output_fmt,
889
  ] + all_char_inputs
890
 
 
 
 
 
891
  gen_btn.click(
892
  wrapped_generate,
893
  inputs=gen_inputs,
@@ -895,20 +901,14 @@ def build_app():
895
  )
896
 
897
  # ---------- Project wiring ----------
898
- save_btn.click(
899
- do_save_project,
900
- inputs=[
901
- story_input,
902
- nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
903
- ] + all_char_inputs + [gen_temp, gen_seed],
904
- outputs=[project_json],
905
- )
906
 
907
- load_btn.click(
908
- do_load_project,
909
- inputs=[load_json],
910
- outputs=[story_input, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed] + extract_outputs[1:] + [load_status],
911
- )
912
 
913
  return demo
914
 
 
6
 
7
  import os
8
  import json
 
9
  from pathlib import Path
10
  from typing import Dict, List, Optional
11
 
 
146
  border-color: #334155 !important;
147
  }
148
 
 
149
  input[type="checkbox"] + label,
150
  .checkbox-label,
151
  .gr-checkbox label {
152
  color: #f8fafc !important;
153
  }
154
 
 
155
  li, .prose li, .gr-prose li {
156
  color: #cbd5e1 !important;
157
  }
158
 
 
159
  strong, b {
160
  color: #f8fafc !important;
161
  }
162
 
 
163
  code {
164
  background: #334155 !important;
165
  color: #22d3ee !important;
 
167
  border-radius: 4px !important;
168
  }
169
 
 
170
  progress {
171
  width: 100%;
172
  height: 8px;
 
182
  border-radius: 4px;
183
  }
184
 
 
185
  .seg-item {
186
  background: #0f172a;
187
  border: 1px solid #334155;
 
207
  # ---------------------------------------------------------------------------
208
 
209
  _pipeline: Optional[AudiobookPipeline] = None
 
 
 
 
210
 
211
 
212
  def get_pipeline() -> AudiobookPipeline:
 
247
  pipe = get_pipeline()
248
  text, fname = pipe.parse_upload(file_obj)
249
  text = pipe.processor.clean_text(text)
250
+ chs = pipe.detect_chapters(text)
251
+ ch_info = " | ".join([f"Ch{c['idx']+1}: {c['word_count']}w" for c in chs[:5]])
252
+ if len(chs) > 5:
253
+ ch_info += f" (+{len(chs)-5} more)"
 
 
254
  wc = len(text.split())
255
  dur = estimate_duration(wc)
256
+ return text, f"Loaded {fname} — {wc} words (~{dur}) | {ch_info if chs else '1 section'}"
257
  except Exception as e:
258
  return "", f"Error: {e}"
259
 
 
267
  return chars, status
268
 
269
 
270
+ def get_chapter_text(text: str, chapter_sel: str) -> str:
271
+ if not text or chapter_sel == "All" or not chapter_sel:
272
+ return text
273
+ try:
274
+ idx = int(chapter_sel.split(":")[0].replace("Ch", "")) - 1
275
+ pipe = get_pipeline()
276
+ return pipe.get_chapter_text(text, idx)
277
+ except Exception:
278
+ return text
279
 
280
 
281
  # ---------------------------------------------------------------------------
 
286
  def generate_audiobook_gpu(
287
  text,
288
  nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
289
+ gen_temp, gen_seed, output_fmt, *args
 
290
  ):
291
  if not text or len(text.strip()) < 50:
292
+ return None, "", "Error: Please provide at least 50 characters of story text.", ""
293
+
294
+ # Unpack character args (80 values = 8 chars x 10 fields)
295
+ names = list(args[0:8])
296
+ descs = list(args[8:16])
297
+ modes = list(args[16:24])
298
+ presets = list(args[24:32])
299
+ audios = list(args[32:40])
300
+ ref_texts = list(args[40:48])
301
+ designs = list(args[48:56])
302
+ instructs = list(args[56:64])
303
+ langs = list(args[64:72])
304
+ speeds = list(args[72:80])
305
 
306
  pipe = get_pipeline()
307
 
 
314
  design_desc=nar_design if nar_mode == "design" else None,
315
  instruct=nar_instruct,
316
  language=nar_lang,
317
+ speed=float(nar_speed) if nar_speed else 1.0,
318
  )
319
 
320
  char_configs = {}
 
350
  temperature=gen_temp,
351
  seed=int(gen_seed),
352
  )
 
 
 
353
 
 
354
  seg_html = "<div style='max-height: 300px; overflow-y: auto;'>"
355
  for s in seg_meta[:50]:
356
  tclass = "narration" if s['type'] == 'narration' else "dialogue"
 
359
  seg_html += f"<div style='text-align:center;color:#94a3b8;padding:0.5rem;'>... and {len(seg_meta)-50} more segments</div>"
360
  seg_html += "</div>"
361
 
 
362
  extra_path = None
363
  if output_fmt == "wav":
364
  extra_path = output_path.replace(".mp3", ".wav")
 
387
  design_desc=design if mode == "design" else None,
388
  instruct=instruct,
389
  language=lang,
390
+ speed=float(speed) if speed else 1.0,
391
  )
392
  try:
393
  wav, sr = pipe.preview_voice(vc)
 
422
  return None, f"Preview failed: {e}"
423
 
424
 
425
+ # ---------------------------------------------------------------------------
426
+ # Quick Generate
427
+ # ---------------------------------------------------------------------------
428
+
429
+ @spaces.GPU(duration=180)
430
+ def quick_generate_gpu(text, narrator_preset, gen_temp, output_fmt, gen_seed=42):
431
+ if not text or len(text.strip()) < 50:
432
+ return None, "Error: Text too short."
433
+
434
+ pipe = get_pipeline()
435
+ nar_cfg = VoiceConfig(name="Narrator", mode="preset", preset=narrator_preset,
436
+ language="English", speed=1.0)
437
+
438
+ def prog_cb(ratio: float, msg: str):
439
+ print(f"[{ratio*100:.0f}%] {msg}")
440
+
441
+ try:
442
+ output_path, seg_paths, seg_meta = pipe.generate(
443
+ text=text,
444
+ narrator_config=nar_cfg,
445
+ character_configs={},
446
+ progress_callback=prog_cb,
447
+ temperature=gen_temp,
448
+ seed=int(gen_seed),
449
+ )
450
+
451
+ extra_path = None
452
+ if output_fmt == "wav":
453
+ extra_path = output_path.replace(".mp3", ".wav")
454
+ from backend import save_audiobook
455
+ save_audiobook(seg_paths, extra_path, fmt="wav")
456
+ elif output_fmt == "zip":
457
+ extra_path = pipe.export_segments_zip(seg_paths)
458
+
459
+ final_path = extra_path if extra_path else output_path
460
+ return final_path, f"Quick audiobook ready! {len(seg_meta)} segments."
461
+ except Exception as e:
462
+ import traceback
463
+ traceback.print_exc()
464
+ return None, f"Error: {str(e)}"
465
+
466
+
467
  # ---------------------------------------------------------------------------
468
  # Project Save/Load
469
  # ---------------------------------------------------------------------------
470
 
471
+ def do_save_project(text, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed, *args):
472
+ # Unpack character args (80 values) + gen_temp + gen_seed
473
+ names = list(args[0:8])
474
+ descs = list(args[8:16])
475
+ modes = list(args[16:24])
476
+ presets = list(args[24:32])
477
+ audios = list(args[32:40])
478
+ ref_texts = list(args[40:48])
479
+ designs = list(args[48:56])
480
+ instructs = list(args[56:64])
481
+ langs = list(args[64:72])
482
+ speeds = list(args[72:80])
483
+ gen_temp = args[80] if len(args) > 80 else 0.7
484
+ gen_seed = args[81] if len(args) > 81 else 42
485
+
486
  nar_cfg = VoiceConfig(
487
  name="Narrator", mode=nar_mode, preset=nar_preset if nar_mode == "preset" else None,
488
  ref_audio=nar_audio if nar_mode == "clone" and nar_audio else None,
489
  ref_text=nar_ref_text if nar_mode == "clone" else None,
490
  design_desc=nar_design if nar_mode == "design" else None,
491
+ instruct=nar_instruct, language=nar_lang,
492
+ speed=float(nar_speed) if nar_speed else 1.0,
493
  )
494
  char_configs = {}
495
  for i in range(8):
 
515
  nar = data["narrator"]
516
  chars = data.get("characters", {})
517
 
 
518
  nar_updates = [
519
  gr.update(value=nar.mode),
520
  gr.update(value=nar.preset if nar.preset else "Ryan", visible=nar.mode=="preset"),
 
526
  gr.update(value=nar.speed),
527
  ]
528
 
 
529
  char_updates = []
530
  char_items = list(chars.items())[:8]
531
  for i in range(8):
 
543
  gr.update(value=c.instruct, visible=True),
544
  gr.update(value=c.language, visible=True),
545
  gr.update(value=c.speed, visible=True),
546
+ gr.update(visible=True),
547
+ gr.update(visible=True),
548
  ])
549
  else:
550
  char_updates.extend([
 
559
  gr.update(visible=False),
560
  gr.update(visible=False),
561
  gr.update(visible=False),
562
+ gr.update(visible=False),
563
+ gr.update(visible=False),
564
  ])
565
 
566
  text_sample = data.get("text_sample", "")
567
  return [text_sample] + nar_updates + char_updates + [f"Project loaded! {len(chars)} characters configured."]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
  except Exception as e:
569
  import traceback
570
  traceback.print_exc()
571
+ return [""] + [gr.update()] * 8 + [gr.update(visible=False)] * 104 + [f"Error loading project: {e}"]
572
 
573
 
574
  # ---------------------------------------------------------------------------
 
650
  quick_audio = gr.Audio(label="Quick Audiobook", interactive=False)
651
  quick_status = gr.Textbox(show_label=False, interactive=False)
652
  gr.Markdown("---")
653
+ gr.Markdown("**Quick Generate** uses a single narrator voice for the entire text.")
654
 
655
  with gr.Row():
656
  chapter_selector = gr.Dropdown(
 
678
  outputs=[quick_audio, quick_status],
679
  )
680
 
 
681
  def refresh_chapters(text):
682
  if not text:
683
  return gr.update(choices=["All"], value="All")
 
715
 
716
  with gr.Column(scale=2):
717
  gr.Markdown("## Character Voices")
718
+ gr.Markdown("Configure up to 8 characters. Use **preset** for built-in speakers, **clone** to upload a voice sample, or **design** to describe a voice from text.")
719
 
720
  char_names, char_descs, char_modes, char_presets = [], [], [], []
721
  char_audios, char_ref_texts, char_designs, char_instructs, char_langs, char_speeds = [], [], [], [], [], []
722
+ char_rows, char_preview_btns, char_preview_audios, char_preview_statuses = [], [], [], []
723
 
724
  for i in range(8):
725
  visible_default = (i == 0)
 
739
  with gr.Row():
740
  cpv_btn = gr.Button("🔊 Preview", variant="secondary", visible=visible_default)
741
  cpv_audio = gr.Audio(label="Preview", interactive=False, visible=visible_default)
742
+ cpv_status = gr.Textbox(show_label=False, interactive=False, visible=visible_default)
743
 
744
  cm.change(on_mode_change, inputs=cm, outputs=[cp, ca, crt, cdes])
745
  cpv_btn.click(
746
  preview_char_voice_gpu,
747
  inputs=[cn, cm, cp, ca, crt, cdes, cinstr, cl, cspd],
748
+ outputs=[cpv_audio, cpv_status],
749
  )
750
 
751
  char_rows.append(row)
 
761
  char_speeds.append(cspd)
762
  char_preview_btns.append(cpv_btn)
763
  char_preview_audios.append(cpv_audio)
764
+ char_preview_statuses.append(cpv_status)
765
 
766
  # ==================== TAB 3: Generate ====================
767
  with gr.TabItem("⚡ Generate"):
 
847
  gr.update(value=chars[i].get("speed", 1.0), visible=True),
848
  gr.update(visible=True),
849
  gr.update(visible=True),
850
+ gr.update(visible=True),
851
  ])
852
  else:
853
  updates.extend([
 
864
  gr.update(visible=False),
865
  gr.update(visible=False),
866
  gr.update(visible=False),
867
+ gr.update(visible=False),
868
  ])
869
  return [status] + updates
870
 
 
872
  item for sublist in [
873
  [char_rows[i], char_names[i], char_descs[i], char_modes[i], char_presets[i],
874
  char_audios[i], char_ref_texts[i], char_designs[i], char_instructs[i], char_langs[i],
875
+ char_speeds[i], char_preview_btns[i], char_preview_audios[i], char_preview_statuses[i]]
876
  for i in range(8)
877
  ] for item in sublist
878
  ]
 
884
  char_audios + char_ref_texts + char_designs + char_instructs + char_langs + char_speeds
885
  )
886
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
  gen_inputs = [
888
  story_input, chapter_selector,
889
  nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
890
  gen_temp, gen_seed, output_fmt,
891
  ] + all_char_inputs
892
 
893
+ def wrapped_generate(story_text, chapter_sel, *args):
894
+ text = get_chapter_text(story_text, chapter_sel)
895
+ return generate_audiobook_gpu(text, *args)
896
+
897
  gen_btn.click(
898
  wrapped_generate,
899
  inputs=gen_inputs,
 
901
  )
902
 
903
  # ---------- Project wiring ----------
904
+ save_inputs = [
905
+ story_input,
906
+ nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed,
907
+ ] + all_char_inputs + [gen_temp, gen_seed]
908
+ save_btn.click(do_save_project, inputs=save_inputs, outputs=[project_json])
 
 
 
909
 
910
+ load_outputs = [story_input, nar_mode, nar_preset, nar_audio, nar_ref_text, nar_design, nar_instruct, nar_lang, nar_speed] + extract_outputs[1:] + [load_status]
911
+ load_btn.click(do_load_project, inputs=[load_json], outputs=load_outputs)
 
 
 
912
 
913
  return demo
914