rafibra93 committed on
Commit
53b5571
·
verified ·
1 Parent(s): 7bb1380

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -9
app.py CHANGED
@@ -40,7 +40,7 @@ def tts_bark(text, preset_choice, custom_preset, seed, export_mp3, word_mode):
40
  history_prompt = None
41
  if custom_preset and custom_preset.strip():
42
  history_prompt = custom_preset.strip()
43
- elif preset_choice and preset_choice != "v2/de_speaker_9":
44
  history_prompt = preset_choice
45
 
46
  # Reproduzierbarkeit (optional)
@@ -82,23 +82,44 @@ def tts_bark(text, preset_choice, custom_preset, seed, export_mp3, word_mode):
82
  out_path = wav_path
83
 
84
  # Bei Wortmodus: bestes Segment auto‑trimmen
 
85
  if do_trim:
86
  try:
87
  audio = AudioSegment.from_wav(wav_path)
 
 
88
  chunks = silence.split_on_silence(
89
  audio,
90
- min_silence_len=150,
91
- silence_thresh=audio.dBFS - 16,
92
- keep_silence=30
93
  )
 
 
94
  if chunks:
95
- best = max(chunks, key=lambda c: len(c))
96
- trimmed = best.normalize(headroom=1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  trimmed_path = wav_path.replace(".wav", "_word.wav")
98
- trimmed.export(trimmed_path, format="wav")
99
  out_path = trimmed_path
 
 
 
100
  except Exception as e:
101
- # Trim‑Fehler ist nicht kritisch
102
  print(f"[Trim] Hinweis: {e}")
103
 
104
  # Optional MP3 exportieren
@@ -144,7 +165,7 @@ with gr.Blocks() as demo:
144
  )
145
  text = gr.Textbox(label="Text (z. B. Igbo / Russisch / Englisch ...)", placeholder="Ndeewo! Kedu ka i mere?", lines=3)
146
  with gr.Row():
147
- preset_choice = gr.Dropdown(COMMON_PRESETS, value="Auto (kein Preset)", label="Voice Preset (optional)")
148
  custom_preset = gr.Textbox(label="Eigenes Preset (optional)", placeholder="z. B. v2/en_speaker_0")
149
  with gr.Row():
150
  seed = gr.Number(value=42, precision=0, label="Seed (optional)")
 
40
  history_prompt = None
41
  if custom_preset and custom_preset.strip():
42
  history_prompt = custom_preset.strip()
43
+ elif preset_choice and preset_choice != "Auto (kein Preset)":
44
  history_prompt = preset_choice
45
 
46
  # Reproduzierbarkeit (optional)
 
82
  out_path = wav_path
83
 
84
  # Bei Wortmodus: bestes Segment auto‑trimmen
85
+ # Bei Wortmodus: bestes Segment auto‑trimmen (robust)
86
  if do_trim:
87
  try:
88
  audio = AudioSegment.from_wav(wav_path)
89
+
90
+ # 1) Erst normal versuchen, etwas großzügiger
91
  chunks = silence.split_on_silence(
92
  audio,
93
+ min_silence_len=120, # etwas kürzer
94
+ silence_thresh=audio.dBFS - 18, # toleranter
95
+ keep_silence=20
96
  )
97
+
98
+ best_seg = None
99
  if chunks:
100
+ best_seg = max(chunks, key=lambda c: len(c))
101
+ else:
102
+ # 2) Fallback: nicht‑stille Abschnitte selbst detektieren
103
+ spans = silence.detect_nonsilent(
104
+ audio,
105
+ min_silence_len=120,
106
+ silence_thresh=audio.dBFS - 18
107
+ )
108
+ if spans:
109
+ # längsten nicht‑stillen Abschnitt wählen
110
+ start, end = max(spans, key=lambda s: s[1]-s[0])
111
+ best_seg = audio[start:end]
112
+
113
+ if best_seg:
114
+ # leichte Nachbearbeitung: normalisieren + winzige Ränder
115
+ best_seg = best_seg.normalize(headroom=1.0)
116
  trimmed_path = wav_path.replace(".wav", "_word.wav")
117
+ best_seg.export(trimmed_path, format="wav")
118
  out_path = trimmed_path
119
+ else:
120
+ print("[Trim] Kein Segment gefunden – liefere Original-WAV zurück.")
121
+
122
  except Exception as e:
 
123
  print(f"[Trim] Hinweis: {e}")
124
 
125
  # Optional MP3 exportieren
 
165
  )
166
  text = gr.Textbox(label="Text (z. B. Igbo / Russisch / Englisch ...)", placeholder="Ndeewo! Kedu ka i mere?", lines=3)
167
  with gr.Row():
168
+ preset_choice = gr.Dropdown(COMMON_PRESETS, value="v2/de_speaker_9", label="v2/de_speaker_9")
169
  custom_preset = gr.Textbox(label="Eigenes Preset (optional)", placeholder="z. B. v2/en_speaker_0")
170
  with gr.Row():
171
  seed = gr.Number(value=42, precision=0, label="Seed (optional)")