ibrahimabdelaal committed on
Commit
b19aabf
·
1 Parent(s): f66f843

Use subprocess with better error handling and timeout

Browse files
Files changed (1) hide show
  1. app.py +80 -65
app.py CHANGED
@@ -4,44 +4,32 @@ import torchaudio
4
  import spaces
5
  import os
6
  import tempfile
 
 
7
  from pathlib import Path
8
  from huggingface_hub import hf_hub_download
9
 
10
- # Import F5-TTS - use the same approach as working Colab
11
- from f5_tts.infer.utils_infer import infer_process, load_model
12
- from f5_tts.model import DiT
13
 
14
- # Global cache for models
15
- model_cache = {}
16
-
17
- def load_f5_model():
18
- """Load F5-TTS model (cached) - exactly like Colab."""
19
- if "model" not in model_cache:
20
- print("Loading F5-TTS model...")
21
-
22
- # Download model files
23
- vocab_file = hf_hub_download(repo_id="IbrahimSalah/Arabic-F5-TTS-v2", filename="vocab.txt")
24
- ckpt_file = hf_hub_download(repo_id="IbrahimSalah/Arabic-F5-TTS-v2", filename="model_547500_8_18.pt")
25
- config_file = hf_hub_download(repo_id="IbrahimSalah/Arabic-F5-TTS-v2", filename="F5TTS_Base_8_18.yaml")
26
-
27
- device = "cuda" if torch.cuda.is_available() else "cpu"
28
-
29
- # Load model - pass config_file as string path (the function will handle it)
30
- model, vocab_char_map, vocab_size = load_model(
31
- model_cls=DiT,
32
- model_cfg=config_file, # Pass path, load_model will load it internally
33
- ckpt_path=ckpt_file,
34
- vocab_file=vocab_file,
35
- device=device
36
  )
37
-
38
- model_cache["model"] = model
39
- model_cache["vocab_char_map"] = vocab_char_map
40
- model_cache["vocab_size"] = vocab_size
41
- model_cache["device"] = device
42
- print("Model loaded successfully!")
43
-
44
- return model_cache["model"], model_cache["vocab_char_map"], model_cache["vocab_size"], model_cache["device"]
 
 
45
 
46
 
47
  @spaces.GPU(duration=120)
@@ -54,12 +42,8 @@ def generate_speech(
54
  speed: float = 1.0,
55
  progress=gr.Progress()
56
  ):
57
- """Generate speech using F5-TTS - same as Colab."""
58
  try:
59
- # Load model
60
- progress(0.1, desc="Loading model...")
61
- model, vocab_char_map, vocab_size, device = load_f5_model()
62
-
63
  # Validate inputs
64
  if not text.strip():
65
  return None, "❌ Please enter text to synthesize."
@@ -70,44 +54,75 @@ def generate_speech(
70
  if not reference_transcript.strip():
71
  return None, "❌ Please enter the reference transcript."
72
 
73
- # Generate audio
74
- progress(0.3, desc="Generating audio...")
 
75
 
76
  # Create temporary output file
77
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
78
  output_path = tmp_file.name
79
 
80
- # Run inference - exactly like Colab
81
- audio, sample_rate, _ = infer_process(
82
- ref_audio=reference_audio,
83
- ref_text=reference_transcript,
84
- gen_text=text,
85
- model_obj=model,
86
- vocoder=None,
87
- mel_spec_type="vocos",
88
- show_info=print,
89
- progress=gr.Progress(),
90
- target_rms=0.1,
91
- cross_fade_duration=0.15,
92
- nfe_step=nfe_step,
93
- cfg_strength=cfg_strength,
94
- sway_sampling_coef=-1.0,
95
- speed=speed,
96
- fix_duration=None,
97
- device=device,
98
- vocab_char_map=vocab_char_map,
 
 
 
 
 
 
 
99
  )
100
 
101
- # Save audio
102
- progress(0.9, desc="Saving audio...")
103
- torchaudio.save(output_path, audio, sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- duration = audio.shape[-1] / sample_rate
106
- status = f"✅ Generated {duration:.2f}s audio"
 
 
 
 
 
107
 
108
  progress(1.0, desc="Complete!")
109
  return output_path, status
110
 
 
 
111
  except Exception as e:
112
  import traceback
113
  error_msg = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
 
4
  import spaces
5
  import os
6
  import tempfile
7
+ import subprocess
8
+ import shlex
9
  from pathlib import Path
10
  from huggingface_hub import hf_hub_download
11
 
12
# Global cache for downloaded model file paths. Keys are
# "vocab_file" / "ckpt_file" / "config_file"; populated lazily by
# download_model_files() and reused across GPU invocations.
model_files_cache = {}
 
14
 
15
def download_model_files():
    """Download the F5-TTS model files once and cache their local paths.

    Returns:
        dict: Mapping with keys "vocab_file", "ckpt_file", and
        "config_file", each the local filesystem path returned by
        ``hf_hub_download``.
    """
    if not model_files_cache:
        print("Downloading model files...")
        repo_id = "IbrahimSalah/Arabic-F5-TTS-v2"
        # Download into a local dict first so a failure partway through
        # (e.g. a network error on the second file) does not leave the
        # shared cache partially populated — a partial cache is truthy,
        # which would skip re-download on the next call and surface as a
        # KeyError in the caller instead.
        files = {
            "vocab_file": hf_hub_download(
                repo_id=repo_id,
                filename="vocab.txt",
            ),
            "ckpt_file": hf_hub_download(
                repo_id=repo_id,
                filename="model_547500_8_18.pt",
            ),
            "config_file": hf_hub_download(
                repo_id=repo_id,
                filename="F5TTS_Base_8_18.yaml",
            ),
        }
        # Commit to the module-level cache only after all downloads succeed.
        model_files_cache.update(files)
        print("Model files downloaded!")
    return model_files_cache
33
 
34
 
35
  @spaces.GPU(duration=120)
 
42
  speed: float = 1.0,
43
  progress=gr.Progress()
44
  ):
45
+ """Generate speech using F5-TTS CLI - exactly like working Colab."""
46
  try:
 
 
 
 
47
  # Validate inputs
48
  if not text.strip():
49
  return None, "❌ Please enter text to synthesize."
 
54
  if not reference_transcript.strip():
55
  return None, "❌ Please enter the reference transcript."
56
 
57
+ # Download model files
58
+ progress(0.1, desc="Loading model files...")
59
+ files = download_model_files()
60
 
61
  # Create temporary output file
62
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav", mode='w') as tmp_file:
63
  output_path = tmp_file.name
64
 
65
+ # Build CLI command - EXACTLY like working Colab
66
+ progress(0.3, desc="Generating audio...")
67
+
68
+ cmd = [
69
+ "python", "-m", "f5_tts.infer.infer_cli",
70
+ "--model_cfg", files["config_file"],
71
+ "--output_file", output_path,
72
+ "--model", "F5TTS_Base",
73
+ "--ckpt_file", files["ckpt_file"],
74
+ "--vocab_file", files["vocab_file"],
75
+ "--ref_audio", reference_audio,
76
+ "--nfe_step", str(nfe_step),
77
+ "--cfg_strength", str(cfg_strength),
78
+ "--speed", str(speed),
79
+ "--ref_text", reference_transcript,
80
+ "--gen_text", text
81
+ ]
82
+
83
+ print(f"Running command: {' '.join(cmd)}")
84
+
85
+ # Run the CLI command
86
+ result = subprocess.run(
87
+ cmd,
88
+ capture_output=True,
89
+ text=True,
90
+ timeout=300 # 5 minute timeout
91
  )
92
 
93
+ # Print outputs for debugging
94
+ if result.stdout:
95
+ print("STDOUT:", result.stdout)
96
+ if result.stderr:
97
+ print("STDERR:", result.stderr)
98
+
99
+ # Check for errors
100
+ if result.returncode != 0:
101
+ error_msg = f"❌ CLI failed with return code {result.returncode}\n"
102
+ error_msg += f"STDERR: {result.stderr}\n"
103
+ error_msg += f"STDOUT: {result.stdout}"
104
+ return None, error_msg
105
+
106
+ # Check if output file was created
107
+ if not os.path.exists(output_path):
108
+ return None, f"❌ Output file not created. Check logs above."
109
+
110
+ if os.path.getsize(output_path) == 0:
111
+ return None, "❌ Output file is empty."
112
 
113
+ # Get audio duration
114
+ try:
115
+ audio, sample_rate = torchaudio.load(output_path)
116
+ duration = audio.shape[-1] / sample_rate
117
+ status = f"✅ Generated {duration:.2f}s audio"
118
+ except Exception as e:
119
+ status = f"✅ Audio generated (duration unknown: {str(e)})"
120
 
121
  progress(1.0, desc="Complete!")
122
  return output_path, status
123
 
124
+ except subprocess.TimeoutExpired:
125
+ return None, "❌ Generation timed out (>5 minutes)"
126
  except Exception as e:
127
  import traceback
128
  error_msg = f"❌ Error: {str(e)}\n{traceback.format_exc()}"