Spaces:
Runtime error
Runtime error
video quality
Browse files
- app.py +6 -1
- audio_processing.py +14 -3
- lipsync.py +19 -72
- lipsync_processing.py +6 -13
- processing.py +5 -2
app.py
CHANGED
|
@@ -93,6 +93,11 @@ with gr.Blocks(css=css) as demo:
|
|
| 93 |
value="LatentSync v1.6",
|
| 94 |
label="Model",
|
| 95 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
lipsync_only_btn = gr.Button("👄 Lipsync", variant="primary", size="lg")
|
| 97 |
|
| 98 |
with gr.Row():
|
|
@@ -115,7 +120,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 115 |
|
| 116 |
lipsync_only_btn.click(
|
| 117 |
fn=lipsync_with_audio_target,
|
| 118 |
-
inputs=[video_input, audio_input, session_state, model_type],
|
| 119 |
outputs=[
|
| 120 |
final_video,
|
| 121 |
video_normalized_output,
|
|
|
|
| 93 |
value="LatentSync v1.6",
|
| 94 |
label="Model",
|
| 95 |
)
|
| 96 |
+
quality_level = gr.Radio(
|
| 97 |
+
choices=["Fast", "Normal", "Medium", "Best", "Super Best"],
|
| 98 |
+
value="Normal",
|
| 99 |
+
label="Quality",
|
| 100 |
+
)
|
| 101 |
lipsync_only_btn = gr.Button("👄 Lipsync", variant="primary", size="lg")
|
| 102 |
|
| 103 |
with gr.Row():
|
|
|
|
| 120 |
|
| 121 |
lipsync_only_btn.click(
|
| 122 |
fn=lipsync_with_audio_target,
|
| 123 |
+
inputs=[video_input, audio_input, session_state, model_type, quality_level],
|
| 124 |
outputs=[
|
| 125 |
final_video,
|
| 126 |
video_normalized_output,
|
audio_processing.py
CHANGED
|
@@ -5,14 +5,18 @@ import subprocess
|
|
| 5 |
from ffmpy import FFmpeg, FFRuntimeError
|
| 6 |
|
| 7 |
|
| 8 |
-
def get_audio_duration(audio_path: str) -> float:
|
| 9 |
-
"""Get audio file duration
|
| 10 |
|
| 11 |
Args:
|
| 12 |
audio_path: Path to audio file
|
|
|
|
| 13 |
|
| 14 |
Returns:
|
| 15 |
Duration in seconds
|
|
|
|
|
|
|
|
|
|
| 16 |
"""
|
| 17 |
cmd = [
|
| 18 |
"ffprobe",
|
|
@@ -25,7 +29,14 @@ def get_audio_duration(audio_path: str) -> float:
|
|
| 25 |
audio_path,
|
| 26 |
]
|
| 27 |
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
# def prepare_target_audio(audio_path: str, output_dir: str) -> tuple:
|
|
|
|
| 5 |
from ffmpy import FFmpeg, FFRuntimeError
|
| 6 |
|
| 7 |
|
| 8 |
+
def get_audio_duration(audio_path: str, max_duration: float = 30.0) -> float:
|
| 9 |
+
"""Get audio file duration, raise error if exceeds max_duration
|
| 10 |
|
| 11 |
Args:
|
| 12 |
audio_path: Path to audio file
|
| 13 |
+
max_duration: Maximum duration in seconds (default 30)
|
| 14 |
|
| 15 |
Returns:
|
| 16 |
Duration in seconds
|
| 17 |
+
|
| 18 |
+
Raises:
|
| 19 |
+
ValueError: If audio duration exceeds max_duration
|
| 20 |
"""
|
| 21 |
cmd = [
|
| 22 |
"ffprobe",
|
|
|
|
| 29 |
audio_path,
|
| 30 |
]
|
| 31 |
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
| 32 |
+
duration = float(result.stdout.strip())
|
| 33 |
+
|
| 34 |
+
if duration > max_duration:
|
| 35 |
+
raise ValueError(
|
| 36 |
+
f"Audio duration {duration:.2f}s exceeds maximum {max_duration}s"
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
return duration
|
| 40 |
|
| 41 |
|
| 42 |
# def prepare_target_audio(audio_path: str, output_dir: str) -> tuple:
|
lipsync.py
CHANGED
|
@@ -18,59 +18,29 @@ torch.backends.cudnn.deterministic = False
|
|
| 18 |
os.makedirs("checkpoints", exist_ok=True)
|
| 19 |
|
| 20 |
|
| 21 |
-
def get_gpu_memory_info():
|
| 22 |
-
"""Get
|
| 23 |
-
if not torch.cuda.is_available():
|
| 24 |
-
return "CUDA not available"
|
| 25 |
-
|
| 26 |
-
device = torch.cuda.current_device()
|
| 27 |
-
total = torch.cuda.get_device_properties(device).total_memory / 1024**3
|
| 28 |
-
allocated = torch.cuda.memory_allocated(device) / 1024**3
|
| 29 |
-
reserved = torch.cuda.memory_reserved(device) / 1024**3
|
| 30 |
-
free = total - reserved
|
| 31 |
-
|
| 32 |
-
return f"Total: {total:.2f}GB | Allocated: {allocated:.2f}GB | Reserved: {reserved:.2f}GB | Free: {free:.2f}GB"
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
def get_available_vram():
|
| 36 |
-
"""Get available VRAM in GB"""
|
| 37 |
-
if not torch.cuda.is_available():
|
| 38 |
-
return 0.0
|
| 39 |
-
|
| 40 |
-
device = torch.cuda.current_device()
|
| 41 |
-
total = torch.cuda.get_device_properties(device).total_memory / 1024**3
|
| 42 |
-
reserved = torch.cuda.memory_reserved(device) / 1024**3
|
| 43 |
-
free = total - reserved
|
| 44 |
-
|
| 45 |
-
return free
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def get_optimal_params(available_vram_gb: float) -> tuple:
|
| 49 |
-
"""Get optimal lipsync parameters based on total VRAM
|
| 50 |
|
| 51 |
Args:
|
| 52 |
-
|
| 53 |
|
| 54 |
Returns:
|
| 55 |
-
tuple of (num_frames, num_inference_steps)
|
| 56 |
"""
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
return 20, 40, 2.0
|
| 66 |
-
elif available_vram_gb >= 60.0:
|
| 67 |
-
return 24, 50, 2.5
|
| 68 |
-
else:
|
| 69 |
-
return 16, 15, 1.5
|
| 70 |
|
| 71 |
|
| 72 |
@spaces.GPU
|
| 73 |
-
def apply_lipsync(video_input_path, audio_path, video_out_path, crop_size=256):
|
|
|
|
|
|
|
| 74 |
print(f"\n{'=' * 60}")
|
| 75 |
print(f"LIPSYNC START")
|
| 76 |
print(f"Input video: {video_input_path}")
|
|
@@ -79,8 +49,6 @@ def apply_lipsync(video_input_path, audio_path, video_out_path, crop_size=256):
|
|
| 79 |
print(f"Crop size: {crop_size}x{crop_size}")
|
| 80 |
print(f"{'=' * 60}\n")
|
| 81 |
|
| 82 |
-
print(f"GPU Memory Before: {get_gpu_memory_info()}")
|
| 83 |
-
|
| 84 |
manager = ModelManager.get_instance()
|
| 85 |
|
| 86 |
config = manager.get_latentsync_config()
|
|
@@ -104,34 +72,18 @@ def apply_lipsync(video_input_path, audio_path, video_out_path, crop_size=256):
|
|
| 104 |
if not torch.cuda.is_available():
|
| 105 |
raise RuntimeError("CUDA not available - GPU required for lipsync")
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
available_vram = get_available_vram()
|
| 111 |
-
print(f"Available VRAM before processing: {available_vram:.2f} GB")
|
| 112 |
-
|
| 113 |
-
torch.cuda.empty_cache()
|
| 114 |
-
available_vram_after_clear = get_available_vram()
|
| 115 |
-
print(f"Available VRAM after cache clear: {available_vram_after_clear:.2f} GB")
|
| 116 |
-
|
| 117 |
-
print(
|
| 118 |
-
f"\nCalling get_optimal_params with input: {total_memory / 1024**3:.2f} GB"
|
| 119 |
-
)
|
| 120 |
-
num_frames, num_inference_steps, guidance_scale = get_optimal_params(
|
| 121 |
-
total_memory / 1024**3
|
| 122 |
-
)
|
| 123 |
-
print(
|
| 124 |
-
f"get_optimal_params output: num_frames={num_frames}, num_inference_steps={num_inference_steps}"
|
| 125 |
)
|
| 126 |
|
| 127 |
-
print(f"\
|
|
|
|
| 128 |
print(f" num_frames: {num_frames}")
|
| 129 |
print(f" num_inference_steps: {num_inference_steps}")
|
| 130 |
print(f" guidance_scale: {guidance_scale}")
|
| 131 |
print(f" resolution: {config.data.resolution}")
|
| 132 |
|
| 133 |
print(f"Initial seed: {torch.initial_seed()}")
|
| 134 |
-
print(f"GPU Memory After model load: {get_gpu_memory_info()}")
|
| 135 |
|
| 136 |
print("\nStarting pipeline inference...")
|
| 137 |
print(
|
|
@@ -154,16 +106,13 @@ def apply_lipsync(video_input_path, audio_path, video_out_path, crop_size=256):
|
|
| 154 |
height=crop_size,
|
| 155 |
)
|
| 156 |
print("Pipeline completed successfully")
|
| 157 |
-
print(f"GPU Memory After pipeline: {get_gpu_memory_info()}")
|
| 158 |
|
| 159 |
except RuntimeError as e:
|
| 160 |
error_msg = str(e).lower()
|
| 161 |
print(f"RuntimeError in pipeline: {e}")
|
| 162 |
if "out of memory" in error_msg or "cuda out of memory" in error_msg:
|
| 163 |
print("GPU OOM DETECTED!")
|
| 164 |
-
print(f"GPU Memory at crash: {get_gpu_memory_info()}")
|
| 165 |
torch.cuda.empty_cache()
|
| 166 |
-
print(f"GPU Memory after OOM cleanup: {get_gpu_memory_info()}")
|
| 167 |
raise RuntimeError(
|
| 168 |
"GPU out of memory during lipsync. Try: 1) Shorter video 2) Lower resolution 3) Close other GPU apps"
|
| 169 |
)
|
|
@@ -172,13 +121,11 @@ def apply_lipsync(video_input_path, audio_path, video_out_path, crop_size=256):
|
|
| 172 |
print(f"Unexpected error in pipeline: {e}")
|
| 173 |
print(f"Error type: {type(e).__name__}")
|
| 174 |
traceback.print_exc()
|
| 175 |
-
print(f"GPU Memory at error: {get_gpu_memory_info()}")
|
| 176 |
raise
|
| 177 |
finally:
|
| 178 |
print("Clearing GPU cache...")
|
| 179 |
torch.cuda.empty_cache()
|
| 180 |
gc.collect()
|
| 181 |
-
print(f"GPU Memory After cleanup: {get_gpu_memory_info()}")
|
| 182 |
|
| 183 |
print(f"\n{'=' * 60}")
|
| 184 |
print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
|
|
|
|
| 18 |
os.makedirs("checkpoints", exist_ok=True)
|
| 19 |
|
| 20 |
|
| 21 |
+
def get_quality_params(level: str) -> tuple:
|
| 22 |
+
"""Get lipsync parameters based on quality level
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
Args:
|
| 25 |
+
level: Quality level (Fast, Normal, Medium, Best, Super Best)
|
| 26 |
|
| 27 |
Returns:
|
| 28 |
+
tuple of (num_frames, num_inference_steps, guidance_scale)
|
| 29 |
"""
|
| 30 |
+
params = {
|
| 31 |
+
"Fast": (12, 15, 1.0),
|
| 32 |
+
"Normal": (12, 20, 1.0),
|
| 33 |
+
"Medium": (16, 30, 1.5),
|
| 34 |
+
"Best": (20, 40, 2.0),
|
| 35 |
+
"Super Best": (24, 50, 2.5),
|
| 36 |
+
}
|
| 37 |
+
return params.get(level, (12, 20, 1.0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
@spaces.GPU
|
| 41 |
+
def apply_lipsync(
|
| 42 |
+
video_input_path, audio_path, video_out_path, crop_size=256, quality_level="Normal"
|
| 43 |
+
):
|
| 44 |
print(f"\n{'=' * 60}")
|
| 45 |
print(f"LIPSYNC START")
|
| 46 |
print(f"Input video: {video_input_path}")
|
|
|
|
| 49 |
print(f"Crop size: {crop_size}x{crop_size}")
|
| 50 |
print(f"{'=' * 60}\n")
|
| 51 |
|
|
|
|
|
|
|
| 52 |
manager = ModelManager.get_instance()
|
| 53 |
|
| 54 |
config = manager.get_latentsync_config()
|
|
|
|
| 72 |
if not torch.cuda.is_available():
|
| 73 |
raise RuntimeError("CUDA not available - GPU required for lipsync")
|
| 74 |
|
| 75 |
+
num_frames, num_inference_steps, guidance_scale = get_quality_params(
|
| 76 |
+
quality_level
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
)
|
| 78 |
|
| 79 |
+
print(f"\nQuality level: {quality_level}")
|
| 80 |
+
print(f"Parameters:")
|
| 81 |
print(f" num_frames: {num_frames}")
|
| 82 |
print(f" num_inference_steps: {num_inference_steps}")
|
| 83 |
print(f" guidance_scale: {guidance_scale}")
|
| 84 |
print(f" resolution: {config.data.resolution}")
|
| 85 |
|
| 86 |
print(f"Initial seed: {torch.initial_seed()}")
|
|
|
|
| 87 |
|
| 88 |
print("\nStarting pipeline inference...")
|
| 89 |
print(
|
|
|
|
| 106 |
height=crop_size,
|
| 107 |
)
|
| 108 |
print("Pipeline completed successfully")
|
|
|
|
| 109 |
|
| 110 |
except RuntimeError as e:
|
| 111 |
error_msg = str(e).lower()
|
| 112 |
print(f"RuntimeError in pipeline: {e}")
|
| 113 |
if "out of memory" in error_msg or "cuda out of memory" in error_msg:
|
| 114 |
print("GPU OOM DETECTED!")
|
|
|
|
| 115 |
torch.cuda.empty_cache()
|
|
|
|
| 116 |
raise RuntimeError(
|
| 117 |
"GPU out of memory during lipsync. Try: 1) Shorter video 2) Lower resolution 3) Close other GPU apps"
|
| 118 |
)
|
|
|
|
| 121 |
print(f"Unexpected error in pipeline: {e}")
|
| 122 |
print(f"Error type: {type(e).__name__}")
|
| 123 |
traceback.print_exc()
|
|
|
|
| 124 |
raise
|
| 125 |
finally:
|
| 126 |
print("Clearing GPU cache...")
|
| 127 |
torch.cuda.empty_cache()
|
| 128 |
gc.collect()
|
|
|
|
| 129 |
|
| 130 |
print(f"\n{'=' * 60}")
|
| 131 |
print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
|
lipsync_processing.py
CHANGED
|
@@ -49,6 +49,7 @@ def apply_lipsync_to_video(
|
|
| 49 |
audio_16k_path: str,
|
| 50 |
output_dir: str,
|
| 51 |
model_type: str = "LatentSync v1.6",
|
|
|
|
| 52 |
) -> tuple:
|
| 53 |
"""Apply lipsync to video using clean 16k audio
|
| 54 |
|
|
@@ -57,6 +58,7 @@ def apply_lipsync_to_video(
|
|
| 57 |
audio_16k_path: Path to 16kHz audio
|
| 58 |
output_dir: Directory to save output
|
| 59 |
model_type: Model type for lipsync ("LatentSync v1.6" or "MuseTalk v1.5")
|
|
|
|
| 60 |
|
| 61 |
Returns:
|
| 62 |
Tuple of (lipsynced_video_path, video_info)
|
|
@@ -67,9 +69,11 @@ def apply_lipsync_to_video(
|
|
| 67 |
if model_type == "LatentSync v1.6":
|
| 68 |
crop_size = 512
|
| 69 |
print(
|
| 70 |
-
f"Using LatentSync: video={video_path}, audio={audio_16k_path}, crop_size={crop_size}"
|
|
|
|
|
|
|
|
|
|
| 71 |
)
|
| 72 |
-
apply_lipsync(video_path, audio_16k_path, lipsynced_video, crop_size)
|
| 73 |
|
| 74 |
elif model_type == "MuseTalk v1.5":
|
| 75 |
from musetalk import apply_musetalk_lipsync
|
|
@@ -101,14 +105,3 @@ def apply_lipsync_to_video(
|
|
| 101 |
print(f"Runtime Error in lipsync processing: {e}")
|
| 102 |
traceback.print_exc()
|
| 103 |
raise
|
| 104 |
-
except Exception:
|
| 105 |
-
raise
|
| 106 |
-
except Exception as e:
|
| 107 |
-
print(f"Error in apply_lipsync_to_video: {e}")
|
| 108 |
-
traceback.print_exc()
|
| 109 |
-
raise
|
| 110 |
-
|
| 111 |
-
except Exception as e:
|
| 112 |
-
print(f"Error in apply_lipsync_to_video: {e}")
|
| 113 |
-
traceback.print_exc()
|
| 114 |
-
raise
|
|
|
|
| 49 |
audio_16k_path: str,
|
| 50 |
output_dir: str,
|
| 51 |
model_type: str = "LatentSync v1.6",
|
| 52 |
+
quality_level: str = "Normal",
|
| 53 |
) -> tuple:
|
| 54 |
"""Apply lipsync to video using clean 16k audio
|
| 55 |
|
|
|
|
| 58 |
audio_16k_path: Path to 16kHz audio
|
| 59 |
output_dir: Directory to save output
|
| 60 |
model_type: Model type for lipsync ("LatentSync v1.6" or "MuseTalk v1.5")
|
| 61 |
+
quality_level: Quality level ("Fast", "Normal", "Medium", "Best", "Super Best")
|
| 62 |
|
| 63 |
Returns:
|
| 64 |
Tuple of (lipsynced_video_path, video_info)
|
|
|
|
| 69 |
if model_type == "LatentSync v1.6":
|
| 70 |
crop_size = 512
|
| 71 |
print(
|
| 72 |
+
f"Using LatentSync: video={video_path}, audio={audio_16k_path}, crop_size={crop_size}, quality={quality_level}"
|
| 73 |
+
)
|
| 74 |
+
apply_lipsync(
|
| 75 |
+
video_path, audio_16k_path, lipsynced_video, crop_size, quality_level
|
| 76 |
)
|
|
|
|
| 77 |
|
| 78 |
elif model_type == "MuseTalk v1.5":
|
| 79 |
from musetalk import apply_musetalk_lipsync
|
|
|
|
| 105 |
print(f"Runtime Error in lipsync processing: {e}")
|
| 106 |
traceback.print_exc()
|
| 107 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
processing.py
CHANGED
|
@@ -321,6 +321,7 @@ def process_lipsync_with_audio_target_new(
|
|
| 321 |
audio_file,
|
| 322 |
session_id=None,
|
| 323 |
model_type="latentsync",
|
|
|
|
| 324 |
progress=gr.Progress(track_tqdm=True),
|
| 325 |
):
|
| 326 |
"""Workflow mới: Chuẩn hóa YouTube rồi lipsync
|
|
@@ -338,6 +339,7 @@ def process_lipsync_with_audio_target_new(
|
|
| 338 |
audio_file: Path to audio target (English only)
|
| 339 |
session_id: Session identifier
|
| 340 |
model_type: Model type for lipsync ("latentsync" or "musetalk")
|
|
|
|
| 341 |
progress: Progress tracking object
|
| 342 |
|
| 343 |
Returns:
|
|
@@ -427,7 +429,7 @@ def process_lipsync_with_audio_target_new(
|
|
| 427 |
with timer("Applying lipsync"):
|
| 428 |
try:
|
| 429 |
lipsynced_video, lipsynced_info = apply_lipsync_to_video(
|
| 430 |
-
video_normalized, audio_16k, output_dir, model_type
|
| 431 |
)
|
| 432 |
logger.info(
|
| 433 |
f"Lipsynced video: {lipsynced_video}, size: {lipsynced_info['width']}x{lipsynced_info['height']}"
|
|
@@ -471,6 +473,7 @@ def lipsync_with_audio_target(
|
|
| 471 |
audio_file,
|
| 472 |
session_id=None,
|
| 473 |
model_type="LatentSync v1.6",
|
|
|
|
| 474 |
progress=gr.Progress(track_tqdm=True),
|
| 475 |
):
|
| 476 |
"""Wrapper for Gradio: Lipsync video source with audio target (English only)
|
|
@@ -483,5 +486,5 @@ def lipsync_with_audio_target(
|
|
| 483 |
if audio_file is None:
|
| 484 |
raise gr.Error("Please upload a target audio.")
|
| 485 |
return process_lipsync_with_audio_target_new(
|
| 486 |
-
video_file, audio_file, session_id, model_type, progress
|
| 487 |
)
|
|
|
|
| 321 |
audio_file,
|
| 322 |
session_id=None,
|
| 323 |
model_type="latentsync",
|
| 324 |
+
quality_level="Normal",
|
| 325 |
progress=gr.Progress(track_tqdm=True),
|
| 326 |
):
|
| 327 |
"""Workflow mới: Chuẩn hóa YouTube rồi lipsync
|
|
|
|
| 339 |
audio_file: Path to audio target (English only)
|
| 340 |
session_id: Session identifier
|
| 341 |
model_type: Model type for lipsync ("latentsync" or "musetalk")
|
| 342 |
+
quality_level: Quality level ("Fast", "Normal", "Medium", "Best", "Super Best")
|
| 343 |
progress: Progress tracking object
|
| 344 |
|
| 345 |
Returns:
|
|
|
|
| 429 |
with timer("Applying lipsync"):
|
| 430 |
try:
|
| 431 |
lipsynced_video, lipsynced_info = apply_lipsync_to_video(
|
| 432 |
+
video_normalized, audio_16k, output_dir, model_type, quality_level
|
| 433 |
)
|
| 434 |
logger.info(
|
| 435 |
f"Lipsynced video: {lipsynced_video}, size: {lipsynced_info['width']}x{lipsynced_info['height']}"
|
|
|
|
| 473 |
audio_file,
|
| 474 |
session_id=None,
|
| 475 |
model_type="LatentSync v1.6",
|
| 476 |
+
quality_level="Normal",
|
| 477 |
progress=gr.Progress(track_tqdm=True),
|
| 478 |
):
|
| 479 |
"""Wrapper for Gradio: Lipsync video source with audio target (English only)
|
|
|
|
| 486 |
if audio_file is None:
|
| 487 |
raise gr.Error("Please upload a target audio.")
|
| 488 |
return process_lipsync_with_audio_target_new(
|
| 489 |
+
video_file, audio_file, session_id, model_type, quality_level, progress
|
| 490 |
)
|