Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,10 @@ import subprocess
|
|
| 9 |
from flask import Flask, request, jsonify, render_template_string, send_from_directory
|
| 10 |
import whisper
|
| 11 |
import edge_tts
|
| 12 |
-
|
| 13 |
-
# Nonaktifkan logging Flask default jika ingin benar-benar bersih
|
| 14 |
import logging
|
|
|
|
|
|
|
|
|
|
| 15 |
log = logging.getLogger('werkzeug')
|
| 16 |
log.setLevel(logging.ERROR)
|
| 17 |
|
|
@@ -27,7 +28,7 @@ VOICE_MAP = {
|
|
| 27 |
'ja-JP': 'ja-JP-KeitaNeural'
|
| 28 |
}
|
| 29 |
|
| 30 |
-
# Load Whisper
|
| 31 |
whisper_model = whisper.load_model("base")
|
| 32 |
|
| 33 |
def get_audio_duration(file_path):
|
|
@@ -71,16 +72,15 @@ async def generate_tts(text, voice, path):
|
|
| 71 |
|
| 72 |
def process_dubbing(task_id, video_path, target_voice, custom_prompt):
|
| 73 |
try:
|
| 74 |
-
tasks[task_id]['status'] = '
|
| 75 |
orig_audio = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_orig.wav")
|
| 76 |
-
# Extract audio (Silent)
|
| 77 |
subprocess.run(['ffmpeg', '-loglevel', 'quiet', '-y', '-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', orig_audio], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 78 |
|
| 79 |
-
#
|
| 80 |
result = whisper_model.transcribe(orig_audio, verbose=False)
|
| 81 |
segments = result['segments']
|
| 82 |
|
| 83 |
-
#
|
| 84 |
translated_segments = translate_segments_llm(segments, custom_prompt)
|
| 85 |
|
| 86 |
processed_audio_files = []
|
|
@@ -96,17 +96,19 @@ def process_dubbing(task_id, video_path, target_voice, custom_prompt):
|
|
| 96 |
|
| 97 |
asyncio.run(generate_tts(text, VOICE_MAP.get(target_voice, 'id-ID-ArdiNeural'), raw_tts))
|
| 98 |
|
|
|
|
| 99 |
tts_dur = get_audio_duration(raw_tts)
|
| 100 |
-
speed = min(max(tts_dur / duration_orig, 0.
|
| 101 |
|
| 102 |
-
# Sync (Silent)
|
| 103 |
subprocess.run(['ffmpeg', '-loglevel', 'quiet', '-y', '-i', raw_tts, '-filter:a', f'atempo={speed}', '-ar', '44100', sync_tts], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 104 |
processed_audio_files.append({'path': sync_tts, 'start': start_t})
|
| 105 |
|
| 106 |
-
tasks[task_id]['status'] = '
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
# Filter: Hapus vokal (bandreject), Volume 50%, Mix Dubbing
|
| 109 |
-
filter_complex = "[0:a]bandreject=f=1200:width_type=h:w=1000,volume=0.5[bg];"
|
| 110 |
inputs_cmd = ['ffmpeg', '-loglevel', 'quiet', '-y', '-i', video_path]
|
| 111 |
amix_inputs = "[bg]"
|
| 112 |
|
|
@@ -117,10 +119,16 @@ def process_dubbing(task_id, video_path, target_voice, custom_prompt):
|
|
| 117 |
filter_complex += f"[{idx}:a]adelay={start_ms}|{start_ms}[dub{idx}];"
|
| 118 |
amix_inputs += f"[dub{idx}]"
|
| 119 |
|
|
|
|
| 120 |
filter_complex += f"{amix_inputs}amix=inputs={len(processed_audio_files)+1}:duration=first:dropout_transition=0[outa]"
|
| 121 |
|
| 122 |
output_video = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_output.mp4")
|
| 123 |
-
final_cmd = inputs_cmd + [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
subprocess.run(final_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 126 |
|
|
@@ -137,7 +145,7 @@ def process_dubbing(task_id, video_path, target_voice, custom_prompt):
|
|
| 137 |
tasks[task_id]['status'] = 'Error'
|
| 138 |
tasks[task_id]['error_message'] = str(e)
|
| 139 |
|
| 140 |
-
# ---
|
| 141 |
|
| 142 |
@app.route('/')
|
| 143 |
def index():
|
|
@@ -165,28 +173,31 @@ HTML_TEMPLATE = """
|
|
| 165 |
<!DOCTYPE html>
|
| 166 |
<html>
|
| 167 |
<head>
|
| 168 |
-
<title>AI Dubbing
|
| 169 |
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 170 |
-
<style>body{background:#000;color:#fff;display:flex;align-items:center;justify-content:center;height:100vh;}.card{background:#111;border:1px solid #333;width:
|
| 171 |
</head>
|
| 172 |
<body>
|
| 173 |
-
<div class="card
|
| 174 |
-
<h4>🎙️
|
| 175 |
-
<p class="text-secondary small">
|
| 176 |
<form id="u">
|
| 177 |
-
<input type="file" id="v" class="form-control mb-2 bg-dark text-white" required>
|
| 178 |
-
<select id="s" class="form-select mb-2 bg-dark text-white">
|
| 179 |
-
<option value="id-ID">Indonesia</option>
|
| 180 |
-
<option value="en-US">English</option>
|
| 181 |
-
<option value="ja-JP">Japanese</option>
|
| 182 |
</select>
|
| 183 |
-
<textarea id="p" class="form-control mb-3 bg-dark text-white" placeholder="
|
| 184 |
<button type="submit" id="b" class="btn btn-primary w-100">Mulai Proses</button>
|
| 185 |
</form>
|
| 186 |
-
<div id="l" class="mt-
|
| 187 |
-
|
| 188 |
-
<
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
| 190 |
</div>
|
| 191 |
</div>
|
| 192 |
<script>
|
|
|
|
| 9 |
from flask import Flask, request, jsonify, render_template_string, send_from_directory
|
| 10 |
import whisper
|
| 11 |
import edge_tts
|
|
|
|
|
|
|
| 12 |
import logging
|
| 13 |
+
|
| 14 |
+
# Matikan log Flask & Werkzeug
|
| 15 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
| 16 |
log = logging.getLogger('werkzeug')
|
| 17 |
log.setLevel(logging.ERROR)
|
| 18 |
|
|
|
|
| 28 |
'ja-JP': 'ja-JP-KeitaNeural'
|
| 29 |
}
|
| 30 |
|
| 31 |
+
# Load Whisper (Model Base sangat ringan untuk RAM 16GB)
|
| 32 |
whisper_model = whisper.load_model("base")
|
| 33 |
|
| 34 |
def get_audio_duration(file_path):
|
|
|
|
| 72 |
|
| 73 |
def process_dubbing(task_id, video_path, target_voice, custom_prompt):
|
| 74 |
try:
|
| 75 |
+
tasks[task_id]['status'] = 'Mengekstrak Suara...'
|
| 76 |
orig_audio = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_orig.wav")
|
|
|
|
| 77 |
subprocess.run(['ffmpeg', '-loglevel', 'quiet', '-y', '-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', orig_audio], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 78 |
|
| 79 |
+
# Transkripsi Whisper (Silent)
|
| 80 |
result = whisper_model.transcribe(orig_audio, verbose=False)
|
| 81 |
segments = result['segments']
|
| 82 |
|
| 83 |
+
# Translasi AI
|
| 84 |
translated_segments = translate_segments_llm(segments, custom_prompt)
|
| 85 |
|
| 86 |
processed_audio_files = []
|
|
|
|
| 96 |
|
| 97 |
asyncio.run(generate_tts(text, VOICE_MAP.get(target_voice, 'id-ID-ArdiNeural'), raw_tts))
|
| 98 |
|
| 99 |
+
# Sync Speed
|
| 100 |
tts_dur = get_audio_duration(raw_tts)
|
| 101 |
+
speed = min(max(tts_dur / duration_orig, 0.6), 1.9) if duration_orig > 0 else 1.0
|
| 102 |
|
|
|
|
| 103 |
subprocess.run(['ffmpeg', '-loglevel', 'quiet', '-y', '-i', raw_tts, '-filter:a', f'atempo={speed}', '-ar', '44100', sync_tts], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 104 |
processed_audio_files.append({'path': sync_tts, 'start': start_t})
|
| 105 |
|
| 106 |
+
tasks[task_id]['status'] = 'Menghapus Vokal & Rendering...'
|
| 107 |
+
|
| 108 |
+
# LOGIKA HAPUS VOKAL (Stereotools Center Removal) + Volume BG 100% (1.0)
|
| 109 |
+
# Filter stereotools mengekstrak side channel (instrumen) dan membuang center channel (vokal).
|
| 110 |
+
filter_complex = "[0:a]stereotools=mlev=0.0:slev=1.0:phasel=1:phaser=1[bg];"
|
| 111 |
|
|
|
|
|
|
|
| 112 |
inputs_cmd = ['ffmpeg', '-loglevel', 'quiet', '-y', '-i', video_path]
|
| 113 |
amix_inputs = "[bg]"
|
| 114 |
|
|
|
|
| 119 |
filter_complex += f"[{idx}:a]adelay={start_ms}|{start_ms}[dub{idx}];"
|
| 120 |
amix_inputs += f"[dub{idx}]"
|
| 121 |
|
| 122 |
+
# Mixing: BG tetap volume 1.0 (aslinya), Dubbing volume 1.0
|
| 123 |
filter_complex += f"{amix_inputs}amix=inputs={len(processed_audio_files)+1}:duration=first:dropout_transition=0[outa]"
|
| 124 |
|
| 125 |
output_video = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_output.mp4")
|
| 126 |
+
final_cmd = inputs_cmd + [
|
| 127 |
+
'-filter_complex', filter_complex,
|
| 128 |
+
'-map', '0:v', '-map', '[outa]',
|
| 129 |
+
'-c:v', 'libx264', '-preset', 'ultrafast', '-crf', '23', '-c:a', 'aac', '-b:a', '192k',
|
| 130 |
+
output_video
|
| 131 |
+
]
|
| 132 |
|
| 133 |
subprocess.run(final_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 134 |
|
|
|
|
| 145 |
tasks[task_id]['status'] = 'Error'
|
| 146 |
tasks[task_id]['error_message'] = str(e)
|
| 147 |
|
| 148 |
+
# --- UI DASAR ---
|
| 149 |
|
| 150 |
@app.route('/')
|
| 151 |
def index():
|
|
|
|
| 173 |
<!DOCTYPE html>
|
| 174 |
<html>
|
| 175 |
<head>
|
| 176 |
+
<title>AI Dubbing No Vocal</title>
|
| 177 |
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 178 |
+
<style>body{background:#000;color:#fff;display:flex;align-items:center;justify-content:center;height:100vh;}.card{background:#111;border:1px solid #333;width:420px;padding:25px;border-radius:20px;}</style>
|
| 179 |
</head>
|
| 180 |
<body>
|
| 181 |
+
<div class="card shadow-lg">
|
| 182 |
+
<h4 class="text-center mb-3">🎙️ Dubbing Sync Pro</h4>
|
| 183 |
+
<p class="text-center text-secondary small mb-4">Background Asli 100% | Vokal Dihapus</p>
|
| 184 |
<form id="u">
|
| 185 |
+
<input type="file" id="v" class="form-control mb-2 bg-dark text-white border-secondary" required>
|
| 186 |
+
<select id="s" class="form-select mb-2 bg-dark text-white border-secondary">
|
| 187 |
+
<option value="id-ID">Indonesia 🇮🇩</option>
|
| 188 |
+
<option value="en-US">English 🇺🇸</option>
|
| 189 |
+
<option value="ja-JP">Japanese 🇯🇵</option>
|
| 190 |
</select>
|
| 191 |
+
<textarea id="p" class="form-control mb-3 bg-dark text-white border-secondary" placeholder="Prompt (Opsional)..."></textarea>
|
| 192 |
<button type="submit" id="b" class="btn btn-primary w-100">Mulai Proses</button>
|
| 193 |
</form>
|
| 194 |
+
<div id="l" class="mt-4 d-none text-center">
|
| 195 |
+
<div class="spinner-border text-primary spinner-border-sm"></div>
|
| 196 |
+
<span id="st" class="ms-2 small">Menyiapkan...</span>
|
| 197 |
+
</div>
|
| 198 |
+
<div id="r" class="mt-4 d-none text-center">
|
| 199 |
+
<video id="vd" controls class="w-100 rounded border border-secondary mb-3"></video>
|
| 200 |
+
<a id="dl" href="#" class="btn btn-success w-100 btn-sm" download>Download Hasil</a>
|
| 201 |
</div>
|
| 202 |
</div>
|
| 203 |
<script>
|