import os
import uuid
import threading
import asyncio
import requests
import json
import time
import subprocess
import logging
import numpy as np
from flask import Flask, request, jsonify, render_template_string, send_from_directory
import whisper
import edge_tts
# --- SILENT LOGS CONFIGURATION ---
# Sets the TF_CPP_MIN_LOG_LEVEL environment variable to '3' and raises the
# werkzeug logger threshold to ERROR so request logs are not printed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
logging.getLogger('werkzeug').setLevel(logging.ERROR)
app = Flask(__name__)
# All uploads, intermediate audio files and rendered outputs live in the
# 'uploads' directory next to this file; it is created at import time.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(BASE_DIR, 'uploads')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# In-memory task registry keyed by task id. Each value is a dict with the keys
# 'status', 'result_video' and 'error_message' (created in generate()).
tasks = {}
# --- VOICE MAP (MALE & FEMALE) ---
# Locale code -> gender label -> voice name passed to edge_tts.Communicate.
VOICE_MAP = {
'id-ID': {'Male': 'id-ID-ArdiNeural', 'Female': 'id-ID-GadisNeural'},
'en-US': {'Male': 'en-US-ChristopherNeural', 'Female': 'en-US-AriaNeural'},
'ja-JP': {'Male': 'ja-JP-KeitaNeural', 'Female': 'ja-JP-NanamiNeural'}
}
# Language mapping for the AI prompt: locale code -> language name (written in
# Indonesian) that is interpolated into the translation instruction.
LANG_MAP = {
'id-ID': 'Indonesia',
'en-US': 'Inggris',
'ja-JP': 'Jepang'
}
# Load the Whisper "base" model once at import time (CPU friendly; FP16 is
# disabled at the transcribe() call site via fp16=False).
whisper_model = whisper.load_model("base")
def get_audio_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the audio/video file to probe.

    Returns:
        The container duration as a float. Falls back to 0.0 when ffprobe
        cannot be launched or when its output is not a number (for example
        because the file is missing or unreadable).
    """
    cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
    ]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    except OSError as e:
        # ffprobe is not installed or not executable: keep the same 0.0
        # fallback as for unparsable output instead of raising.
        print(f"ffprobe warning: {e}")
        return 0.0
    try:
        return float(result.stdout.decode('utf-8', errors='replace').strip())
    except ValueError:
        # Empty output or a non-numeric value such as "N/A".
        return 0.0
def analyze_gender_and_pitch(audio_path):
    """Analyze an audio clip to pick a gender label and a pitch offset.

    The clip is loaded with librosa at 22050 Hz and its fundamental frequency
    is estimated with librosa.yin (65-300 Hz). A mean F0 of 165 Hz or more is
    labelled "Female", anything lower "Male".

    Args:
        audio_path: Path of the audio clip to analyze.

    Returns:
        A ``(gender, pitch_str)`` tuple where ``pitch_str`` is a signed offset
        such as "+5Hz" or "-12Hz", limited to the range -20..+20.
        ``("Male", "+0Hz")`` is returned when the clip is empty, no F0 value is
        available, or any error occurs during analysis.
    """
    try:
        import librosa

        # Load the audio at a standard sample rate.
        samples, _rate = librosa.load(audio_path, sr=22050)
        if len(samples) == 0:
            return "Male", "+0Hz"

        # Fundamental frequency (F0) detection.
        pitches = librosa.yin(samples, fmin=65, fmax=300)
        usable = pitches[~np.isnan(pitches)]
        if len(usable) > 0:
            average = np.mean(usable)
            # Common threshold: 165 Hz and above is treated as female.
            if average >= 165:
                gender, reference = "Female", 210.0
            else:
                gender, reference = "Male", 120.0
            # Offset from the per-gender reference pitch, halved so that the
            # variation between speakers does not become too extreme.
            offset = int((average - reference) / 2)
            # Keep the Edge TTS pitch modifier within -20Hz..+20Hz.
            offset = min(max(offset, -20), 20)
            return gender, f"{offset:+d}Hz"
    except Exception as e:
        print(f"Pitch analysis warning: {e}")
    # Default fallback.
    return "Male", "+0Hz"
def translate_segments_llm(segments, custom_prompt, target_voice):
    """Translate transcript segments through the remote LLM endpoint.

    The segment texts are sent as a JSON array of ``{"id", "text"}`` objects.
    The reply is read as server-sent-event lines (``data: {...}``) whose
    ``text`` fields are concatenated; the JSON array found in that text is
    then mapped back onto the segments by ``id``.

    Args:
        segments: List of dicts that each contain at least a 'text' key. The
            dicts are updated in place with a 'translated_text' key.
        custom_prompt: Optional extra instruction from the user. When given,
            the target language requirement is appended to it.
        target_voice: Locale code (a LANG_MAP key) selecting the target
            language; unknown values fall back to 'Indonesia'.

    Returns:
        The same ``segments`` list. Every segment has 'translated_text' set;
        it equals the original text for segments the model did not return and
        for all segments when the request or parsing fails.
    """
    target_lang = LANG_MAP.get(target_voice, 'Indonesia')
    # The target language is always forced into the prompt, even when the user
    # supplies a custom instruction.
    if custom_prompt:
        instruction = f"{custom_prompt}\n\nPENTING: Terjemahkan SEMUA teks ke dalam bahasa {target_lang}."
    else:
        instruction = f"Terjemahkan teks dalam JSON ini ke bahasa {target_lang} dengan akurat. Balas HANYA dengan JSON array."
    input_data = [{"id": i, "text": s['text']} for i, s in enumerate(segments)]
    full_prompt = f"{instruction}\n\nFormat: [{{'id': 0, 'text': '...'}}]\n\nData:\n{json.dumps(input_data)}"
    url = "https://www.puruboy.kozow.com/api/ai/notegpt"
    payload = {"prompt": full_prompt, "model": "gemini-3-flash-preview", "chat_mode": "standard"}
    try:
        response = requests.post(url, json=payload, timeout=60)
        response.raise_for_status()
        pieces = []
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode('utf-8')
            if not decoded.startswith("data: "):
                continue
            try:
                event = json.loads(decoded[6:])
            except json.JSONDecodeError:
                # Non-JSON event (e.g. an end-of-stream marker): skip it
                # rather than discarding the whole translation.
                continue
            if isinstance(event, dict):
                pieces.append(event.get("text") or "")
        full_text = "".join(pieces)
        start_idx = full_text.find('[')
        end_idx = full_text.rfind(']') + 1
        if start_idx == -1 or end_idx <= start_idx:
            raise ValueError("no JSON array found in the model response")
        translated_list = json.loads(full_text[start_idx:end_idx])
        for item in translated_list:
            try:
                idx = int(item['id'])
                text = item['text']
            except (KeyError, TypeError, ValueError):
                continue  # malformed entry; keep the other translations
            # Reject negative/out-of-range ids so a bad id can never write to
            # the wrong segment.
            if 0 <= idx < len(segments) and isinstance(text, str):
                segments[idx]['translated_text'] = text
    except Exception as e:
        print(f"Translation Error: {e}")
        for s in segments:
            s['translated_text'] = s['text']
    # Segments the model skipped keep their original text.
    for s in segments:
        s.setdefault('translated_text', s['text'])
    return segments
async def generate_tts(text, voice, path, pitch_str="+0Hz"):
    """Synthesize ``text`` with Edge TTS and save the audio to ``path``.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        path: Destination file path for the generated audio.
        pitch_str: Pitch modifier passed to Edge TTS, e.g. "+5Hz" or "-10Hz".
    """
    await edge_tts.Communicate(text, voice, pitch=pitch_str).save(path)
def _run_ffmpeg(args, required=False):
    """Run ffmpeg with ``args`` and report whether it exited successfully.

    Args:
        args: Command-line arguments placed after ``ffmpeg -loglevel error -y``.
        required: When true, a non-zero exit raises RuntimeError carrying the
            tail of ffmpeg's stderr instead of returning False.

    Returns:
        True when ffmpeg exits with status 0, otherwise False.
    """
    proc = subprocess.run(
        ['ffmpeg', '-loglevel', 'error', '-y', *args],
        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
    )
    if proc.returncode == 0:
        return True
    if required:
        detail = proc.stderr.decode('utf-8', errors='replace').strip()
        raise RuntimeError(f"ffmpeg failed (exit {proc.returncode}): {detail[-500:]}")
    return False


def _remove_task_temp_files(task_id):
    """Delete every upload-folder file of a task except its rendered output."""
    upload_dir = app.config['UPLOAD_FOLDER']
    for name in os.listdir(upload_dir):
        if task_id in name and not name.endswith("_output.mp4"):
            try:
                os.remove(os.path.join(upload_dir, name))
            except OSError:
                pass  # best-effort cleanup; a leftover file is not fatal


def process_dubbing(task_id, video_path, target_voice, custom_prompt):
    """Run the whole dubbing pipeline for one uploaded video.

    Steps: extract the audio track, transcribe it with Whisper, translate the
    segments, synthesize each segment with a gender-matched Edge TTS voice,
    time-stretch it towards the original segment length, then mix all dubbed
    clips over the attenuated original audio and render the final MP4.

    Progress and results are reported through ``tasks[task_id]``: 'status' is
    updated at every stage and ends as 'Selesai' (with 'result_video' set) or
    'Error' (with 'error_message' set). Nothing is returned or raised.

    Args:
        task_id: Key of the task entry in ``tasks``; also the file-name prefix
            of every intermediate file.
        video_path: Path of the uploaded source video.
        target_voice: Locale code used to pick the language and voices;
            unknown values fall back to the 'id-ID' voices.
        custom_prompt: Optional user instruction forwarded to the translator.
    """
    task = tasks[task_id]
    upload_dir = app.config['UPLOAD_FOLDER']
    output_filename = f"{task_id}_output.mp4"
    output_path = os.path.join(upload_dir, output_filename)
    try:
        try:
            task['status'] = 'Mengekstrak Audio...'
            orig_audio = os.path.join(upload_dir, f"{task_id}_orig.wav")
            # Without the extracted track nothing else can work, so fail
            # loudly here instead of letting a later step fail obscurely.
            _run_ffmpeg(
                ['-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', orig_audio],
                required=True,
            )

            task['status'] = 'Transkripsi...'
            result = whisper_model.transcribe(orig_audio, verbose=False, fp16=False)
            segments = result['segments']

            task['status'] = f'Translasi AI ({LANG_MAP.get(target_voice, target_voice)})...'
            translated_segments = translate_segments_llm(segments, custom_prompt, target_voice)

            task['status'] = 'Menganalisis Suara & Dubbing...'
            voices = VOICE_MAP.get(target_voice, VOICE_MAP['id-ID'])
            processed_audio_files = []
            for i, seg in enumerate(translated_segments):
                start_t = seg['start']
                duration_orig = seg['end'] - start_t
                text = seg.get('translated_text', seg['text'])
                if not text.strip():
                    continue

                # Cut the original audio of this segment for voice detection.
                # Best effort: if the cut fails the analysis falls back to
                # its default voice.
                chunk_wav = os.path.join(upload_dir, f"{task_id}_chunk_{i}.wav")
                _run_ffmpeg(['-i', orig_audio, '-ss', str(start_t), '-t', str(duration_orig), chunk_wav])

                # Detect male/female and the pitch variation, then choose the
                # voice for this language and gender.
                gender, pitch_str = analyze_gender_and_pitch(chunk_wav)
                selected_voice = voices[gender]

                raw_tts = os.path.join(upload_dir, f"{task_id}_raw_{i}.mp3")
                sync_tts = os.path.join(upload_dir, f"{task_id}_sync_{i}.wav")
                asyncio.run(generate_tts(text, selected_voice, raw_tts, pitch_str))

                # Speed the clip up or down so it roughly fits the original
                # segment, limited to 0.7x-1.8x.
                tts_dur = get_audio_duration(raw_tts)
                speed = min(max(tts_dur / duration_orig, 0.7), 1.8) if duration_orig > 0 else 1.0
                synced = _run_ffmpeg(['-i', raw_tts, '-filter:a', f'atempo={speed}', '-ar', '44100', sync_tts])
                if not synced:
                    # A missing input would break the final render, so drop
                    # only this segment.
                    continue
                processed_audio_files.append({'path': sync_tts, 'start': start_t})

            task['status'] = 'Mixing Audio & Rendering...'
            # Original audio: equalized and attenuated as the background.
            filter_parts = ["[0:a]equalizer=f=1000:width_type=o:w=2:g=-15,volume=0.4[bg]"]
            render_args = ['-i', video_path]
            mix_labels = "[bg]"
            # Input 0 is the video, so dubbed clips start at input index 1.
            for idx, item in enumerate(processed_audio_files, start=1):
                render_args.extend(['-i', item['path']])
                start_ms = int(item['start'] * 1000)
                filter_parts.append(f"[{idx}:a]adelay={start_ms}|{start_ms},volume=3.0[dub{idx}]")
                mix_labels += f"[dub{idx}]"
            filter_parts.append(
                f"{mix_labels}amix=inputs={len(processed_audio_files)+1}:duration=first:dropout_transition=0,volume=1.5[outa]"
            )
            render_args += [
                '-filter_complex', ";".join(filter_parts), '-map', '0:v', '-map', '[outa]',
                '-c:v', 'libx264', '-preset', 'ultrafast', '-c:a', 'aac', '-b:a', '192k', output_path
            ]
            # A failed render must not be reported as a finished task.
            _run_ffmpeg(render_args, required=True)
        finally:
            # Remove temporary files on success and on failure alike.
            _remove_task_temp_files(task_id)
        # Publish the URL before the final status so a client that polls
        # between the two writes never sees 'Selesai' without a video.
        task['result_video'] = f"/download/{output_filename}"
        task['status'] = 'Selesai'
    except Exception as e:
        # Drop a partially written output, if any.
        try:
            os.remove(output_path)
        except OSError:
            pass
        task['error_message'] = str(e)
        task['status'] = 'Error'
# --- ROUTES ---
@app.route('/')
def index():
    """Serve the single-page upload UI rendered from HTML_TEMPLATE."""
    page = render_template_string(HTML_TEMPLATE)
    return page
@app.route('/generate', methods=['POST'])
def generate():
    """Accept a video upload and start a background dubbing task.

    Reads the 'video' file plus the optional 'voice' and 'prompt' form fields,
    stores the upload as ``<task_id>.mp4`` in the upload folder, registers the
    task in ``tasks`` and runs ``process_dubbing`` in a new thread.

    Returns:
        JSON ``{'task_id': ...}`` on success, or JSON ``{'error': 'No file'}``
        with HTTP status 400 when no video was uploaded.
    """
    file = request.files.get('video')
    if not file:
        # A missing upload is a client error, so do not answer with HTTP 200.
        return jsonify({'error': 'No file'}), 400
    task_id = str(uuid.uuid4())
    path = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}.mp4")
    file.save(path)
    tasks[task_id] = {'status': 'Queued', 'result_video': None, 'error_message': None}
    # Form values are read here, inside the request context, and handed to the
    # worker as plain arguments.
    worker = threading.Thread(
        target=process_dubbing,
        args=(task_id, path, request.form.get('voice'), request.form.get('prompt')),
    )
    worker.start()
    return jsonify({'task_id': task_id})
@app.route('/status')
def status():
    """Return the task entry for the ``task_id`` query parameter as JSON.

    An unknown or missing task id yields an empty JSON object.
    """
    task_id = request.args.get('task_id')
    task = tasks.get(task_id, {})
    return jsonify(task)
@app.route('/download/<f>')
def download(f):
    """Send the file named ``f`` from the upload folder."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, f)
# --- HTML WITH TAILWIND CSS ---
# Single-page UI served by index(): an upload form, a loading indicator that
# shows the task status polled from /status every 2 seconds, and a result
# section with the rendered video and a download link.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="id">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>AI Dubbing Pro</title>
  <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-900 text-gray-100 min-h-screen flex items-center justify-center p-4 font-sans">
  <div class="bg-gray-800 rounded-2xl shadow-2xl p-8 w-full max-w-md border border-gray-700">
    <h2 class="text-2xl font-bold text-center mb-2 text-white">🎙️ Dubbing Sync Pro</h2>
    <p class="text-sm text-center text-gray-400 mb-6">Deteksi Gender & Multi-Speaker Auto-Pitch</p>
    <form id="uploadForm" class="space-y-4">
      <div>
        <label class="block text-sm font-medium text-gray-300 mb-1">Upload Video (MP4)</label>
        <input type="file" id="videoFile" accept="video/*" required
          class="block w-full text-sm text-gray-400 file:mr-4 file:py-2 file:px-4 file:rounded-lg file:border-0 file:text-sm file:font-semibold file:bg-blue-600 file:text-white hover:file:bg-blue-700 focus:outline-none bg-gray-700 rounded-lg p-2 border border-gray-600">
      </div>
      <div>
        <label class="block text-sm font-medium text-gray-300 mb-1">Bahasa Target</label>
        <select id="targetVoice" class="w-full bg-gray-700 border border-gray-600 rounded-lg p-2.5 text-white focus:ring-2 focus:ring-blue-500 focus:outline-none">
          <option value="id-ID">Indonesia 🇮🇩</option>
          <option value="en-US">English 🇺🇸</option>
          <option value="ja-JP">Japanese 🇯🇵</option>
        </select>
      </div>
      <div>
        <label class="block text-sm font-medium text-gray-300 mb-1">Custom Prompt AI (Opsional)</label>
        <textarea id="customPrompt" rows="2" placeholder="Gaya bahasa santai, dll..."
          class="w-full bg-gray-700 border border-gray-600 rounded-lg p-2.5 text-white focus:ring-2 focus:ring-blue-500 focus:outline-none resize-none"></textarea>
      </div>
      <button type="submit" id="btnSubmit"
        class="w-full bg-blue-600 hover:bg-blue-700 text-white font-bold py-3 px-4 rounded-lg transition duration-200 shadow-lg shadow-blue-500/30">
        Mulai Dubbing
      </button>
    </form>
    <!-- Loading State -->
    <div id="loadingSection" class="hidden mt-6 flex flex-col items-center justify-center space-y-3">
      <svg class="animate-spin h-8 w-8 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
        <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
        <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
      </svg>
      <span id="statusText" class="text-blue-400 font-medium tracking-wide">Menyiapkan...</span>
    </div>
    <!-- Result State -->
    <div id="resultSection" class="hidden mt-6 space-y-4">
      <video id="resVideo" controls class="w-full rounded-lg border border-gray-600 bg-black"></video>
      <a id="dlBtn" href="#" download
        class="block text-center w-full bg-green-600 hover:bg-green-700 text-white font-bold py-3 px-4 rounded-lg transition duration-200 shadow-lg shadow-green-500/30">
        ⬇️ Download Video
      </a>
    </div>
  </div>
  <script>
    const form = document.getElementById('uploadForm');
    form.onsubmit = async (e) => {
      e.preventDefault();
      const fd = new FormData();
      fd.append('video', document.getElementById('videoFile').files[0]);
      fd.append('voice', document.getElementById('targetVoice').value);
      fd.append('prompt', document.getElementById('customPrompt').value);
      // UI Changes
      document.getElementById('btnSubmit').disabled = true;
      document.getElementById('btnSubmit').classList.add('opacity-50', 'cursor-not-allowed');
      document.getElementById('loadingSection').classList.remove('hidden');
      document.getElementById('resultSection').classList.add('hidden');
      const res = await fetch('/generate', { method: 'POST', body: fd });
      const data = await res.json();
      // Without a task id there is nothing to poll: report the error instead
      // of polling /status?task_id=undefined forever.
      if (!res.ok || !data.task_id) {
        alert("Terjadi Kesalahan: " + (data.error || res.statusText));
        location.reload();
        return;
      }
      const timer = setInterval(async () => {
        const sRes = await fetch('/status?task_id=' + data.task_id);
        const sData = await sRes.json();
        document.getElementById('statusText').innerText = sData.status;
        if (sData.status === 'Selesai') {
          clearInterval(timer);
          document.getElementById('loadingSection').classList.add('hidden');
          document.getElementById('resultSection').classList.remove('hidden');
          document.getElementById('resVideo').src = sData.result_video;
          document.getElementById('dlBtn').href = sData.result_video;
          // Reset button
          document.getElementById('btnSubmit').disabled = false;
          document.getElementById('btnSubmit').classList.remove('opacity-50', 'cursor-not-allowed');
        } else if (sData.status === 'Error') {
          clearInterval(timer);
          alert("Terjadi Kesalahan: " + sData.error_message);
          location.reload();
        }
      }, 2000);
    };
  </script>
</body>
</html>
"""
if __name__ == '__main__':
    # Start the Flask server, listening on all interfaces on port 7860.
    app.run(host='0.0.0.0', port=7860)