Ricky01anjay committed on
Commit
38dbb96
·
verified ·
1 Parent(s): 911b297

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -164
app.py CHANGED
@@ -5,12 +5,8 @@ import asyncio
5
  import requests
6
  import json
7
  import time
 
8
  from flask import Flask, request, jsonify, render_template_string, send_from_directory
9
-
10
- # Import MoviePy dengan cara yang lebih aman untuk menghindari ImportError
11
- import moviepy.editor as mp
12
- from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
13
-
14
  import whisper
15
  import edge_tts
16
 
@@ -29,48 +25,41 @@ VOICE_MAP = {
29
  print("Loading Whisper Model...")
30
  whisper_model = whisper.load_model("base")
31
 
32
- def translate_segments_llm(segments, custom_prompt, max_retries=3):
 
 
 
 
 
 
 
 
 
 
33
  instruction = custom_prompt if custom_prompt else "Terjemahkan teks dalam JSON ini ke bahasa target. Balas HANYA dengan JSON array yang valid."
34
-
35
- # Ambil data esensial saja untuk menghemat token
36
  input_data = [{"id": i, "text": s['text']} for i, s in enumerate(segments)]
37
  full_prompt = f"{instruction}\n\nFormat Output: [{{'id': 0, 'text': 'hasil terjemahan'}}, ...]\n\nData:\n{json.dumps(input_data)}"
38
 
39
  url = "https://www.puruboy.kozow.com/api/ai/notegpt"
40
- payload = {
41
- "prompt": full_prompt,
42
- "model": "gemini-3-flash-preview",
43
- "chat_mode": "standard"
44
- }
45
 
46
- for attempt in range(max_retries):
47
- try:
48
- response = requests.post(url, json=payload, timeout=60)
49
- full_response = ""
50
- for line in response.iter_lines():
51
- if line:
52
- decoded = line.decode('utf-8')
53
- if decoded.startswith("data: "):
54
- try:
55
- data = json.loads(decoded[6:])
56
- full_response += data.get("text", "")
57
- except: continue
58
-
59
- # Ekstrak JSON Array
60
- start_idx = full_response.find('[')
61
- end_idx = full_response.rfind(']') + 1
62
- if start_idx != -1 and end_idx != -1:
63
- translated_list = json.loads(full_response[start_idx:end_idx])
64
- # Mapping kembali ke segments asli
65
- for item in translated_list:
66
- segments[item['id']]['translated_text'] = item['text']
67
- return segments
68
- except Exception as e:
69
- print(f"Retry {attempt+1} translation error: {e}")
70
- time.sleep(2)
71
-
72
- # Fallback: gunakan teks asli jika gagal
73
- for s in segments: s['translated_text'] = s['text']
74
  return segments
75
 
76
  async def generate_tts(text, voice, path):
@@ -79,82 +68,93 @@ async def generate_tts(text, voice, path):
79
 
80
  def process_dubbing(task_id, video_path, target_voice, custom_prompt):
81
  try:
82
- tasks[task_id]['status'] = 'Transkripsi Video...'
83
- video = VideoFileClip(video_path)
84
-
85
- if video.duration > 120.0:
86
- raise Exception("Durasi maksimal 2 menit.")
87
 
88
- audio_temp = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_temp.wav")
89
- video.audio.write_audiofile(audio_temp, logger=None)
90
-
91
- # 1. Whisper Transcription
92
- result = whisper_model.transcribe(audio_temp)
93
  segments = result['segments']
94
 
95
- # 2. AI Translation
96
- tasks[task_id]['status'] = 'Menerjemahkan Teks...'
97
  translated_segments = translate_segments_llm(segments, custom_prompt)
98
 
99
- # 3. TTS & Syncing
100
- tasks[task_id]['status'] = 'Sinkronisasi Suara...'
101
- dubbing_clips = []
102
-
103
- # Simpan timestamp untuk ducking
104
- speech_intervals = []
105
 
106
  for i, seg in enumerate(translated_segments):
107
  start_t = seg['start']
108
  end_t = seg['end']
109
  duration_orig = end_t - start_t
110
  text = seg.get('translated_text', seg['text'])
111
-
112
  if not text.strip(): continue
113
 
114
- seg_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_seg_{i}.mp3")
115
- voice = VOICE_MAP.get(target_voice, 'id-ID-ArdiNeural')
116
-
117
- asyncio.run(generate_tts(text, voice, seg_path))
118
 
119
- # Load audio hasil TTS
120
- tts_audio = AudioFileClip(seg_path)
121
 
122
- # HITUNG SINKRONISASI KECEPATAN
123
- # Jika TTS terlalu panjang, percepat agar muat di durasi aslinya
124
- if tts_audio.duration > duration_orig:
125
- speed_factor = tts_audio.duration / duration_orig
126
- # Batas maksimal percepat 1.8x agar tetap terdengar manusiawi
127
- speed_factor = min(speed_factor, 1.8)
128
- tts_audio = tts_audio.fx(mp.vfx.speedx, speed_factor)
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- # Atur posisi audio
131
- tts_audio = tts_audio.set_start(start_t).set_duration(duration_orig)
132
- dubbing_clips.append(tts_audio)
133
- speech_intervals.append((start_t, end_t))
134
 
135
- # 4. AUDIO MIXING (DYNAMIC DUCKING)
136
- # Kita buat fungsi volume untuk mengecilkan suara asli saat dubbing menyala
137
- def volume_filter(t):
138
- for start, end in speech_intervals:
139
- if start <= t <= end:
140
- return 0.1 # Kecilkan ke 10% saat ada dubbing
141
- return 1.0 # Normal 100% saat tidak ada dubbing
142
-
143
- original_audio = video.audio.fl_audio(lambda get_frame, t: volume_filter(t) * get_frame(t))
 
 
144
 
145
- # Gabungkan suara asli yang sudah di-ducking dengan semua clip dubbing
146
- final_audio = CompositeAudioClip([original_audio] + dubbing_clips)
 
 
 
 
147
 
148
- # 5. RENDER
149
- tasks[task_id]['status'] = 'Rendering Video...'
150
- final_video = video.set_audio(final_audio)
151
- output_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_output.mp4")
152
- final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', logger=None)
 
 
 
 
 
 
 
153
 
154
  # Cleanup
155
- video.close()
156
  for f in os.listdir(app.config['UPLOAD_FOLDER']):
157
- if f.startswith(f"{task_id}_seg_") or f.endswith("_temp.wav"):
158
  try: os.remove(os.path.join(app.config['UPLOAD_FOLDER'], f))
159
  except: pass
160
 
@@ -162,11 +162,11 @@ def process_dubbing(task_id, video_path, target_voice, custom_prompt):
162
  tasks[task_id]['result_video'] = f"/download/{task_id}_output.mp4"
163
 
164
  except Exception as e:
165
- print(f"Error detail: {e}")
166
  tasks[task_id]['status'] = 'Error'
167
  tasks[task_id]['error_message'] = str(e)
168
 
169
- # --- ROUTES ---
170
 
171
  @app.route('/')
172
  def index():
@@ -193,88 +193,63 @@ def download(f):
193
 
194
  HTML_TEMPLATE = """
195
  <!DOCTYPE html>
196
- <html lang="id">
197
  <head>
198
- <meta charset="UTF-8">
199
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
200
- <title>AI Dubbing Sync</title>
201
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
202
- <style>
203
- body { background: #f8f9fa; font-family: sans-serif; }
204
- .main-card { max-width: 500px; margin: 50px auto; border-radius: 20px; box-shadow: 0 10px 30px rgba(0,0,0,0.1); }
205
- </style>
206
  </head>
207
  <body>
208
- <div class="container">
209
- <div class="card main-card p-4">
210
- <h3 class="text-center mb-4">🎙️ AI Dubbing Pro</h3>
211
- <p class="text-muted text-center small">Video asli tetap ada, suara disinkronkan otomatis.</p>
212
- <form id="upForm">
213
- <div class="mb-3">
214
- <label class="form-label">Pilih Video (Max 2 Menit)</label>
215
- <input type="file" id="vid" class="form-control" accept="video/*" required>
216
- </div>
217
- <div class="mb-3">
218
- <label class="form-label">Bahasa Tujuan</label>
219
- <select id="vc" class="form-select">
220
- <option value="id-ID">Indonesia 🇮🇩</option>
221
- <option value="en-US">English 🇺🇸</option>
222
- <option value="ja-JP">Japanese 🇯🇵</option>
223
- </select>
224
- </div>
225
- <div class="mb-3">
226
- <label class="form-label">Instruksi Tambahan (Opsional)</label>
227
- <textarea id="pt" class="form-control" rows="2" placeholder="Contoh: Terjemahkan dengan gaya bahasa santai..."></textarea>
228
- </div>
229
- <button type="submit" id="btn" class="btn btn-primary w-100 py-2">Mulai Proses Dubbing</button>
230
- </form>
231
-
232
- <div id="loading" class="mt-4 d-none text-center">
233
- <div class="spinner-grow text-primary" role="status"></div>
234
- <p id="stText" class="mt-2 fw-bold text-primary">Menyiapkan...</p>
235
- </div>
236
-
237
- <div id="resBox" class="mt-4 d-none">
238
- <video id="vRes" controls class="w-100 rounded shadow-sm"></video>
239
- <a id="dBtn" href="#" class="btn btn-success w-100 mt-3" download>Simpan Video</a>
240
  </div>
 
 
 
 
 
 
 
 
 
 
241
  </div>
242
  </div>
243
-
244
  <script>
245
- const form = document.getElementById('upForm');
246
  form.onsubmit = async (e) => {
247
  e.preventDefault();
248
  const fd = new FormData();
249
- fd.append('video', document.getElementById('vid').files[0]);
250
- fd.append('voice', document.getElementById('vc').value);
251
- fd.append('prompt', document.getElementById('pt').value);
252
-
253
  document.getElementById('btn').disabled = true;
254
- document.getElementById('loading').classList.remove('d-none');
255
- document.getElementById('resBox').classList.add('d-none');
256
-
257
- const res = await fetch('/generate', {method: 'POST', body: fd});
258
  const data = await res.json();
259
-
260
- const timer = setInterval(async () => {
261
- const sRes = await fetch('/status?task_id=' + data.task_id);
262
- const sData = await sRes.json();
263
- document.getElementById('stText').innerText = sData.status;
264
-
265
- if (sData.status === 'Selesai') {
266
- clearInterval(timer);
267
- document.getElementById('loading').classList.add('d-none');
268
- document.getElementById('resBox').classList.remove('d-none');
269
- document.getElementById('vRes').src = sData.result_video;
270
- document.getElementById('dBtn').href = sData.result_video;
271
  document.getElementById('btn').disabled = false;
272
- } else if (sData.status === 'Error') {
273
- clearInterval(timer);
274
- alert("Error: " + sData.error_message);
275
- location.reload();
276
  }
277
- }, 2500);
278
  };
279
  </script>
280
  </body>
 
5
  import requests
6
  import json
7
  import time
8
+ import subprocess
9
  from flask import Flask, request, jsonify, render_template_string, send_from_directory
 
 
 
 
 
10
  import whisper
11
  import edge_tts
12
 
 
25
  print("Loading Whisper Model...")
26
  whisper_model = whisper.load_model("base")
27
 
28
def get_audio_duration(file_path):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Args:
        file_path: Path of the audio (or video) file to inspect.

    Returns:
        The container duration in seconds, as a float.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status;
            its diagnostics are available on the exception's ``stderr``.
        ValueError: ffprobe succeeded but did not print a parsable duration
            (for example ``N/A``).
    """
    cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
    ]
    # stderr is captured separately: merging it into stdout would hand any
    # ffprobe diagnostic to float() and turn it into an opaque ValueError.
    result = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
    )
    raw = result.stdout.decode('utf-8', errors='replace').strip()
    try:
        return float(raw)
    except ValueError as exc:
        raise ValueError(
            f"ffprobe returned no usable duration for {file_path!r}: {raw!r}"
        ) from exc
36
+
37
def translate_segments_llm(segments, custom_prompt):
    """Translate transcript segments through the remote LLM endpoint.

    Every segment dict receives a ``translated_text`` key, written in place.
    Segments the service did not translate, and all segments when the request
    or the response parsing fails, keep their original ``text`` as the
    translation, so the caller always gets usable text.

    Args:
        segments: List of dicts that each carry a ``text`` key. Mutated in place.
        custom_prompt: Instruction sent to the model; a falsy value selects the
            built-in default instruction.

    Returns:
        The same ``segments`` list that was passed in.
    """
    if not segments:
        # Nothing to translate, so skip the network round trip entirely.
        return segments

    instruction = custom_prompt if custom_prompt else "Terjemahkan teks dalam JSON ini ke bahasa target. Balas HANYA dengan JSON array yang valid."
    input_data = [{"id": i, "text": s['text']} for i, s in enumerate(segments)]
    full_prompt = f"{instruction}\n\nFormat Output: [{{'id': 0, 'text': 'hasil terjemahan'}}, ...]\n\nData:\n{json.dumps(input_data)}"

    url = "https://www.puruboy.kozow.com/api/ai/notegpt"
    payload = {"prompt": full_prompt, "model": "gemini-3-flash-preview", "chat_mode": "standard"}

    translations = {}
    try:
        response = requests.post(url, json=payload, timeout=60)
        response.raise_for_status()

        chunks = []
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode('utf-8')
            if not decoded.startswith("data: "):
                continue
            try:
                data = json.loads(decoded[6:])
            except ValueError:
                # One non-JSON event must not discard the chunks already received.
                continue
            if isinstance(data, dict) and isinstance(data.get("text"), str):
                chunks.append(data["text"])
        full_text = "".join(chunks)

        # The model may wrap the array in extra prose; keep only the outermost [...] span.
        start_idx = full_text.find('[')
        end_idx = full_text.rfind(']') + 1
        if start_idx != -1 and end_idx > start_idx:
            for item in json.loads(full_text[start_idx:end_idx]):
                if not isinstance(item, dict):
                    continue
                seg_id = item.get('id')
                text = item.get('text')
                # Reject bools, out-of-range and negative ids: a negative id
                # would silently index from the end of the list.
                if (
                    isinstance(seg_id, int)
                    and not isinstance(seg_id, bool)
                    and 0 <= seg_id < len(segments)
                    and isinstance(text, str)
                ):
                    translations[seg_id] = text
    except Exception as e:
        # Best-effort step: report the failure and fall back to the original text.
        print(f"Translation error, using original text: {e}")
        translations = {}

    for i, s in enumerate(segments):
        s['translated_text'] = translations.get(i, s['text'])
    return segments
64
 
65
  async def generate_tts(text, voice, path):
 
68
 
69
  def process_dubbing(task_id, video_path, target_voice, custom_prompt):
70
  try:
71
+ tasks[task_id]['status'] = 'Mengekstrak Audio...'
72
+ orig_audio = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_orig.wav")
73
+ # Extract audio original
74
+ subprocess.run(['ffmpeg', '-y', '-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', orig_audio], check=True)
 
75
 
76
+ tasks[task_id]['status'] = 'Transkripsi Whisper...'
77
+ result = whisper_model.transcribe(orig_audio)
 
 
 
78
  segments = result['segments']
79
 
80
+ tasks[task_id]['status'] = 'Translasi AI...'
 
81
  translated_segments = translate_segments_llm(segments, custom_prompt)
82
 
83
+ tasks[task_id]['status'] = 'Pemrosesan Segmen Audio...'
84
+ processed_audio_files = []
85
+ ducking_filters = []
 
 
 
86
 
87
  for i, seg in enumerate(translated_segments):
88
  start_t = seg['start']
89
  end_t = seg['end']
90
  duration_orig = end_t - start_t
91
  text = seg.get('translated_text', seg['text'])
 
92
  if not text.strip(): continue
93
 
94
+ raw_tts = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_raw_{i}.mp3")
95
+ sync_tts = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_sync_{i}.wav")
 
 
96
 
97
+ # 1. Generate TTS
98
+ asyncio.run(generate_tts(text, VOICE_MAP.get(target_voice, 'id-ID-ArdiNeural'), raw_tts))
99
 
100
+ # 2. Sync Speed (FFmpeg atempo)
101
+ tts_dur = get_audio_duration(raw_tts)
102
+ speed = 1.0
103
+ if tts_dur > duration_orig:
104
+ speed = tts_dur / duration_orig
105
+ speed = min(speed, 2.0) # Maksimal 2x cepat
106
+
107
+ # Gunakan FFmpeg untuk atur speed dan format wav
108
+ subprocess.run([
109
+ 'ffmpeg', '-y', '-i', raw_tts,
110
+ '-filter:a', f'atempo={speed}',
111
+ '-ar', '44100', sync_tts
112
+ ], check=True)
113
+
114
+ processed_audio_files.append({
115
+ 'path': sync_tts,
116
+ 'start': start_t,
117
+ 'duration': duration_orig
118
+ })
119
 
120
+ # 3. Catat interval untuk Ducking (volume 10%)
121
+ ducking_filters.append(f"volume=enable='between(t,{start_t},{end_t})':volume=0.1")
 
 
122
 
123
+ # --- FINAL MIXING DENGAN FFMPEG COMPLEX FILTER ---
124
+ tasks[task_id]['status'] = 'Mixing & Rendering...'
125
+
126
+ # Build Complex Filter
127
+ # a) Ducking Suara Asli
128
+ filter_complex = f"[0:a]{','.join(ducking_filters)}[bg];"
129
+
130
+ # b) Overlaying Dubbing Clips
131
+ inputs_cmd = ['ffmpeg', '-y', '-i', video_path]
132
+ for item in processed_audio_files:
133
+ inputs_cmd.extend(['-i', item['path']])
134
 
135
+ amix_inputs = ""
136
+ for i in range(len(processed_audio_files)):
137
+ idx = i + 1
138
+ start_ms = int(processed_audio_files[i]['start'] * 1000)
139
+ filter_complex += f"[{idx}:a]adelay={start_ms}|{start_ms}[dub{idx}];"
140
+ amix_inputs += f"[dub{idx}]"
141
 
142
+ filter_complex += f"[bg]{amix_inputs}amix=inputs={len(processed_audio_files)+1}:duration=first[outa]"
143
+
144
+ output_video = os.path.join(app.config['UPLOAD_FOLDER'], f"{task_id}_output.mp4")
145
+
146
+ final_cmd = inputs_cmd + [
147
+ '-filter_complex', filter_complex,
148
+ '-map', '0:v', '-map', '[outa]',
149
+ '-c:v', 'libx264', '-preset', 'ultrafast', '-c:a', 'aac', '-b:a', '192k',
150
+ output_video
151
+ ]
152
+
153
+ subprocess.run(final_cmd, check=True)
154
 
155
  # Cleanup
 
156
  for f in os.listdir(app.config['UPLOAD_FOLDER']):
157
+ if task_id in f and "_output.mp4" not in f:
158
  try: os.remove(os.path.join(app.config['UPLOAD_FOLDER'], f))
159
  except: pass
160
 
 
162
  tasks[task_id]['result_video'] = f"/download/{task_id}_output.mp4"
163
 
164
  except Exception as e:
165
+ print(f"Error: {e}")
166
  tasks[task_id]['status'] = 'Error'
167
  tasks[task_id]['error_message'] = str(e)
168
 
169
+ # --- ROUTES & UI (Flask) ---
170
 
171
  @app.route('/')
172
  def index():
 
193
 
194
  HTML_TEMPLATE = """
195
  <!DOCTYPE html>
196
+ <html>
197
  <head>
198
+ <title>AI Dubbing FFMPEG</title>
 
 
199
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
200
+ <style> body { background:#f4f4f9; padding:20px; } .card { max-width:500px; margin:auto; border-radius:15px; } </style>
 
 
 
201
  </head>
202
  <body>
203
+ <div class="card shadow p-4">
204
+ <h3 class="text-center mb-4">🎙️ AI Dubbing Sync</h3>
205
+ <form id="uForm">
206
+ <div class="mb-3"><label>Video (MP4)</label><input type="file" id="v" class="form-control" required></div>
207
+ <div class="mb-3"><label>Target Suara</label>
208
+ <select id="s" class="form-select">
209
+ <option value="id-ID">Indonesia</option>
210
+ <option value="en-US">English</option>
211
+ <option value="ja-JP">Japanese</option>
212
+ </select>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  </div>
214
+ <div class="mb-3"><label>Custom Prompt</label><textarea id="p" class="form-control"></textarea></div>
215
+ <button type="submit" id="btn" class="btn btn-primary w-100">Proses Sekarang</button>
216
+ </form>
217
+ <div id="ld" class="mt-4 d-none text-center">
218
+ <div class="spinner-border text-primary"></div>
219
+ <p id="st" class="mt-2 fw-bold">Processing...</p>
220
+ </div>
221
+ <div id="rs" class="mt-4 d-none">
222
+ <video id="vd" controls class="w-100 rounded"></video>
223
+ <a id="dl" href="#" class="btn btn-success w-100 mt-2" download>Download</a>
224
  </div>
225
  </div>
 
226
  <script>
227
+ const form = document.getElementById('uForm');
228
  form.onsubmit = async (e) => {
229
  e.preventDefault();
230
  const fd = new FormData();
231
+ fd.append('video', document.getElementById('v').files[0]);
232
+ fd.append('voice', document.getElementById('s').value);
233
+ fd.append('prompt', document.getElementById('p').value);
 
234
  document.getElementById('btn').disabled = true;
235
+ document.getElementById('ld').classList.remove('d-none');
236
+ const res = await fetch('/generate', {method:'POST', body:fd});
 
 
237
  const data = await res.json();
238
+ const pol = setInterval(async () => {
239
+ const r = await fetch('/status?task_id=' + data.task_id);
240
+ const d = await r.json();
241
+ document.getElementById('st').innerText = d.status;
242
+ if(d.status === 'Selesai'){
243
+ clearInterval(pol);
244
+ document.getElementById('ld').classList.add('d-none');
245
+ document.getElementById('rs').classList.remove('d-none');
246
+ document.getElementById('vd').src = d.result_video;
247
+ document.getElementById('dl').href = d.result_video;
 
 
248
  document.getElementById('btn').disabled = false;
249
+ } else if(d.status === 'Error'){
250
+ alert(d.error_message); location.reload();
 
 
251
  }
252
+ }, 2000);
253
  };
254
  </script>
255
  </body>