Opera8 committed on
Commit
ed6eb02
·
verified ·
1 Parent(s): bb89bb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -252
app.py CHANGED
@@ -1,301 +1,267 @@
1
- import gradio as gr
2
  import os
3
- import subprocess
4
  import json
5
- import tempfile
6
- import shutil
7
  import asyncio
 
8
  import requests
 
 
 
 
 
 
9
  from pydub import AudioSegment
10
  import yt_dlp
11
- from google import genai
12
- from google.genai import types
13
 
14
- # ==========================================
15
- # لیست گویندگان برای معرفی به هوش مصنوعی
16
- # ==========================================
17
- # این متن دقیقاً به پرامپت جیمینای اضافه می‌شود تا بداند چه کسانی را در اختیار دارد
18
- CAST_PROMPT = """
19
- AVAILABLE VOICE ACTORS (Use these IDs only):
20
 
21
- -- MALE VOICES --
22
- 1. Charon: Powerful, resonant, empathetic.
23
- 2. Achird: Young, energetic.
24
- 3. Zubenelgenubi: Warm, intimate, commanding.
25
- 4. Rasalgethi: News-anchor style, informative.
26
- 5. Sadachbia: Lively, dynamic, happy.
27
- 6. Sadaltager: Serious, urgent, confident.
28
- 7. Alnilam: Epic, deep, sarcastic.
29
- 8. Schedar: Kind, sweet, excited.
30
- 9. Umbriel: Creative, cartoonish.
31
- 10. Algieba: Stylish, classy, rich.
32
- 11. Algenib: Motivational, loud.
33
- 12. Orus: Sporty, commentator style.
34
- 13. Enceladus: Military, strict, shouting.
35
- 14. Iapetus: Tour-guide, bright.
36
- 15. Puck: Playful, childish.
37
- 16. Fenrir: Bold, aggressive, beast-like.
38
 
39
- -- FEMALE VOICES --
40
- 1. Zephyr: Gentle, soft, pleasant.
41
- 2. Vindemiatrix: Formal, dignified.
42
- 3. Sulafat: Calm, whispering, motherly.
43
- 4. Laomedeia: Friendly, dramatic.
44
- 5. Achernar: Professional, clear.
45
- 6. Gacrux: Mature, raspy, reliable.
46
- 7. Pulcherrima: Modern, cool, robotic.
47
- 8. Despina: Emotional, poetic.
48
- 9. Erinome: Transparent, expressive.
49
- 10. Aoede: Musical, melodic.
50
- 11. Callirrhoe: Storyteller, fantasy.
51
- 12. Autonoe: Natural, casual.
52
- 13. Kore: Soothing, whispering.
53
- 14. Leda: Classic, old-fashioned.
54
- """
55
 
56
- # ==========================================
57
- # توابع کمکی (دانلود و پردازش)
58
- # ==========================================
59
- def download_youtube_video(url, output_path):
60
- ydl_opts = {'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', 'outtmpl': output_path, 'quiet': True, 'no_warnings': True}
61
- with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download([url])
62
- return output_path
63
 
64
- def extract_audio_from_video(video_path, audio_path):
65
- subprocess.run(['ffmpeg', '-i', video_path, '-vn', '-acodec', 'mp3', '-ar', '24000', '-ac', '1', '-b:a', '128k', '-y', audio_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
66
- return audio_path
67
 
68
  def get_video_duration(video_path):
69
- result = subprocess.run(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', video_path], capture_output=True, text=True, check=True)
70
- return float(result.stdout.strip())
71
 
72
- def remove_silence_from_edges(audio_segment, silence_thresh=-45.0, chunk_size=10):
73
- if len(audio_segment) < 100: return audio_segment
74
- trim_ms = 0
75
- while trim_ms < len(audio_segment) and audio_segment[trim_ms:trim_ms+chunk_size].dBFS < silence_thresh: trim_ms += chunk_size
76
- start_trim = trim_ms
77
- trim_ms = 0
78
- while trim_ms < len(audio_segment) and audio_segment[len(audio_segment)-trim_ms-chunk_size:len(audio_segment)-trim_ms].dBFS < silence_thresh: trim_ms += chunk_size
79
- end_trim = len(audio_segment) - trim_ms
80
- return audio_segment[start_trim:end_trim]
 
 
 
 
81
 
82
- def adjust_audio_speed_ffmpeg(input_wav, output_wav, target_duration):
 
83
  try:
84
- audio = AudioSegment.from_file(input_wav)
85
- trimmed_audio = remove_silence_from_edges(audio)
86
- if len(trimmed_audio) < 50: trimmed_audio = audio
87
- temp_trimmed = input_wav.replace(".wav", "_trimmed.wav")
88
- trimmed_audio.export(temp_trimmed, format="wav")
89
-
90
- orig_dur = len(trimmed_audio) / 1000.0
91
- if orig_dur <= 0.1 or target_duration <= 0.1:
92
- shutil.copy(temp_trimmed, output_wav); return
93
-
94
- speed_factor = orig_dur / target_duration
95
- if speed_factor > 2.0: speed_factor = 2.0
96
- if speed_factor < 0.6: speed_factor = 0.6
97
-
98
- atempo_filters = []
99
- current = speed_factor
100
- while current > 2.0: atempo_filters.append("atempo=2.0"); current /= 2.0
101
- while current < 0.5: atempo_filters.append("atempo=0.5"); current /= 0.5
102
- if current != 1.0: atempo_filters.append(f"atempo={current}")
103
-
104
- if not atempo_filters: shutil.copy(temp_trimmed, output_wav); return
105
-
106
- subprocess.run(['ffmpeg', '-y', '-i', temp_trimmed, '-filter:a', ",".join(atempo_filters), output_wav], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
107
- except: shutil.copy(input_wav, output_wav)
108
 
109
- # ==========================================
110
- # تابع تولید صدا با استفاده از API اسپیس پادکست شما
111
- # ==========================================
112
- async def generate_audio_via_podcast_api(text, speaker_name, output_path):
113
- def _sync_request():
114
- API_URL = "https://ezmarynoori-podgen.hf.space/api/generate"
115
- payload = {
116
- "text": text,
117
- "speaker": speaker_name,
118
- "temperature": 0.9,
119
- "is_custom": False
120
- }
121
- try:
122
- response = requests.post(API_URL, json=payload, timeout=300)
123
- if response.status_code == 200:
124
- with open(output_path, 'wb') as f:
125
- f.write(response.content)
126
- return True
127
- else:
128
- print(f"Podcast API Error ({speaker_name}): HTTP {response.status_code} - {response.text}")
129
- return False
130
- except Exception as e:
131
- print(f"Request Exception ({speaker_name}): {e}")
132
- return False
133
 
134
- return await asyncio.to_thread(_sync_request)
135
 
136
- # پردازش تک‌تک قطعات زیرنویس جهت اجرای همزمان (Concurrency)
137
- async def process_single_subtitle(i, sub, temp_dir, sem):
138
- async with sem:
139
- assigned_speaker = sub.get('speaker_id', 'Charon')
140
- text = sub.get('text', '')
141
- start = float(sub.get('start', 0))
142
- end = float(sub.get('end', 0))
143
- dur = end - start
144
 
145
- if not text or dur <= 0.1:
146
- return None
 
147
 
148
- raw_p = os.path.join(temp_dir, f"r_{i}.wav")
149
- adj_p = os.path.join(temp_dir, f"a_{i}.wav")
 
150
 
151
- success = await generate_audio_via_podcast_api(text, assigned_speaker, raw_p)
 
 
152
 
153
- if success and os.path.exists(raw_p):
154
- # اجرای تغییر سرعت در ترد جداگانه برای جلوگیری از مسدود شدن Async Event Loop
155
- await asyncio.to_thread(adjust_audio_speed_ffmpeg, raw_p, adj_p, dur)
156
- return {
157
- "index": i,
158
- "adj_p": adj_p,
159
- "start": start,
160
- "speaker": assigned_speaker
161
- }
162
- else:
163
- print(f"Failed seg {i} with speaker {assigned_speaker}")
164
- return None
165
 
166
- # ==========================================
167
- # هسته اصلی: کارگردان هوشمند
168
- # ==========================================
169
- async def process_dubbing(api_key, video_file, youtube_url, target_lang, progress=gr.Progress()):
170
- if not api_key: raise gr.Error("کلید API جمینای الزامی است.")
171
- if not video_file and not youtube_url: raise gr.Error("ویدیو الزامی است.")
172
 
173
- temp_dir = tempfile.mkdtemp()
174
-
175
  try:
176
- client = genai.Client(http_options={"api_version": "v1beta"}, api_key=api_key)
177
-
178
- video_path = os.path.join(temp_dir, "input.mp4")
179
- audio_path = os.path.join(temp_dir, "source.mp3")
180
- final_path = os.path.join(temp_dir, "output.mp4")
181
-
182
- progress(0.05, desc="دریافت ویدیو...")
183
- if youtube_url: download_youtube_video(youtube_url, video_path)
184
  else:
185
- s_path = video_file.name if hasattr(video_file, 'name') else str(video_file)
186
- shutil.copy(s_path, video_path)
187
-
188
- duration = get_video_duration(video_path)
189
-
190
- progress(0.1, desc="استخراج صدا...")
191
- extract_audio_from_video(video_path, audio_path)
192
 
193
- # --- مرحله هوشمند: تشخیص گوینده و انتخاب نقش (AI Casting) ---
194
- progress(0.2, desc="تحلیل ویدیو، تشخیص گویندگان و انتخاب دوبلور مناسب...")
195
- gemini_file = client.files.upload(file=audio_path)
196
 
197
  prompt = f"""
198
- ROLE: You are an expert Dubbing Director using AI Voice Actors.
199
-
200
  {CAST_PROMPT}
201
 
202
  TASK:
203
- 1. Listen to the audio and identify different speakers (Male/Female, Tone, Emotion).
204
- 2. For EACH sentence, select the BEST MATCH from the 'AVAILABLE VOICE ACTORS' list above.
205
- - If original speaker is Male -> Pick a Male ID (e.g., Charon, Achird...).
206
- - If original speaker is Female -> Pick a Female ID (e.g., Zephyr, Vindemiatrix...).
207
- - Match the emotion (e.g., if original is angry, pick Enceladus).
208
- 3. Translate the text EXACTLY to {target_lang}.
209
- 4. Output JSON Array.
210
 
211
- JSON FORMAT:
212
  [
213
- {{
214
- "start": 0.0,
215
- "end": 3.5,
216
- "speaker_id": "Charon",
217
- "original_speaker_desc": "Male, Deep voice",
218
- "text": "ترجمه دقیق متن به زبان مقصد"
219
- }},
220
- {{
221
- "start": 3.6,
222
- "end": 6.0,
223
- "speaker_id": "Zephyr",
224
- "original_speaker_desc": "Female, Soft voice",
225
- "text": "پاسخ زن در ویدیو"
226
- }}
227
  ]
228
  """
229
 
230
- transcription = client.models.generate_content(
231
- model='gemini-2.5-flash',
232
- contents=[gemini_file, prompt],
233
- config=types.GenerateContentConfig(response_mime_type="application/json")
234
- )
235
-
236
- try: client.files.delete(name=gemini_file.name)
237
- except: pass
238
-
239
- json_clean = transcription.text.strip().replace("```json", "").replace("```", "")
240
- subtitles = json.loads(json_clean)
241
 
242
- if not subtitles: raise ValueError("زیرنویس خالی است")
 
 
 
 
 
 
243
 
244
- # --- تولید صدا با گویندگان متغیر به صورت همزمان ---
245
- final_track = AudioSegment.silent(duration=int(duration * 1000))
246
- total = len(subtitles)
247
- ok_cnt = 0
248
 
249
- sem = asyncio.Semaphore(20) # اجازه اجرای همزمان ۲۰ درخواست به API پادکست
250
- tasks = [process_single_subtitle(i, sub, temp_dir, sem) for i, sub in enumerate(subtitles)]
251
 
252
- completed = 0
253
- for coro in asyncio.as_completed(tasks):
254
- res = await coro
255
- completed += 1
256
- progress(0.3 + (0.6 * (completed / total)), desc=f"تولید صداها ({completed} از {total})...")
257
-
258
- if res is not None:
259
- seg = AudioSegment.from_file(res["adj_p"])
260
- final_track = final_track.overlay(seg, position=int(res["start"] * 1000))
261
- ok_cnt += 1
 
 
 
 
 
 
 
 
 
262
 
263
- if ok_cnt == 0: raise gr.Error("خطا: صدایی تولید نشد.")
 
 
 
 
 
 
 
264
 
265
- progress(0.95, desc="میکس نهایی...")
266
- final_audio_p = os.path.join(temp_dir, "final_mix.wav")
267
- final_track.export(final_audio_p, format="wav")
 
268
 
269
- cmd = ['ffmpeg', '-y', '-i', video_path, '-i', final_audio_p, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', final_path]
270
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- return final_path, json.dumps(subtitles, ensure_ascii=False, indent=2)
273
-
274
- except Exception as e:
275
- raise gr.Error(f"Error: {str(e)}")
276
-
277
- # ==========================================
278
- # رابط کاربری
279
- # ==========================================
280
- with gr.Blocks(title="AI Smart Director Dubbing", theme=gr.themes.Soft()) as app:
281
- gr.Markdown("""
282
- # 🎬 استودیو دوبله هوشمند (AI Director)
283
- **قابلیت ویژه:** تشخیص خودکار گوینده‌های ویدیو (زن/مرد) و انتخاب بهترین صدا از بین ۳۰ گوینده حرفه‌ای توسط هوش مصنوعی.
284
- """)
285
-
286
- with gr.Row():
287
- with gr.Column():
288
- api_key = gr.Textbox(label="کلید API جمینای", type="password")
289
- vid = gr.Video(label="فایل ویدیو")
290
- url = gr.Textbox(label="لینک یوتیوب")
291
- lang = gr.Dropdown(["Persian (فارسی)", "English", "Arabic"], value="Persian (فارسی)", label="زبان مقصد")
292
- btn = gr.Button("🚀 شروع دوبله هوشمند", variant="primary")
293
 
294
- with gr.Column():
295
- out_vid = gr.Video(label="ویدیو خروجی")
296
- out_log = gr.Code(label="گزارش کستینگ (چه کسی چه گفت؟)", language="json")
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- btn.click(process_dubbing, [api_key, vid, url, lang], [out_vid, out_log])
 
299
 
300
- if __name__ == "__main__":
301
- app.launch(ssr_mode=False)
 
 
1
  import os
 
2
  import json
3
+ import uuid
4
+ import time
5
  import asyncio
6
+ import aiohttp
7
  import requests
8
+ import subprocess
9
+ import shutil
10
+ from flask import Flask, request, jsonify, send_file, render_template
11
+ from flask_cors import CORS
12
+ from werkzeug.utils import secure_filename
13
+ import google.generativeai as genai
14
  from pydub import AudioSegment
15
  import yt_dlp
 
 
16
 
17
# Flask application object. Templates are looked up in ``templates`` and
# static assets in ``static``; CORS is enabled with flask-cors defaults.
app = Flask(
    __name__,
    template_folder='templates',
    static_folder='static',
)
CORS(app)

# Working directories: uploaded/downloaded videos and rendered results go to
# UPLOAD_FOLDER, synthesized speech segments go to TEMP_AUDIO_FOLDER.
UPLOAD_FOLDER = 'uploads'
TEMP_AUDIO_FOLDER = 'temp_audio'
for _folder in (UPLOAD_FOLDER, TEMP_AUDIO_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Endpoint of the podcast text-to-speech API that synthesizes each segment.
PODCAST_API_URL = "https://ezmarynoori-podgen.hf.space/api/generate"

# Voice-actor roster that is interpolated verbatim into the Gemini prompt.
CAST_PROMPT = """
AVAILABLE VOICE ACTORS (Use these IDs only):
-- MALE --: Charon, Achird, Zubenelgenubi, Rasalgethi, Sadachbia, Sadaltager, Alnilam, Schedar, Umbriel, Algieba, Algenib, Orus, Enceladus, Iapetus, Puck, Fenrir.
-- FEMALE --: Zephyr, Vindemiatrix, Sulafat, Laomedeia, Achernar, Gacrux, Pulcherrima, Despina, Erinome, Aoede, Callirrhoe, Autonoe, Kore, Leda.
"""
 
35
 
36
+ # --- Helper functions ---
 
 
37
 
38
def get_video_duration(video_path):
    """Return the duration of ``video_path`` in seconds, as printed by ffprobe.

    Raises ``subprocess.CalledProcessError`` when ffprobe exits with a
    non-zero status and ``ValueError`` when its output is not a number.
    """
    probe_args = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        # Print only the bare value, without section wrappers or the key name.
        '-of', 'default=noprint_wrappers=1:nokey=1',
        video_path,
    ]
    raw_output = subprocess.check_output(probe_args)
    duration_text = raw_output.decode('utf-8').strip()
    return float(duration_text)
41
 
42
def extract_audio(video_path):
    """Extract the audio track of ``video_path`` to an MP3 file next to it.

    The output path is the input path with its extension replaced by
    ``.mp3``. If that would collide with the input itself (the input already
    is an ``.mp3``), ``_audio.mp3`` is appended to the stem instead so ffmpeg
    never reads and writes the same file.

    Returns:
        Path of the written MP3 file.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (previously this was ignored and a path to a missing file was
            returned).
    """
    # splitext only looks at the final path component, so dots in directory
    # names (e.g. "./uploads/clip") cannot truncate the path.
    stem, _ext = os.path.splitext(video_path)
    audio_path = stem + '.mp3'
    if os.path.abspath(audio_path) == os.path.abspath(video_path):
        audio_path = stem + '_audio.mp3'

    subprocess.run(
        ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'mp3', '-y', audio_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    return audio_path
47
+
48
def download_youtube(url):
    """Download ``url`` with yt-dlp into UPLOAD_FOLDER and return the file path.

    The file gets a random UUID-based ``.mp4`` name; yt-dlp is asked for the
    ``best[ext=mp4]`` format and runs quietly.
    """
    target_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.mp4")
    options = {
        'format': 'best[ext=mp4]',
        'outtmpl': target_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    return target_path
55
 
56
async def generate_audio_async(session, text, speaker, index):
    """Synthesize one segment through the podcast API without blocking the loop.

    Args:
        session: An open ``aiohttp.ClientSession`` used for the POST request.
        text: Text to be spoken.
        speaker: Voice-actor ID sent to the API as ``speaker``.
        index: Position of the segment in the script; echoed back in the result.

    Returns:
        ``{"index", "status": "success", "file"}`` when the API answered 200
        and the audio was written to TEMP_AUDIO_FOLDER, otherwise
        ``{"index", "status": "failed"}``. Never raises: every failure is
        logged and reported through the status field.
    """
    payload = {"text": text, "speaker": speaker, "temperature": 0.9}
    # aiohttp expects a ClientTimeout object; a bare number is the deprecated form.
    timeout = aiohttp.ClientTimeout(total=300)
    try:
        async with session.post(PODCAST_API_URL, json=payload, timeout=timeout) as resp:
            if resp.status == 200:
                audio_data = await resp.read()
                filename = f"seg_{uuid.uuid4()}.wav"
                path = os.path.join(TEMP_AUDIO_FOLDER, filename)
                with open(path, 'wb') as f:
                    f.write(audio_data)
                return {"index": index, "status": "success", "file": filename}
            # Previously a non-200 answer failed without leaving any trace.
            print(f"Error gen audio {index}: HTTP {resp.status}")
    except Exception as e:
        # repr() because timeout exceptions have an empty str().
        print(f"Error gen audio {index}: {e!r}")
    return {"index": index, "status": "failed"}
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
async def batch_generate_audio(segments):
    """Synthesize all script segments concurrently (at most 20 in flight).

    Args:
        segments: Iterable of script items; each is expected to be a dict
            with ``text`` and ``speaker_id`` keys.

    Returns:
        A list with one result dict per segment, in input order, as produced
        by ``generate_audio_async``. Segments that are not dicts or have no
        text are reported as ``{"index", "status": "failed"}`` without
        calling the API, so a single malformed item no longer aborts the
        whole batch with a KeyError. A missing ``speaker_id`` falls back to
        ``"Charon"``.
    """
    sem = asyncio.Semaphore(20)

    async with aiohttp.ClientSession() as session:

        async def generate_one(idx, item):
            text = item.get('text') if isinstance(item, dict) else None
            if not text:
                return {"index": idx, "status": "failed"}
            speaker = item.get('speaker_id') or 'Charon'
            # The semaphore caps the number of simultaneous API requests.
            async with sem:
                return await generate_audio_async(session, text, speaker, idx)

        tasks = [generate_one(i, seg) for i, seg in enumerate(segments)]
        return await asyncio.gather(*tasks)
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ # --- Flask routes ---
85
 
86
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
 
 
 
 
 
89
 
90
@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve ``filename`` from the ``static`` directory.

    Uses ``send_from_directory`` so that a request path containing ``..``
    cannot escape the directory, and so a missing file yields a 404 instead
    of an unhandled error.
    """
    from flask import send_from_directory

    return send_from_directory('static', filename)
93
 
94
@app.route('/uploads/<path:filename>')
def serve_uploads(filename):
    """Serve ``filename`` from UPLOAD_FOLDER (source and dubbed videos).

    Uses ``send_from_directory`` so that a request path containing ``..``
    cannot escape the directory, and so a missing file yields a 404 instead
    of an unhandled error.
    """
    from flask import send_from_directory

    return send_from_directory(UPLOAD_FOLDER, filename)
97
 
98
@app.route('/temp_audio/<path:filename>')
def serve_audio(filename):
    """Serve a synthesized segment ``filename`` from TEMP_AUDIO_FOLDER.

    Uses ``send_from_directory`` so that a request path containing ``..``
    cannot escape the directory, and so a missing file yields a 404 instead
    of an unhandled error.
    """
    from flask import send_from_directory

    return send_from_directory(TEMP_AUDIO_FOLDER, filename)
101
 
102
@app.route('/api/analyze', methods=['POST'])
def analyze_video():
    """Transcribe, cast and translate a video, then synthesize every segment.

    Form fields:
        api_key: Gemini API key (required).
        youtube_url: URL to download the video from (takes precedence).
        video_file: Uploaded video, used when no URL is given.
        language: Target language for the translation (default ``Persian``).

    Returns:
        JSON ``{"video_filename", "script"}`` where each script item carries
        an ``audio_file`` name (or ``None`` when synthesis failed); a 400
        JSON error when the key or the video is missing; a 500 JSON error
        for any failure during processing.
    """
    api_key = request.form.get('api_key')
    youtube_url = request.form.get('youtube_url')
    video_file = request.files.get('video_file')
    target_lang = request.form.get('language', 'Persian')

    if not api_key:
        return jsonify({"error": "API Key is required"}), 400

    uploaded = None  # Gemini-side file handle, deleted again in ``finally``.
    try:
        # 1. Obtain the video.
        if youtube_url:
            video_path = download_youtube(youtube_url)
        elif video_file:
            filename = secure_filename(f"{uuid.uuid4()}_{video_file.filename}")
            video_path = os.path.join(UPLOAD_FOLDER, filename)
            video_file.save(video_path)
        else:
            return jsonify({"error": "No video provided"}), 400

        # 2. Extract the audio track.
        audio_path = extract_audio(video_path)
        if not os.path.exists(audio_path):
            raise RuntimeError("Audio extraction failed")

        # 3. Send the audio to Gemini.
        # NOTE(review): genai.configure sets the key process-wide, so
        # concurrent requests with different keys can interfere.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.5-flash')

        prompt = f"""
        You are a Dubbing Director.
        {CAST_PROMPT}

        TASK:
        1. Identify speakers in the audio.
        2. Assign a Voice Actor ID from the list to each segment based on gender/tone.
        3. Translate the dialogue to {target_lang}.
        4. Return a JSON Array.

        Format:
        [
        {{"start": 0.0, "end": 4.5, "speaker_id": "Charon", "text": "Translated text..."}},
        ...
        ]
        """

        uploaded = genai.upload_file(audio_path)
        audio_file = uploaded
        # Bounded wait: a file stuck in PROCESSING must not hang the request.
        deadline = time.monotonic() + 300
        while audio_file.state.name == "PROCESSING":
            if time.monotonic() > deadline:
                raise TimeoutError("Gemini did not finish processing the audio")
            time.sleep(1)
            audio_file = genai.get_file(audio_file.name)
        if audio_file.state.name == "FAILED":
            raise RuntimeError("Gemini failed to process the audio")

        res = model.generate_content(
            [prompt, audio_file],
            generation_config={"response_mime_type": "application/json"},
        )
        script = json.loads(res.text)
        # The rest of the pipeline indexes into a list of dicts.
        if not isinstance(script, list) or not all(isinstance(s, dict) for s in script):
            raise ValueError("Gemini returned an unexpected script format")

        # 4. Synthesize all segments concurrently (batch processing).
        results = asyncio.run(batch_generate_audio(script))

        # Attach the generated audio file names to the script items.
        for res_item in results:
            idx = res_item['index']
            if res_item['status'] == 'success':
                script[idx]['audio_file'] = res_item['file']
            else:
                script[idx]['audio_file'] = None

        return jsonify({
            "video_filename": os.path.basename(video_path),
            "script": script
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best effort: do not leave the uploaded audio behind on Gemini.
        if uploaded is not None:
            try:
                genai.delete_file(uploaded.name)
            except Exception as cleanup_error:
                print(f"Could not delete Gemini file: {cleanup_error!r}")
172
 
173
@app.route('/api/regenerate_segment', methods=['POST'])
def regenerate_segment():
    """Re-synthesize a single segment through the podcast API.

    JSON body:
        text: Text to speak (required).
        speaker_id: Voice-actor ID; falls back to ``"Charon"`` when absent.

    Returns:
        JSON ``{"audio_file"}`` naming the new file in TEMP_AUDIO_FOLDER; a
        400 JSON error for a missing/invalid body or empty text; a 500 JSON
        error when the API call fails.
    """
    # silent=True: a missing or malformed body becomes None instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "JSON body is required"}), 400

    text = data.get('text')
    speaker = data.get('speaker_id') or 'Charon'
    if not text:
        return jsonify({"error": "text is required"}), 400

    try:
        # Single synchronous request; the timeout matches the batch path so a
        # stalled API cannot block the worker forever.
        resp = requests.post(
            PODCAST_API_URL,
            json={"text": text, "speaker": speaker, "temperature": 0.9},
            timeout=300,
        )
        if resp.status_code != 200:
            return jsonify({"error": "Generation failed"}), 500

        filename = f"seg_{uuid.uuid4()}.wav"
        path = os.path.join(TEMP_AUDIO_FOLDER, filename)
        with open(path, 'wb') as f:
            f.write(resp.content)
        return jsonify({"audio_file": filename})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
192
 
193
@app.route('/api/render_final', methods=['POST'])
def render_final():
    """Mix the segment audio onto a silent track and mux it with the video.

    JSON body:
        video_filename: Name of the source video inside UPLOAD_FOLDER.
        script: List of segments with ``start``, ``end`` (seconds) and
            ``audio_file`` (name inside TEMP_AUDIO_FOLDER).

    Returns:
        JSON ``{"download_url"}`` for the dubbed video; a 400 JSON error for
        an invalid body; 404 when the video does not exist; a 500 JSON error
        when mixing or muxing fails.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "JSON body is required"}), 400

    video_filename = data.get('video_filename')
    script = data.get('script')
    if not video_filename or not isinstance(script, list):
        return jsonify({"error": "video_filename and script are required"}), 400

    # basename() keeps client-supplied names from pointing outside the folder.
    video_path = os.path.join(UPLOAD_FOLDER, os.path.basename(str(video_filename)))
    if not os.path.exists(video_path):
        return jsonify({"error": "Video not found"}), 404

    final_mix_path = None
    try:
        # Start from silence as long as the video and overlay each segment.
        video_duration = get_video_duration(video_path)
        final_audio = AudioSegment.silent(duration=int(video_duration * 1000))

        for seg in script:
            if not isinstance(seg, dict) or not seg.get('audio_file'):
                continue

            seg_path = os.path.join(
                TEMP_AUDIO_FOLDER, os.path.basename(str(seg['audio_file']))
            )
            if not os.path.exists(seg_path):
                continue

            audio = AudioSegment.from_file(seg_path)
            target_dur_ms = (seg['end'] - seg['start']) * 1000

            # Time-stretch with ffmpeg so the clip fits its slot.
            speed = _tempo_factor(len(audio), target_dur_ms)
            if speed is not None:
                temp_out = os.path.splitext(seg_path)[0] + '_speed.wav'
                stretch = subprocess.run(
                    ['ffmpeg', '-y', '-i', seg_path, '-filter:a', f"atempo={speed}", temp_out],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                # Best effort: fall back to the unstretched clip when ffmpeg
                # fails, and never pick up a stale file from an earlier run.
                if stretch.returncode == 0 and os.path.exists(temp_out):
                    audio = AudioSegment.from_file(temp_out)

            # Place the clip on the timeline.
            start_ms = int(seg['start'] * 1000)
            final_audio = final_audio.overlay(audio, position=start_ms)

        # Write the mixed audio track.
        final_mix_path = os.path.join(UPLOAD_FOLDER, f"mix_{uuid.uuid4()}.wav")
        final_audio.export(final_mix_path, format="wav")

        # Mux: original video stream + new audio (replaces the old audio).
        final_video_path = os.path.join(UPLOAD_FOLDER, f"dubbed_{uuid.uuid4()}.mp4")
        cmd = [
            'ffmpeg', '-y',
            '-i', video_path,
            '-i', final_mix_path,
            '-c:v', 'copy',    # copy the video stream without re-encoding
            '-c:a', 'aac',     # encode the audio
            '-map', '0:v:0',   # picture from the first input
            '-map', '1:a:0',   # sound from the second input
            '-shortest',
            final_video_path
        ]
        # check=True: a failed mux must become an error, not a dead download link.
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        return jsonify({"download_url": f"/uploads/{os.path.basename(final_video_path)}"})

    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # The mix WAV is only an intermediate; its name is never returned.
        if final_mix_path and os.path.exists(final_mix_path):
            try:
                os.remove(final_mix_path)
            except OSError:
                pass


def _tempo_factor(current_ms, target_ms):
    """Return the atempo factor for a clip, or ``None`` to leave it untouched.

    The factor is ``current_ms / target_ms`` clamped to ``[0.6, 2.0]``, which
    always lies inside what a single ffmpeg ``atempo`` filter accepts, so no
    filter chaining is needed. ``None`` is returned when either duration is
    not positive (this previously raised ZeroDivisionError for segments with
    ``end <= start``) or when the change would be 5% or less.
    """
    if current_ms <= 0 or target_ms <= 0:
        return None
    speed = max(0.6, min(2.0, current_ms / target_ms))
    if abs(speed - 1.0) <= 0.05:
        return None
    return speed
265
 
266
if __name__ == '__main__':
    # Start Flask's built-in server, listening on every interface.
    listen_host = '0.0.0.0'
    listen_port = 7860
    app.run(host=listen_host, port=listen_port)