Spaces:

Opera8
/

Geminidub1

Sleeping

App Files Files Community

Opera8 committed on Feb 23

Commit

ed6eb02

verified ·

1 Parent(s): bb89bb8

Update app.py

Browse files

Files changed (1) hide show

app.py +218 -252

app.py CHANGED Viewed

@@ -1,301 +1,267 @@
-import gradio as gr
 import os
-import subprocess
 import json
-import tempfile
-import shutil
 import asyncio
 import requests
 from pydub import AudioSegment
 import yt_dlp
-from google import genai
-from google.genai import types
-# ==========================================
-# لیست گویندگان برای معرفی به هوش مصنوعی
-# ==========================================
-# این متن دقیقاً به پرامپت جیمینای اضافه می‌شود تا بداند چه کسانی را در اختیار دارد
-CAST_PROMPT = """
-AVAILABLE VOICE ACTORS (Use these IDs only):
--- MALE VOICES --
-1. Charon: Powerful, resonant, empathetic.
-2. Achird: Young, energetic.
-3. Zubenelgenubi: Warm, intimate, commanding.
-4. Rasalgethi: News-anchor style, informative.
-5. Sadachbia: Lively, dynamic, happy.
-6. Sadaltager: Serious, urgent, confident.
-7. Alnilam: Epic, deep, sarcastic.
-8. Schedar: Kind, sweet, excited.
-9. Umbriel: Creative, cartoonish.
-10. Algieba: Stylish, classy, rich.
-11. Algenib: Motivational, loud.
-12. Orus: Sporty, commentator style.
-13. Enceladus: Military, strict, shouting.
-14. Iapetus: Tour-guide, bright.
-15. Puck: Playful, childish.
-16. Fenrir: Bold, aggressive, beast-like.
--- FEMALE VOICES --
-1. Zephyr: Gentle, soft, pleasant.
-2. Vindemiatrix: Formal, dignified.
-3. Sulafat: Calm, whispering, motherly.
-4. Laomedeia: Friendly, dramatic.
-5. Achernar: Professional, clear.
-6. Gacrux: Mature, raspy, reliable.
-7. Pulcherrima: Modern, cool, robotic.
-8. Despina: Emotional, poetic.
-9. Erinome: Transparent, expressive.
-10. Aoede: Musical, melodic.
-11. Callirrhoe: Storyteller, fantasy.
-12. Autonoe: Natural, casual.
-13. Kore: Soothing, whispering.
-14. Leda: Classic, old-fashioned.
-"""
-# ==========================================
-# توابع کمکی (دانلود و پردازش)
-# ==========================================
-def download_youtube_video(url, output_path):
-    ydl_opts = {'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', 'outtmpl': output_path, 'quiet': True, 'no_warnings': True}
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download([url])
-    return output_path
-def extract_audio_from_video(video_path, audio_path):
-    subprocess.run(['ffmpeg', '-i', video_path, '-vn', '-acodec', 'mp3', '-ar', '24000', '-ac', '1', '-b:a', '128k', '-y', audio_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
-    return audio_path
 def get_video_duration(video_path):
-    result = subprocess.run(['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', video_path], capture_output=True, text=True, check=True)
-    return float(result.stdout.strip())
-def remove_silence_from_edges(audio_segment, silence_thresh=-45.0, chunk_size=10):
-    if len(audio_segment) < 100: return audio_segment
-    trim_ms = 0
-    while trim_ms < len(audio_segment) and audio_segment[trim_ms:trim_ms+chunk_size].dBFS < silence_thresh: trim_ms += chunk_size
-    start_trim = trim_ms
-    trim_ms = 0
-    while trim_ms < len(audio_segment) and audio_segment[len(audio_segment)-trim_ms-chunk_size:len(audio_segment)-trim_ms].dBFS < silence_thresh: trim_ms += chunk_size
-    end_trim = len(audio_segment) - trim_ms
-    return audio_segment[start_trim:end_trim]
-def adjust_audio_speed_ffmpeg(input_wav, output_wav, target_duration):
     try:
-        audio = AudioSegment.from_file(input_wav)
-        trimmed_audio = remove_silence_from_edges(audio)
-        if len(trimmed_audio) < 50: trimmed_audio = audio
-        temp_trimmed = input_wav.replace(".wav", "_trimmed.wav")
-        trimmed_audio.export(temp_trimmed, format="wav")
-        orig_dur = len(trimmed_audio) / 1000.0
-        if orig_dur <= 0.1 or target_duration <= 0.1:
-            shutil.copy(temp_trimmed, output_wav); return
-        speed_factor = orig_dur / target_duration
-        if speed_factor > 2.0: speed_factor = 2.0
-        if speed_factor < 0.6: speed_factor = 0.6
-        atempo_filters = []
-        current = speed_factor
-        while current > 2.0: atempo_filters.append("atempo=2.0"); current /= 2.0
-        while current < 0.5: atempo_filters.append("atempo=0.5"); current /= 0.5
-        if current != 1.0: atempo_filters.append(f"atempo={current}")
-        if not atempo_filters: shutil.copy(temp_trimmed, output_wav); return
-        subprocess.run(['ffmpeg', '-y', '-i', temp_trimmed, '-filter:a', ",".join(atempo_filters), output_wav], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
-    except: shutil.copy(input_wav, output_wav)
-# ==========================================
-# تابع تولید صدا با استفاده از API اسپیس پادکست شما
-# ==========================================
-async def generate_audio_via_podcast_api(text, speaker_name, output_path):
-    def _sync_request():
-        API_URL = "https://ezmarynoori-podgen.hf.space/api/generate"
-        payload = {
-            "text": text,
-            "speaker": speaker_name,
-            "temperature": 0.9,
-            "is_custom": False
-        }
-        try:
-            response = requests.post(API_URL, json=payload, timeout=300)
-            if response.status_code == 200:
-                with open(output_path, 'wb') as f:
-                    f.write(response.content)
-                return True
-            else:
-                print(f"Podcast API Error ({speaker_name}): HTTP {response.status_code} - {response.text}")
-                return False
-        except Exception as e:
-            print(f"Request Exception ({speaker_name}): {e}")
-            return False
-    return await asyncio.to_thread(_sync_request)
-# پردازش تک‌تک قطعات زیرنویس جهت اجرای همزمان (Concurrency)
-async def process_single_subtitle(i, sub, temp_dir, sem):
-    async with sem:
-        assigned_speaker = sub.get('speaker_id', 'Charon')
-        text = sub.get('text', '')
-        start = float(sub.get('start', 0))
-        end = float(sub.get('end', 0))
-        dur = end - start
-        if not text or dur <= 0.1:
-            return None
-        raw_p = os.path.join(temp_dir, f"r_{i}.wav")
-        adj_p = os.path.join(temp_dir, f"a_{i}.wav")
-        success = await generate_audio_via_podcast_api(text, assigned_speaker, raw_p)
-        if success and os.path.exists(raw_p):
-            # اجرای تغییر سرعت در ترد جداگانه برای جلوگیری از مسدود شدن Async Event Loop
-            await asyncio.to_thread(adjust_audio_speed_ffmpeg, raw_p, adj_p, dur)
-            return {
-                "index": i,
-                "adj_p": adj_p,
-                "start": start,
-                "speaker": assigned_speaker
-            }
-        else:
-            print(f"Failed seg {i} with speaker {assigned_speaker}")
-            return None
-# ==========================================
-# هسته اصلی: کارگردان هوشمند
-# ==========================================
-async def process_dubbing(api_key, video_file, youtube_url, target_lang, progress=gr.Progress()):
-    if not api_key: raise gr.Error("کلید API جمینای الزامی است.")
-    if not video_file and not youtube_url: raise gr.Error("ویدیو الزامی است.")
-    temp_dir = tempfile.mkdtemp()
     try:
-        client = genai.Client(http_options={"api_version": "v1beta"}, api_key=api_key)
-        video_path = os.path.join(temp_dir, "input.mp4")
-        audio_path = os.path.join(temp_dir, "source.mp3")
-        final_path = os.path.join(temp_dir, "output.mp4")
-        progress(0.05, desc="دریافت ویدیو...")
-        if youtube_url: download_youtube_video(youtube_url, video_path)
         else:
-            s_path = video_file.name if hasattr(video_file, 'name') else str(video_file)
-            shutil.copy(s_path, video_path)
-        duration = get_video_duration(video_path)
-        progress(0.1, desc="استخراج صدا...")
-        extract_audio_from_video(video_path, audio_path)
-        # --- مرحله هوشمند: تشخیص گوینده و انتخاب نقش (AI Casting) ---
-        progress(0.2, desc="تحلیل ویدیو، تشخیص گویندگان و انتخاب دوبلور مناسب...")
-        gemini_file = client.files.upload(file=audio_path)
         prompt = f"""
-        ROLE: You are an expert Dubbing Director using AI Voice Actors.
         {CAST_PROMPT}
         TASK:
-        1. Listen to the audio and identify different speakers (Male/Female, Tone, Emotion).
-        2. For EACH sentence, select the BEST MATCH from the 'AVAILABLE VOICE ACTORS' list above.
-           - If original speaker is Male -> Pick a Male ID (e.g., Charon, Achird...).
-           - If original speaker is Female -> Pick a Female ID (e.g., Zephyr, Vindemiatrix...).
-           - Match the emotion (e.g., if original is angry, pick Enceladus).
-        3. Translate the text EXACTLY to {target_lang}.
-        4. Output JSON Array.
-        JSON FORMAT:
         [
-          {{
-            "start": 0.0,
-            "end": 3.5,
-            "speaker_id": "Charon",
-            "original_speaker_desc": "Male, Deep voice",
-            "text": "ترجمه دقیق متن به زبان مقصد"
-          }},
-          {{
-            "start": 3.6,
-            "end": 6.0,
-            "speaker_id": "Zephyr",
-            "original_speaker_desc": "Female, Soft voice",
-            "text": "پاسخ زن در ویدیو"
-          }}
         ]
         """
-        transcription = client.models.generate_content(
-            model='gemini-2.5-flash',
-            contents=[gemini_file, prompt],
-            config=types.GenerateContentConfig(response_mime_type="application/json")
-        )
-        try: client.files.delete(name=gemini_file.name)
-        except: pass
-        json_clean = transcription.text.strip().replace("```json", "").replace("```", "")
-        subtitles = json.loads(json_clean)
-        if not subtitles: raise ValueError("زیرنویس خالی است")
-        # --- تولید صدا با گویندگان متغیر به صورت همزمان ---
-        final_track = AudioSegment.silent(duration=int(duration * 1000))
-        total = len(subtitles)
-        ok_cnt = 0
-        sem = asyncio.Semaphore(20) # اجازه اجرای همزمان ۲۰ درخواست به API پادکست
-        tasks = [process_single_subtitle(i, sub, temp_dir, sem) for i, sub in enumerate(subtitles)]
-        completed = 0
-        for coro in asyncio.as_completed(tasks):
-            res = await coro
-            completed += 1
-            progress(0.3 + (0.6 * (completed / total)), desc=f"تولید صداها ({completed} از {total})...")
-            if res is not None:
-                seg = AudioSegment.from_file(res["adj_p"])
-                final_track = final_track.overlay(seg, position=int(res["start"] * 1000))
-                ok_cnt += 1
-        if ok_cnt == 0: raise gr.Error("خطا: صدایی تولید نشد.")
-        progress(0.95, desc="میکس نهایی...")
-        final_audio_p = os.path.join(temp_dir, "final_mix.wav")
-        final_track.export(final_audio_p, format="wav")
-        cmd = ['ffmpeg', '-y', '-i', video_path, '-i', final_audio_p, '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0', final_path]
-        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
-        return final_path, json.dumps(subtitles, ensure_ascii=False, indent=2)
-    except Exception as e:
-        raise gr.Error(f"Error: {str(e)}")
-# ==========================================
-# رابط کاربری
-# ==========================================
-with gr.Blocks(title="AI Smart Director Dubbing", theme=gr.themes.Soft()) as app:
-    gr.Markdown("""
-    # 🎬 استودیو دوبله هوشمند (AI Director)
-    **قابلیت ویژه:** تشخیص خودکار گوینده‌های ویدیو (زن/مرد) و انتخاب بهترین صدا از بین ۳۰ گوینده حرفه‌ای توسط هوش مصنوعی.
-    """)
-    with gr.Row():
-        with gr.Column():
-            api_key = gr.Textbox(label="کلید API جمینای", type="password")
-            vid = gr.Video(label="فایل ویدیو")
-            url = gr.Textbox(label="لینک یوتیوب")
-            lang = gr.Dropdown(["Persian (فارسی)", "English", "Arabic"], value="Persian (فارسی)", label="زبان مقصد")
-            btn = gr.Button("🚀 شروع دوبله هوشمند", variant="primary")
-        with gr.Column():
-            out_vid = gr.Video(label="ویدیو خروجی")
-            out_log = gr.Code(label="گزارش کستینگ (چه کسی چه گفت؟)", language="json")
-    btn.click(process_dubbing, [api_key, vid, url, lang], [out_vid, out_log])
-if __name__ == "__main__":
-    app.launch(ssr_mode=False)

 import os
 import json
+import uuid
+import time
 import asyncio
+import aiohttp
 import requests
+import subprocess
+import shutil
+from flask import Flask, request, jsonify, send_file, render_template
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+import google.generativeai as genai
 from pydub import AudioSegment
 import yt_dlp
+app = Flask(__name__, template_folder='templates', static_folder='static')
+CORS(app)
+# Storage directories (created at import time if they do not exist yet)
+UPLOAD_FOLDER = 'uploads'
+TEMP_AUDIO_FOLDER = 'temp_audio'
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+os.makedirs(TEMP_AUDIO_FOLDER, exist_ok=True)
+# Endpoint of the podcast API used to generate the audio for each segment
+PODCAST_API_URL = "https://ezmarynoori-podgen.hf.space/api/generate"
+# Voice-actor list that is interpolated into the Gemini prompt
+CAST_PROMPT = """
+AVAILABLE VOICE ACTORS (Use these IDs only):
+-- MALE --: Charon, Achird, Zubenelgenubi, Rasalgethi, Sadachbia, Sadaltager, Alnilam, Schedar, Umbriel, Algieba, Algenib, Orus, Enceladus, Iapetus, Puck, Fenrir.
+-- FEMALE --: Zephyr, Vindemiatrix, Sulafat, Laomedeia, Achernar, Gacrux, Pulcherrima, Despina, Erinome, Aoede, Callirrhoe, Autonoe, Kore, Leda.
+"""
+# --- Helper functions ---
 def get_video_duration(video_path):
+    """Return the duration of ``video_path`` in seconds, as reported by ffprobe.
+
+    Raises ``subprocess.CalledProcessError`` when ffprobe exits non-zero and
+    ``ValueError`` when its output cannot be parsed as a float.
+    """
+    probe_cmd = [
+        'ffprobe',
+        '-v', 'error',
+        '-show_entries', 'format=duration',
+        '-of', 'default=noprint_wrappers=1:nokey=1',
+        video_path,
+    ]
+    raw_output = subprocess.check_output(probe_cmd)
+    duration_text = raw_output.decode('utf-8').strip()
+    return float(duration_text)
+def extract_audio(video_path):
+    """Extract the audio track of ``video_path`` into an MP3 file and return its path.
+
+    The MP3 is written next to the video, using the video's name with the
+    extension replaced by ``.mp3``.
+
+    Raises ``subprocess.CalledProcessError`` when ffmpeg fails, so callers never
+    receive a path to a file that was not actually produced.
+    """
+    # splitext only strips a real extension; rsplit('.', 1) would also cut at a
+    # dot inside a directory name when the file itself has no extension.
+    base, _ext = os.path.splitext(video_path)
+    audio_path = base + '.mp3'
+    if audio_path == video_path:
+        # ffmpeg cannot read from and write to the same file (input already .mp3)
+        audio_path = base + '_audio.mp3'
+    subprocess.run(
+        ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'mp3', '-y', audio_path],
+        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True,
+    )
+    return audio_path
+def download_youtube(url):
+    """Download ``url`` with yt-dlp into UPLOAD_FOLDER and return the saved file path.
+
+    The file is stored under a random UUID name with an ``.mp4`` extension.
+    Any error raised by yt-dlp propagates to the caller.
+    """
+    target_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.mp4")
+    options = {
+        'format': 'best[ext=mp4]',
+        'outtmpl': target_path,
+        'quiet': True,
+    }
+    with yt_dlp.YoutubeDL(options) as downloader:
+        downloader.download([url])
+    return target_path
+async def generate_audio_async(session, text, speaker, index):
+    """Generate the audio for one segment through the podcast API (non-blocking HTTP).
+
+    Args:
+        session: an open ``aiohttp.ClientSession`` shared by the whole batch.
+        text: the text to be spoken.
+        speaker: the voice-actor ID sent as ``speaker``.
+        index: position of the segment in the script; echoed back in the result.
+
+    Returns:
+        ``{"index", "status": "success", "file"}`` with the name of the file
+        written into TEMP_AUDIO_FOLDER, or ``{"index", "status": "failed"}``.
+        Never raises: every failure is logged and reported through ``status``.
+    """
+    payload = {"text": text, "speaker": speaker, "temperature": 0.9}
+    # aiohttp expects a ClientTimeout object; passing a bare number is deprecated.
+    timeout = aiohttp.ClientTimeout(total=300)
+    try:
+        async with session.post(PODCAST_API_URL, json=payload, timeout=timeout) as resp:
+            if resp.status != 200:
+                # Previously a non-200 answer failed without leaving any trace.
+                print(f"Error gen audio {index}: HTTP {resp.status}")
+                return {"index": index, "status": "failed"}
+            audio_data = await resp.read()
+        filename = f"seg_{uuid.uuid4()}.wav"
+        path = os.path.join(TEMP_AUDIO_FOLDER, filename)
+        with open(path, 'wb') as f:
+            f.write(audio_data)
+        return {"index": index, "status": "success", "file": filename}
+    except Exception as e:
+        # Best-effort per segment: one bad segment must not abort the batch.
+        # repr() is used because timeout errors have an empty str().
+        print(f"Error gen audio {index}: {e!r}")
+    return {"index": index, "status": "failed"}
+async def batch_generate_audio(segments):
+    """Generate audio for all ``segments`` concurrently (at most 20 requests in flight).
+
+    Args:
+        segments: list of script entries; each is expected to be a dict with
+            ``text`` and ``speaker_id`` keys.
+
+    Returns:
+        A list with one result dict per segment, in the same order as
+        ``segments`` (see ``generate_audio_async`` for the dict layout).
+        Entries that are not dicts or have no text are reported as
+        ``{"index": i, "status": "failed"}`` instead of raising, so one
+        malformed entry cannot abort the whole batch.
+    """
+    sem = asyncio.Semaphore(20)
+    async def generate_one(session, idx, item):
+        text = item.get('text') if isinstance(item, dict) else None
+        if not text:
+            return {"index": idx, "status": "failed"}
+        # Fall back to a voice from CAST_PROMPT when the model omitted the ID.
+        speaker = item.get('speaker_id') or 'Charon'
+        async with sem:
+            return await generate_audio_async(session, text, speaker, idx)
+    async with aiohttp.ClientSession() as session:
+        jobs = [generate_one(session, i, seg) for i, seg in enumerate(segments)]
+        return await asyncio.gather(*jobs)
+# --- Flask routes ---
+@app.route('/')
+def index():
+    """Render the single-page UI (templates/index.html)."""
+    return render_template('index.html')
+def _send_from(directory, filename):
+    """Send ``filename`` from ``directory``, refusing paths that escape the directory.
+
+    ``send_file(os.path.join(directory, filename))`` would follow ``..``
+    components in a client-supplied path; ``send_from_directory`` joins the
+    path safely and answers 404 for unsafe or missing files.
+    """
+    # Local import keeps this block self-contained (flask is already a dependency).
+    from flask import send_from_directory
+    return send_from_directory(directory, filename)
+@app.route('/static/<path:filename>')
+def serve_static(filename):
+    """Serve a file from the ``static`` folder."""
+    return _send_from('static', filename)
+@app.route('/uploads/<path:filename>')
+def serve_uploads(filename):
+    """Serve an uploaded, downloaded or rendered video from UPLOAD_FOLDER."""
+    return _send_from(UPLOAD_FOLDER, filename)
+@app.route('/temp_audio/<path:filename>')
+def serve_audio(filename):
+    """Serve a generated audio segment from TEMP_AUDIO_FOLDER."""
+    return _send_from(TEMP_AUDIO_FOLDER, filename)
+@app.route('/api/analyze', methods=['POST'])
+def analyze_video():
+    """Analyze a video with Gemini and pre-generate the dubbed audio segments.
+
+    Form fields: ``api_key`` (required), ``youtube_url`` or file ``video_file``
+    (one of them required), ``language`` (target language, default ``Persian``).
+
+    Returns JSON ``{"video_filename", "script"}`` where every script entry has
+    an ``audio_file`` key (file name in TEMP_AUDIO_FOLDER, or ``None`` when the
+    generation of that segment failed). Errors are returned as
+    ``{"error": ...}`` with status 400 (bad input) or 500 (processing failure).
+    """
+    api_key = request.form.get('api_key')
+    youtube_url = request.form.get('youtube_url')
+    video_file = request.files.get('video_file')
+    target_lang = request.form.get('language', 'Persian')
+    if not api_key: return jsonify({"error": "API Key is required"}), 400
+    audio_file = None  # remote Gemini file handle; deleted in ``finally``
     try:
+        # 1. Obtain the video
+        if youtube_url:
+            video_path = download_youtube(youtube_url)
+        elif video_file:
+            filename = secure_filename(f"{uuid.uuid4()}_{video_file.filename}")
+            video_path = os.path.join(UPLOAD_FOLDER, filename)
+            video_file.save(video_path)
         else:
+            return jsonify({"error": "No video provided"}), 400
+        # 2. Extract the audio track
+        audio_path = extract_audio(video_path)
+        if not os.path.exists(audio_path):
+            # Fail early with a clear message instead of a confusing upload error
+            raise RuntimeError("Audio extraction failed")
+        # 3. Send to Gemini
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel('gemini-2.5-flash')
         prompt = f"""
+        You are a Dubbing Director.
         {CAST_PROMPT}
         TASK:
+        1. Identify speakers in the audio.
+        2. Assign a Voice Actor ID from the list to each segment based on gender/tone.
+        3. Translate the dialogue to {target_lang}.
+        4. Return a JSON Array.
+        Format:
         [
+          {{"start": 0.0, "end": 4.5, "speaker_id": "Charon", "text": "Translated text..."}},
+          ...
         ]
         """
+        audio_file = genai.upload_file(audio_path)
+        # Bound the wait so a stuck upload cannot block the worker forever
+        deadline = time.monotonic() + 300
+        while audio_file.state.name == "PROCESSING":
+            if time.monotonic() > deadline:
+                raise TimeoutError("Gemini file processing timed out")
+            time.sleep(1)
+            audio_file = genai.get_file(audio_file.name)
+        if audio_file.state.name == "FAILED":
+            raise RuntimeError("Gemini failed to process the audio file")
+        res = model.generate_content([prompt, audio_file], generation_config={"response_mime_type": "application/json"})
+        script = json.loads(res.text)
+        if not isinstance(script, list) or not all(isinstance(item, dict) for item in script):
+            raise ValueError("Gemini returned a malformed script")
+        if not script:
+            raise ValueError("Gemini returned an empty script")
+        # 4. Generate the initial audio for all segments concurrently
+        results = asyncio.run(batch_generate_audio(script))
+        # Attach the generated audio files to the script entries
+        for res_item in results:
+            succeeded = res_item['status'] == 'success'
+            script[res_item['index']]['audio_file'] = res_item['file'] if succeeded else None
+        return jsonify({
+            "video_filename": os.path.basename(video_path),
+            "script": script
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+    finally:
+        # The uploaded audio is only needed for this one request; remove it
+        # from the Gemini account. Cleanup is best-effort and must not mask
+        # the real response.
+        if audio_file is not None:
+            try:
+                genai.delete_file(audio_file.name)
+            except Exception as cleanup_err:
+                print(f"Could not delete Gemini file: {cleanup_err!r}")
+@app.route('/api/regenerate_segment', methods=['POST'])
+def regenerate_segment():
+    """Regenerate the audio of a single segment through the podcast API.
+
+    JSON body: ``text`` and ``speaker_id`` (both required).
+
+    Returns ``{"audio_file": <name in TEMP_AUDIO_FOLDER>}`` on success,
+    ``{"error": ...}`` with status 400 for a missing/invalid body and 500 when
+    the generation fails.
+    """
+    # silent=True: a missing or malformed JSON body becomes a clean 400 below
+    # instead of an AttributeError on ``None``.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Invalid JSON body"}), 400
+    text = data.get('text')
+    speaker = data.get('speaker_id')
+    if not text or not speaker:
+        return jsonify({"error": "text and speaker_id are required"}), 400
+    try:
+        # Single synchronous request to the podcast API. The timeout matches the
+        # batch path; without one a stalled API would block this worker forever.
+        resp = requests.post(
+            PODCAST_API_URL,
+            json={"text": text, "speaker": speaker, "temperature": 0.9},
+            timeout=300,
+        )
+        if resp.status_code != 200:
+            return jsonify({"error": "Generation failed"}), 500
+        filename = f"seg_{uuid.uuid4()}.wav"
+        path = os.path.join(TEMP_AUDIO_FOLDER, filename)
+        with open(path, 'wb') as f:
+            f.write(resp.content)
+        return jsonify({"audio_file": filename})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+def _tempo_for(current_ms, target_ms):
+    """Return the ffmpeg ``atempo`` factor for a segment, or ``None`` if no stretch is needed.
+
+    The factor is clamped to [0.6, 2.0]; changes of 5% or less are skipped.
+    Non-positive durations (e.g. ``end == start``) yield ``None`` instead of a
+    division by zero.
+    """
+    if current_ms <= 0 or target_ms <= 0:
+        return None
+    speed = max(0.6, min(2.0, current_ms / target_ms))
+    return speed if abs(speed - 1.0) > 0.05 else None
+def _load_fitted_segment(seg_path, target_ms):
+    """Load ``seg_path`` and time-stretch it towards ``target_ms`` milliseconds.
+
+    Falls back to the unstretched audio when ffmpeg fails.
+    """
+    audio = AudioSegment.from_file(seg_path)
+    speed = _tempo_for(len(audio), target_ms)
+    if speed is None:
+        return audio
+    # The speed change is done with ffmpeg rather than pydub for better quality.
+    # The clamped range fits a single atempo filter, so no filter chain is needed.
+    stem, _ext = os.path.splitext(seg_path)
+    temp_out = f"{stem}_speed.wav"
+    proc = subprocess.run(['ffmpeg', '-y', '-i', seg_path, '-filter:a', f"atempo={speed}", temp_out],
+                          stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    if proc.returncode == 0 and os.path.exists(temp_out):
+        return AudioSegment.from_file(temp_out)
+    return audio
+@app.route('/api/render_final', methods=['POST'])
+def render_final():
+    """Mix the segment audio onto a silent track and mux it with the original video.
+
+    JSON body: ``video_filename`` (name inside UPLOAD_FOLDER) and ``script``
+    (list of entries with ``start``, ``end`` in seconds and ``audio_file``).
+
+    Returns ``{"download_url": "/uploads/<file>"}`` on success, or
+    ``{"error": ...}`` with status 400 (invalid body), 404 (video not found)
+    or 500 (processing failure).
+    """
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Invalid JSON body"}), 400
+    video_filename = data.get('video_filename')
+    script = data.get('script')
+    if not isinstance(video_filename, str) or not video_filename or not isinstance(script, list):
+        return jsonify({"error": "video_filename and script are required"}), 400
+    # basename() keeps client-supplied names inside the upload folder
+    video_path = os.path.join(UPLOAD_FOLDER, os.path.basename(video_filename))
+    if not os.path.isfile(video_path): return jsonify({"error": "Video not found"}), 404
+    final_mix_path = None
+    try:
+        # Mix the segments
+        video_duration = get_video_duration(video_path)
+        final_audio = AudioSegment.silent(duration=int(video_duration * 1000))
+        for seg in script:
+            if not isinstance(seg, dict) or not seg.get('audio_file'): continue
+            seg_path = os.path.join(TEMP_AUDIO_FOLDER, os.path.basename(str(seg['audio_file'])))
+            if not os.path.isfile(seg_path): continue
+            start = float(seg['start'])
+            end = float(seg['end'])
+            # Adjust the speed (time stretch) to fit the segment's time slot
+            audio = _load_fitted_segment(seg_path, (end - start) * 1000)
+            # Place the segment on the timeline
+            final_audio = final_audio.overlay(audio, position=int(start * 1000))
+        # Save the final audio mix
+        final_mix_path = os.path.join(UPLOAD_FOLDER, f"mix_{uuid.uuid4()}.wav")
+        final_audio.export(final_mix_path, format="wav")
+        # Combine with the video
+        final_video_path = os.path.join(UPLOAD_FOLDER, f"dubbed_{uuid.uuid4()}.mp4")
+        # FFmpeg command: original video + new audio (replaces the previous audio)
+        cmd = [
+            'ffmpeg', '-y',
+            '-i', video_path,
+            '-i', final_mix_path,
+            '-c:v', 'copy',      # copy the video stream without re-encoding (fast)
+            '-c:a', 'aac',       # encode the audio
+            '-map', '0:v:0',     # picture from the first input
+            '-map', '1:a:0',     # audio from the second input
+            '-shortest',
+            final_video_path
+        ]
+        # check=True: never hand out a download URL for a file ffmpeg failed to write
+        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
+        return jsonify({"download_url": f"/uploads/{os.path.basename(final_video_path)}"})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+    finally:
+        # The intermediate mix is only an input to the mux step; its random name
+        # is never returned to the client, so it can be removed right away.
+        if final_mix_path and os.path.exists(final_mix_path):
+            try:
+                os.remove(final_mix_path)
+            except OSError as cleanup_err:
+                print(f"Could not remove {final_mix_path}: {cleanup_err!r}")
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)  # when run as a script: listen on all interfaces, port 7860