Spaces:

suprimedev
/

hfvd

Running

App Files Files Community

suprimedev committed on Nov 4, 2025

Commit

4881f89

verified ·

1 Parent(s): 5abdca1

Update app.py

Browse files

Files changed (1) hide show

app.py +267 -219

app.py CHANGED Viewed

@@ -1,304 +1,352 @@
-# ─────────────────────────────────────────────────────────────────────
-#  File:  app.py
-#  ─────────────────────────────────────────────────────────────────────
-#  این اسکریپت دو نقش دارد
-#   1. یک UI Gradio برای تبدیل ویدیو به MP3 و استخراج متن
-#   2. یک API ساده (Flask) برای دریافت وضعیت و نهایی‌سازی خروجی
-#   3. نگه‌داری یک دیکشنری  `session_map`  به‌صورت thread‑safe
-#      تا `token` تولید‌شده در PHP را با `session_hash` Gradio مطابقت دهیم
-# ─────────────────────────────────────────────────────────────────────
 import os
-import uuid
-import json
 import shutil
-import tempfile
-import warnings
-import time
-import threading
-import yt_dlp
 import speech_recognition as sr
 from pydub import AudioSegment
-from typing import Tuple, Optional
-from flask import Flask, request, jsonify
-import gradio as gr
 warnings.filterwarnings("ignore")
-# ------------------------------------------------------------
-# ۱. نگه‌داری mapping token ↔︎ session_hash
-# ------------------------------------------------------------
-session_map = {}          # {token : session_hash}
-session_lock = threading.Lock()
-# ------------------------------------------------------------
-# ۲. توابع اصلی تبدیل/تبدیل‌تکست
-# ------------------------------------------------------------
-def language_display(lang_code: str) -> str:
-    return {"fa-IR":"پارسی","en-US":"انگلیسی"}.get(lang_code, lang_code)
-def transcribe_audio(mp3_path, progress, language,
-                     chunk_length_ms=60000, overlap_ms=5000) -> Tuple[Optional[str], str]:
     recognizer = sr.Recognizer()
     recognizer.energy_threshold = 300
     recognizer.dynamic_energy_threshold = True
     recognizer.pause_threshold = 0.8
     full_text = []
     bad_chunks = 0
     total_chunks = 0
     temp_wav_dir = tempfile.mkdtemp()
     audio = AudioSegment.from_mp3(mp3_path)
     duration_ms = len(audio)
     if duration_ms == 0:
         return None, "فایل صوتی خالی یا بدون صدا."
     step_size = chunk_length_ms - overlap_ms
     if step_size <= 0:
         step_size = chunk_length_ms // 2
     num_chunks = max(1, (duration_ms // step_size) + 1)
-    progress(0.5, desc=f"تقسیم به {num_chunks} chunk 60s …")
     i = 0
     chunk_idx = 1
     while i < duration_ms:
         end_pos = min(i + chunk_length_ms, duration_ms)
         chunk = audio[i:end_pos]
-        if len(chunk) < 3000:          # کوتاه‌تر از 3 ثانیه
             break
         temp_wav = os.path.join(temp_wav_dir, f"chunk_{i}.wav")
         try:
             chunk.export(temp_wav, format="wav")
             text_chunk = None
             retry_count = 0
             max_retries = 3
             while retry_count < max_retries:
                 try:
-                    progress(0.5 + (i / duration_ms) * 0.5,
-                             desc=f"Chunk {chunk_idx}/{num_chunks} ({i/1000:.0f}-{end_pos/1000:.0f}s) …")
                     with sr.AudioFile(temp_wav) as source:
                         recognizer.adjust_for_ambient_noise(source, duration=0.5)
                         audio_data = recognizer.record(source, duration=None)
                     text = recognizer.recognize_google(audio_data, language=language)
                     if text.strip():
                         text_chunk = text
                         break
                     else:
                         text_chunk = "[سکوت]"
                         break
                 except sr.UnknownValueError:
                     text_chunk = "[نامشخص]"
                     break
                 except sr.RequestError as e:
                     retry_count += 1
-                    time.sleep(2)
-                    if retry_count == max_retries:
-                        text_chunk = f"[خطا rate limit: {str(e)[:30]}…]"
                         bad_chunks += 1
             if text_chunk:
                 full_text.append(text_chunk)
                 if "[نامشخص" in text_chunk or "[خطا" in text_chunk:
                     bad_chunks += 1
             total_chunks += 1
         except Exception as chunk_e:
-            text_chunk = f"[خطا chunk: {str(chunk_e)[:30]}…]"
             full_text.append(text_chunk)
             bad_chunks += 1
             total_chunks += 1
-        finally:
-            if os.path.exists(temp_wav):
-                os.remove(temp_wav)
         i += step_size
         chunk_idx += 1
     shutil.rmtree(temp_wav_dir, ignore_errors=True)
     final_text = ' '.join(full_text).strip()
     if not final_text:
-        return None, "هیچ chunk موفقی نبود."
-    bad_ratio = bad_chunks / total_chunks if total_chunks else 1
     if bad_ratio > 0.7:
-        return None, f"بیش از 70% chunkها fail ({bad_ratio*100:.0f}%)"
-    return final_text, ""
-def convert_to_mp3_and_transcribe(video_url: str, language: str,
-                                  progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], str]:
-    if not video_url:
-        return None, None, "لینک ویدیو را وارد کنید."
-    progress(0, desc="شروع دانلود…")
-    # ۱. دانلود
-    ydl_opts = {
-        'format': 'bestaudio[ext=m4a]/bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192',
-        }],
-        'outtmpl': 'temp.%(ext)s',
-        'quiet': True,
     }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([video_url])
-    except Exception as e:
-        return None, None, f"خطا در دانلود: {e}"
-    # ۲. پیدا کردن فایل MP3
-    mp3_file = None
-    for f in os.listdir('.'):
-        if f.startswith('temp.') and f.endswith('.mp3'):
-            mp3_file = f
-            break
-    if not mp3_file:
-        return None, None, "فایل mp3 پیدا نشد."
-    # ۳. copy to temp
-    temp_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
-    temp_mp3.close()
-    shutil.copy2(mp3_file, temp_mp3.name)
-    os.remove(mp3_file)
-    progress(0.5, desc="MP3 آماده… شروع استخراج متن")
-    text, error_msg = transcribe_audio(temp_mp3.name, progress, language)
-    if text is None:
-        return temp_mp3.name, None, f"MP3 آماده است، ولی استخراج متن شکست خورد: {error_msg}"
-    progress(1.0, desc="استخراج متن کامل شد")
-    return temp_mp3.name, text, f"موفق! زبان: {language_display(language)}. {len(text.split())} کلمه استخراج شد."
-# ------------------------------------------------------------
-# ۳. Gradio Interface
-# ------------------------------------------------------------
 iface = gr.Interface(
     fn=convert_to_mp3_and_transcribe,
     inputs=[
-        gr.Textbox(label="لینک ویدیو (یوتیوب یا MP4 مستقیم)",
-                   placeholder="https://www.youtube.com/watch?v=…"),
-        gr.Dropdown(choices=[("پارسی","fa-IR"),("انگلیسی","en-US")],
-                    value="fa-IR", label="زبان")
     ],
     outputs=[
         gr.File(label="دانلود MP3"),
-        gr.Textbox(label="متن استخراج‌شده", lines=10),
-        gr.Textbox(label="پیام وضعیت")
     ],
-    title="تبدیل ویدیو به MP3 و استخراج متن",
-    description="لینک ویدیو را وارد کنید. برای پارسی: fa-IR، برای انگلیسی: en-US.",
     examples=[
-        ["https://www.youtube.com/watch?v=5qap5aO4i9A","fa-IR"],
-        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ","en-US"]
     ],
     allow_flagging="never",
     cache_examples=False
 )
-# ------------------------------------------------------------
-# ۴. API (Flask) برای گرفتن status بر مبنای token
-# ------------------------------------------------------------
-app = Flask(__name__)
-@app.route('/api/status', methods=['POST'])
-def api_status():
-    """
-    ورودی: {"token":"<uuid>"}
-    خروجی: {status:0/1, transcript:"..."(در صورت تکمیل)}
-    """
-    data = request.get_json()
-    if not data or "token" not in data:
-        return jsonify({"status":0,"error":"token missing"}),400
-    token = data["token"]
-    with session_lock:
-        session_hash = session_map.get(token)
-    if not session_hash:
-        return jsonify({"status":0,"error":"unknown token"}),400
-    # query the gradio queue
-    import requests
-    sse_url = f"https://suprimedev-hfvd.hf.space/gradio_api/queue/data?session_hash={session_hash}"
-    resp = requests.get(sse_url, headers={
-        "Accept":"text/event-stream",
-        "Connection":"keep-alive"
-    })
-    if resp.status_code != 200:
-        return jsonify({"status":0,"error":"failed to fetch"}),500
-    frames = parse_sse(resp.text)
-    for frame in frames:
-        if frame.get('data',{}).get('msg') == 'process_completed':
-            out = frame['data'].get('output',{}).get('data')
-            if isinstance(out, list) and len(out)>=2:
-                transcript = out[1]
-                # Update DB outside – not done here (PHP handles)
-                return jsonify({"status":1,"transcript":transcript})
-    return jsonify({"status":0,"error":"not finished yet"})
-def parse_sse(sse_text: str):
-    frames = []
-    current = {}
-    for line in sse_text.splitlines():
-        if line == '':
-            if current:
-                frames.append(current)
-                current={}
-            continue
-        if line.startswith('event:'):
-            current['event']=line[6:].strip()
-        elif line.startswith('id:'):
-            current['id']=line[3:].strip()
-        elif line.startswith('data:'):
-            current['data']=line[5:].strip()
-    if current:
-        frames.append(current)
-    for frame in frames:
-        if 'data' in frame:
-            try:
-                frame['data']=json.loads(frame['data'])
-            except:
-                pass
-    return frames
-# ------------------------------------------------------------
-# ۵. راه‌اندازی Gradio + API
-# ------------------------------------------------------------
 if __name__ == "__main__":
-    # در اینجا 2 سرویس به هم‌زمان اجرا می‌شوند
-    # 1. Gradio (به‌صورت normal)
-    # 2. Flask (در پورت 5001)
-    from multiprocessing import Process
-    def run_gradio():
-        iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
-    p1 = Process(target=run_gradio)
-    p1.start()
-    # Flask
-    app.run(host="0.0.0.0", port=5001)

+import gradio as gr
+import yt_dlp
+import tempfile
 import os
 import shutil
 import speech_recognition as sr
 from pydub import AudioSegment
+import time
+import warnings
+import json
+from datetime import datetime, timedelta
+import threading
+import hashlib
 warnings.filterwarnings("ignore")
+# In-memory store of finished results, keyed by cache key, so a result can be
+# looked up again without depending on the Gradio session.
+results_cache = {}
+# Held around every read and write of results_cache.
+cache_lock = threading.Lock()
+# Background janitor: evicts cached results older than 24 hours.
+def _evict_expired_results(now, max_age=timedelta(hours=24)):
+    """Remove expired entries from results_cache together with their MP3 copies.
+
+    Args:
+        now: Reference time the entry timestamps are compared against.
+        max_age: Entries older than this are evicted.
+
+    Returns:
+        The number of evicted entries.
+    """
+    with cache_lock:
+        expired_keys = [
+            key for key, entry in results_cache.items()
+            if now - entry['timestamp'] > max_age
+        ]
+        for key in expired_keys:
+            cached_mp3 = results_cache.pop(key)['mp3_path']
+            # The entry owns a "cache_<key>.mp3" copy on disk; dropping only
+            # the dict entry would leave that file behind forever.
+            if cached_mp3:
+                try:
+                    os.remove(cached_mp3)
+                except OSError:
+                    pass  # already gone or not removable; eviction is best-effort
+    return len(expired_keys)
+def cleanup_old_cache():
+    """Run forever, evicting results older than 24 hours once per hour."""
+    while True:
+        time.sleep(3600)  # check once per hour
+        removed = _evict_expired_results(datetime.now())
+        if removed:
+            print(f"[CACHE] {removed} نتیجه قدیمی پاک شد.")
+# Start the janitor as a daemon thread so it does not keep the process alive.
+cleanup_thread = threading.Thread(target=cleanup_old_cache, daemon=True)
+cleanup_thread.start()
+def get_cache_key(video_url, language):
+    """Return the MD5 hex digest of "<video_url>_<language>" as the cache key."""
+    hasher = hashlib.md5()
+    hasher.update(f"{video_url}_{language}".encode())
+    return hasher.hexdigest()
+def save_result_to_cache(video_url, language, mp3_path, text, status_msg):
+    """Store one processing result in results_cache and return its cache key.
+
+    When mp3_path names an existing file, it is copied to "cache_<key>.mp3"
+    (relative to the working directory) and the copy's path is stored;
+    otherwise the stored MP3 path is None.
+    """
+    cache_key = get_cache_key(video_url, language)
+    cache_mp3_path = None
+    if mp3_path and os.path.exists(mp3_path):
+        # Keep a copy that does not depend on the caller's temporary file.
+        cache_mp3_path = f"cache_{cache_key}.mp3"
+        shutil.copy2(mp3_path, cache_mp3_path)
+    with cache_lock:
+        entry = {
+            'timestamp': datetime.now(),
+            'mp3_path': cache_mp3_path,
+            'text': text,
+            'status_msg': status_msg,
+            'video_url': video_url,
+            'language': language
+        }
+        results_cache[cache_key] = entry
+    print(f"[CACHE] نتیجه ذخیره شد: {cache_key}")
+    return cache_key
+def get_result_from_cache(cache_key):
+    """Look up a stored result and return (mp3_path, text, status_msg).
+
+    All three values are None when cache_key is not in results_cache.
+    """
+    with cache_lock:
+        try:
+            entry = results_cache[cache_key]
+        except KeyError:
+            return None, None, None
+        print(f"[CACHE] نتیجه یافت شد: {cache_key}")
+        return entry['mp3_path'], entry['text'], entry['status_msg']
+def convert_to_mp3_and_transcribe(video_url, language, use_cache=True,
+                                  progress=gr.Progress(track_tqdm=False)):
+    """Download audio from a link, convert it to MP3 and transcribe it.
+
+    Args:
+        video_url: Link handed to yt-dlp.
+        language: Recognition language code forwarded to transcribe_audio
+            (the UI offers "fa-IR" and "en-US").
+        use_cache: When true, a previously stored result for the same URL and
+            language is returned instead of processing again.
+        progress: Gradio progress tracker. It is a default argument because
+            Gradio only connects the tracker to the UI when it appears in the
+            handler's signature; an instance created inside the body is
+            never connected.
+
+    Returns:
+        (mp3_path, text, status_msg, cache_key). mp3_path and text are None
+        when the corresponding step failed; cache_key is None only when no
+        URL was given.
+    """
+    if not video_url:
+        return None, None, "لینک ویدیو را وارد کنید.", None
+    # Serve a stored result when one exists.
+    cache_key = get_cache_key(video_url, language)
+    if use_cache:
+        cached_mp3, cached_text, cached_status = get_result_from_cache(cache_key)
+        # Entries with neither MP3 nor text are cached failures: fall through
+        # and process again instead of replaying the error.
+        if cached_mp3 is not None or cached_text is not None:
+            return cached_mp3, cached_text, f"[از حافظه] {cached_status}", cache_key
+    try:
+        print(f"[DEBUG] شروع پردازش لینک: {video_url} (زبان: {language})")
+        progress(0, desc="شروع دانلود...")
+        # Every request downloads into its own directory. A fixed
+        # "temp.<ext>" in the working directory would let concurrent requests
+        # overwrite or pick up each other's files.
+        with tempfile.TemporaryDirectory() as download_dir:
+            ydl_opts = {
+                'format': 'bestaudio[ext=m4a]/bestaudio/best',
+                'postprocessors': [{
+                    'key': 'FFmpegExtractAudio',
+                    'preferredcodec': 'mp3',
+                    'preferredquality': '192',
+                }],
+                'outtmpl': os.path.join(download_dir, 'temp.%(ext)s'),
+                'quiet': True,
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([video_url])
+            print("[DEBUG] دانلود کامل شد.")
+            # Locate the MP3 produced by the FFmpeg post-processor.
+            mp3_file = None
+            for name in os.listdir(download_dir):
+                if name.startswith('temp.') and name.endswith('.mp3'):
+                    mp3_file = os.path.join(download_dir, name)
+                    break
+            if not mp3_file:
+                print("[DEBUG] هیچ MP3 پیدا نشد.")
+                status_msg = "خطا در دانلود یا تبدیل. مطمئن شوید لینک معتبر است."
+                save_result_to_cache(video_url, language, None, None, status_msg)
+                return None, None, status_msg, cache_key
+            progress(0.3, desc="دانلود کامل. کپی MP3...")
+            # Copy to a file that outlives download_dir so it can be returned.
+            temp_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
+            temp_mp3.close()
+            shutil.copy2(mp3_file, temp_mp3.name)
+        print(f"[DEBUG] MP3 آماده: {temp_mp3.name}, اندازه: {os.path.getsize(temp_mp3.name)/1024:.1f} KB")
+        progress(0.5, desc="MP3 آماده. شروع استخراج متن با Google...")
+        # Speech-to-text.
+        text, error_msg = transcribe_audio(temp_mp3.name, progress, language)
+        if text is None:
+            status_msg = f"MP3 آماده است، اما استخراج متن fail شد. {error_msg}"
+            save_result_to_cache(video_url, language, temp_mp3.name, None, status_msg)
+            return temp_mp3.name, None, status_msg, cache_key
+        progress(1.0, desc="استخراج متن کامل شد!")
+        print(f"[DEBUG] متن نهایی (اولین 100 کاراکتر): {text[:100]}...")
+        status_msg = f"موفق! زبان: {language_display(language)}. {len(text.split())} کلمه استخراج شد."
+        save_result_to_cache(video_url, language, temp_mp3.name, text, status_msg)
+        return temp_mp3.name, text, status_msg, cache_key
+    except Exception as e:
+        # Top-level boundary for the UI handler: report the failure as a
+        # status message instead of letting the request crash.
+        print(f"[ERROR] خطای کلی: {str(e)}")
+        status_msg = f"خطا کلی: {str(e)}"
+        save_result_to_cache(video_url, language, None, None, status_msg)
+        return None, None, status_msg, cache_key
+def language_display(lang_code):
+    """Return the Persian display name for a language code.
+
+    Codes other than 'fa-IR' and 'en-US' are returned unchanged.
+    """
+    known_names = (('fa-IR', "پارسی"), ('en-US', "انگلیسی"))
+    for code, label in known_names:
+        if lang_code == code:
+            return label
+    return lang_code
+def transcribe_audio(mp3_path, progress, language, chunk_length_ms=60000, overlap_ms=5000):
+    """Transcribe an MP3 file with Google Speech Recognition, chunk by chunk.
+
+    The audio is cut into windows of chunk_length_ms that overlap by
+    overlap_ms. Each window is exported to WAV and sent to recognize_google,
+    with up to three attempts when a RequestError is raised.
+
+    Args:
+        mp3_path: Path of the MP3 file to transcribe.
+        progress: Callable taking (fraction, desc=...) used to report progress.
+        language: Language code passed to recognize_google.
+        chunk_length_ms: Window length in milliseconds.
+        overlap_ms: Overlap between consecutive windows in milliseconds.
+
+    Returns:
+        (text, error_msg). On success text is the space-joined chunk results
+        and error_msg is "". text is None, with an explanation in error_msg,
+        when the audio is empty, nothing was produced, or more than 70% of
+        the chunks were bad.
+    """
     recognizer = sr.Recognizer()
     recognizer.energy_threshold = 300
     recognizer.dynamic_energy_threshold = True
     recognizer.pause_threshold = 0.8
     full_text = []
     bad_chunks = 0
     total_chunks = 0
     audio = AudioSegment.from_mp3(mp3_path)
     duration_ms = len(audio)
     if duration_ms == 0:
+        print("[DEBUG] فایل صوتی خالی!")
         return None, "فایل صوتی خالی یا بدون صدا."
+    # Created only after the empty-audio check so the early return above
+    # cannot leave an orphaned directory behind.
+    temp_wav_dir = tempfile.mkdtemp()
     step_size = chunk_length_ms - overlap_ms
     if step_size <= 0:
         step_size = chunk_length_ms // 2
     num_chunks = max(1, (duration_ms // step_size) + 1)
+    print(f"[DEBUG] مدت: {duration_ms/1000:.1f}s, chunkها: {num_chunks}, گام: {step_size/1000:.1f}s")
+    progress(0.5, desc=f"تقسیم به {num_chunks} chunk 60s (زبان: {language_display(language)})...")
     i = 0
     chunk_idx = 1
     while i < duration_ms:
         end_pos = min(i + chunk_length_ms, duration_ms)
         chunk = audio[i:end_pos]
+        # A short tail is skipped (with the default 5 s overlap the previous
+        # window already covered it). A short *first* window is the whole
+        # recording and must still be transcribed.
+        if i > 0 and len(chunk) < 3000:
+            print(f"[DEBUG] Chunk {chunk_idx} خیلی کوتاه ({len(chunk)/1000}s), رد شد.")
             break
         temp_wav = os.path.join(temp_wav_dir, f"chunk_{i}.wav")
         try:
             chunk.export(temp_wav, format="wav")
+            print(f"[DEBUG] Chunk {chunk_idx} export شد: {temp_wav}")
             text_chunk = None
             retry_count = 0
             max_retries = 3
             while retry_count < max_retries:
                 try:
+                    progress(0.5 + (i / duration_ms) * 0.5, desc=f"Chunk {chunk_idx}/{num_chunks} ({(i/1000):.0f}-{end_pos/1000:.0f}s, retry {retry_count+1})...")
                     with sr.AudioFile(temp_wav) as source:
                         recognizer.adjust_for_ambient_noise(source, duration=0.5)
                         audio_data = recognizer.record(source, duration=None)
                     text = recognizer.recognize_google(audio_data, language=language)
                     if text.strip():
                         text_chunk = text
+                        print(f"[DEBUG] Chunk {chunk_idx} موفق: {text[:50]}...")
                         break
                     else:
                         text_chunk = "[سکوت]"
+                        print(f"[DEBUG] Chunk {chunk_idx} سکوت.")
                         break
                 except sr.UnknownValueError:
                     text_chunk = "[نامشخص]"
+                    print(f"[DEBUG] Chunk {chunk_idx} نامشخص (نویز/سکوت).")
                     break
                 except sr.RequestError as e:
                     retry_count += 1
+                    print(f"[DEBUG] Chunk {chunk_idx} RequestError (rate limit?): {str(e)}. Retry {retry_count}/{max_retries}")
+                    if retry_count < max_retries:
+                        time.sleep(2)
+                    else:
+                        # Not counted as bad here: the "[خطا" marker check
+                        # below counts this chunk exactly once.
+                        text_chunk = f"[خطا rate limit: {str(e)[:30]}...]"
+                        print(f"[DEBUG] Chunk {chunk_idx} fail پس از retryها.")
+                        break
             if text_chunk:
                 full_text.append(text_chunk)
                 if "[نامشخص" in text_chunk or "[خطا" in text_chunk:
                     bad_chunks += 1
             total_chunks += 1
         except Exception as chunk_e:
+            print(f"[ERROR] Chunk {chunk_idx} (خطای کلی): {str(chunk_e)}")
+            text_chunk = f"[خطا chunk: {str(chunk_e)[:30]}...]"
             full_text.append(text_chunk)
             bad_chunks += 1
             total_chunks += 1
+        if os.path.exists(temp_wav):
+            os.remove(temp_wav)
         i += step_size
         chunk_idx += 1
     shutil.rmtree(temp_wav_dir, ignore_errors=True)
     final_text = ' '.join(full_text).strip()
+    error_msg = ""
     if not final_text:
+        error_msg = "هیچ chunk موفقی نبود."
+        return None, error_msg
+    bad_ratio = bad_chunks / total_chunks if total_chunks > 0 else 1
     if bad_ratio > 0.7:
+        error_msg = f"بیش از 70% chunkها fail ({bad_ratio*100:.0f}%). ممکن است نویز باشد، rate limit گوگل، یا زبان اشتباه انتخاب شده."
+        return None, error_msg
+    print(f"[DEBUG] {total_chunks} chunk پردازش شد, {bad_chunks} بد.")
+    return final_text, error_msg
+# Lookup helper: reports what is stored for a given cache key.
+def get_cached_result(cache_key):
+    """Return a dict summarising the cached result for cache_key.
+
+    "mp3_available" tells whether an MP3 path is stored; "text" and "status"
+    are None when the key is unknown.
+    """
+    mp3_path, transcript, status_msg = get_result_from_cache(cache_key)
+    summary = {"cache_key": cache_key}
+    summary["mp3_available"] = mp3_path is not None
+    summary["text"] = transcript
+    summary["status"] = status_msg
+    return summary
+# Gradio interface; the fourth output shows the cache key of the result.
 iface = gr.Interface(
     fn=convert_to_mp3_and_transcribe,
     inputs=[
+        gr.Textbox(
+            label="لینک ویدیو (یوتیوب یا MP4 مستقیم)",
+            placeholder="https://www.youtube.com/watch?v=... یا https://example.com/video.mp4"
+        ),
+        gr.Dropdown(
+            choices=[
+                ("پارسی", "fa-IR"),
+                ("انگلیسی", "en-US"),
+            ],
+            value="fa-IR",
+            label="زبان متن"
+        ),
+        gr.Checkbox(
+            label="استفاده از حافظه cache",
+            value=True,
+            visible=False  # hidden because caching is always enabled
+        )
     ],
     outputs=[
         gr.File(label="دانلود MP3"),
+        gr.Textbox(label="متن استخراج‌شده (Google STT)", lines=10),
+        gr.Textbox(label="پیام وضعیت"),
+        gr.Textbox(label="Cache Key (برای دریافت مجدد نتیجه)", visible=True)
     ],
+    title="تبدیل ویدیو به MP3 و استخراج متن (Google STT) - با Cache",
+    description="لینک ویدیو را وارد کنید و زبان را انتخاب کنید. نتایج برای 24 ساعت ذخیره می‌شوند.",
+    # NOTE(review): each example row supplies two values while there are three
+    # inputs; confirm the hidden checkbox falls back to its default value.
     examples=[
+        ["https://www.youtube.com/watch?v=5qap5aO4i9A", "fa-IR"],
+        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "en-US"]
     ],
     allow_flagging="never",
     cache_examples=False
 )
+# Wrap the interface in Blocks and publish cached-result lookup as a named
+# Gradio API endpoint ("get_result").
+with gr.Blocks() as demo:
+    iface.render()
+    # Gradio exposes no `gr.route` decorator, so the lookup is registered as
+    # an event on hidden components; `api_name` is what makes it callable by
+    # API clients.
+    cache_key_box = gr.Textbox(visible=False)
+    cached_result_json = gr.JSON(visible=False)
+    lookup_button = gr.Button(visible=False)
+    def api_get_result(cache_key):
+        """Return the cached-result summary for cache_key."""
+        return get_cached_result(cache_key)
+    lookup_button.click(
+        api_get_result,
+        inputs=cache_key_box,
+        outputs=cached_result_json,
+        api_name="get_result",
+    )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)