""" MediaTranscriberPro - Hugging Face Space Final Fix for DNS/IPv6 Issues """ # --------------------------------------------------------- # LAYER 1: SYSTEM SOCKET PATCH (Must be at the very top) # --------------------------------------------------------- import socket import os # Force IPv4 for all socket connections old_getaddrinfo = socket.getaddrinfo def new_getaddrinfo(*args, **kwargs): responses = old_getaddrinfo(*args, **kwargs) return [response for response in responses if response[0] == socket.AF_INET] socket.getaddrinfo = new_getaddrinfo # --------------------------------------------------------- import gradio as gr import logging import tempfile import shutil import subprocess import re import yt_dlp from pathlib import Path from dataclasses import dataclass from typing import Optional, Callable # Logging Setup logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Constants SUPPORTED_MEDIA = {".mp3", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".flac", ".mp4", ".mkv", ".avi", ".mov", ".webm"} @dataclass class Result: success: bool data: Optional[str] = None file_path: Optional[str] = None error: Optional[str] = None class MediaDownloader: def __init__(self, output_dir): self.output_dir = output_dir self.output_dir.mkdir(parents=True, exist_ok=True) def download(self, url, progress=None): try: if progress: progress(0.1, "Initializing download...") # LAYER 2: YT-DLP SPECIFIC OPTIONS ydl_opts = { 'format': 'bestaudio/best', 'outtmpl': str(self.output_dir / '%(title)s.%(ext)s'), 'noplaylist': True, 'force_ipv4': True, # <--- يجبر المكتبة على استخدام IPv4 'nocheckcertificate': True, # <--- يتجاوز أخطاء SSL 'socket_timeout': 30, # <--- يزيد وقت الانتظار 'quiet': True, 'no_warnings': True, # LAYER 3: USER AGENT SPOOFING 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36', } with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=True) filename = ydl.prepare_filename(info) file_path = Path(filename) # Fallback check if filename differs if not file_path.exists(): potential_files = list(self.output_dir.glob("*")) if not potential_files: return Result(False, error="Download finished but file not found.") file_path = max(potential_files, key=lambda x: x.stat().st_mtime) return Result(True, file_path=str(file_path)) except Exception as e: logger.error(f"Download Error: {e}") return Result(False, error=str(e)) class Processor: def __init__(self): self.tmp = Path(tempfile.mkdtemp()) self.downloader = MediaDownloader(self.tmp / "download") # Lazy load whisper to save startup time self.model = None def load_model(self): if not self.model: from faster_whisper import WhisperModel self.model = WhisperModel("medium", device="cpu", compute_type="int8") def run(self, url, upload, lang, progress=gr.Progress()): try: # 1. Acquire Media target_file = None if upload: target_file = Path(upload) elif url: res = self.downloader.download(url, progress) if not res.success: return f"❌ Error: {res.error}", None, None target_file = Path(res.file_path) else: return "Please provide URL or File", None, None # 2. Transcribe progress(0.3, "Loading Model...") self.load_model() progress(0.5, "Transcribing...") lang_code = lang.split("-")[0] segments, _ = self.model.transcribe(str(target_file), language=lang_code, beam_size=5) # Collect result full_text = [] srt_content = [] for i, seg in enumerate(segments, 1): full_text.append(seg.text) # Simple SRT formatting start = f"{int(seg.start//3600):02}:{int((seg.start%3600)//60):02}:{int(seg.start%60):02},000" end = f"{int(seg.end//3600):02}:{int((seg.end%3600)//60):02}:{int(seg.end%60):02},000" srt_content.append(f"{i}\n{start} --> {end}\n{seg.text.strip()}\n") text_str = " ".join(full_text) srt_str = "\n".join(srt_content) # Save files out_txt = self.tmp / "transcript.txt" out_srt = self.tmp / "subs.srt" out_txt.write_text(text_str, encoding="utf-8") out_srt.write_text(srt_str, encoding="utf-8") return f"✅ Done! ({len(text_str)} chars)", str(out_txt), str(out_srt) except Exception as e: return f"❌ Critical Error: {str(e)}", None, None # UI Setup proc = Processor() with gr.Blocks(title="Transcriber Pro") as demo: gr.Markdown("## 🎙️ Media Transcriber Pro (IPv4 Fix)") with gr.Row(): url_in = gr.Textbox(label="YouTube URL") file_in = gr.File(label="Upload File") lang_in = gr.Dropdown(["ar", "en"], value="ar", label="Language") btn = gr.Button("Transcribe", variant="primary") status = gr.Textbox(label="Status") with gr.Row(): f1 = gr.File(label="TXT") f2 = gr.File(label="SRT") btn.click(proc.run, [url_in, file_in, lang_in], [status, f1, f2]) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)