# TransCree / app.py
# Hugging Face Space by ArtSpace.
# Last commit: "Update app.py" (3412e09, verified).
"""
MediaTranscriberPro - Hugging Face Space
Final Fix for DNS/IPv6 Issues
"""
# ---------------------------------------------------------
# LAYER 1: SYSTEM SOCKET PATCH (Must be at the very top)
# ---------------------------------------------------------
import socket
import os
# Force IPv4 for all socket connections
old_getaddrinfo = socket.getaddrinfo


def new_getaddrinfo(*args, **kwargs):
    """Resolve like ``socket.getaddrinfo`` but prefer IPv4 results.

    All arguments are forwarded unchanged to the original resolver.

    Returns:
        Only the ``AF_INET`` entries when the answer contains at least one.
        When it contains none (IPv6 literal, IPv6-only host, or an explicit
        ``AF_INET6`` request) the unfiltered answer is returned instead of an
        empty list, so callers that iterate over the result still have an
        address to try.

    Raises:
        Whatever the original ``socket.getaddrinfo`` raises.
    """
    responses = old_getaddrinfo(*args, **kwargs)
    ipv4_only = [response for response in responses if response[0] == socket.AF_INET]
    # An empty list is never a useful answer: fall back to what was resolved.
    return ipv4_only or responses


# Install the wrapper process-wide; every later lookup goes through it.
socket.getaddrinfo = new_getaddrinfo
# ---------------------------------------------------------
import gradio as gr
import logging
import tempfile
import shutil
import subprocess
import re
import yt_dlp
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, Callable
# Logging setup: INFO level, each record prefixed with timestamp and level.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# Lower-case file extensions (with the leading dot) of the media containers
# the app lists as supported.
SUPPORTED_MEDIA = {
    # audio
    ".mp3",
    ".wav",
    ".m4a",
    ".aac",
    ".ogg",
    ".opus",
    ".flac",
    # video
    ".mp4",
    ".mkv",
    ".avi",
    ".mov",
    ".webm",
}
@dataclass
class Result:
    """Outcome of one operation, successful or not.

    Fields are accepted positionally in the order declared below. Everything
    after ``success`` is optional and defaults to ``None``.
    """

    # True when the operation completed, False when it failed.
    success: bool
    # Optional text payload.
    data: Optional[str] = None
    # Path of a produced file, stored as a string.
    file_path: Optional[str] = None
    # Description of what went wrong when ``success`` is False.
    error: Optional[str] = None
class MediaDownloader:
    """Download the best available audio of a URL into a local directory via yt-dlp."""

    # Suffixes of unfinished downloads / resume bookkeeping files; such files
    # must never be reported as the finished result.
    _INCOMPLETE_SUFFIXES = (".part", ".ytdl")

    def __init__(self, output_dir):
        """Remember *output_dir* (``str`` or ``Path``) and create it if missing."""
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def _newest_finished_file(self):
        """Return the most recently modified finished file in the output directory, or ``None``."""
        candidates = [
            entry
            for entry in self.output_dir.iterdir()
            if entry.is_file() and entry.suffix not in self._INCOMPLETE_SUFFIXES
        ]
        if not candidates:
            return None
        return max(candidates, key=lambda entry: entry.stat().st_mtime)

    def download(self, url, progress=None):
        """Download *url* and report where the file ended up.

        Args:
            url: Address of the media page or file to fetch.
            progress: Optional callable invoked as ``progress(fraction, message)``.

        Returns:
            ``Result(True, file_path=...)`` on success. Any failure, including
            every exception raised while downloading, is logged and returned
            as ``Result(False, error=...)``; this method does not raise.
        """
        try:
            if progress:
                progress(0.1, "Initializing download...")

            # LAYER 2: YT-DLP SPECIFIC OPTIONS
            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': str(self.output_dir / '%(title)s.%(ext)s'),
                'noplaylist': True,
                # Force IPv4. The Python API has no 'force_ipv4' key (it would
                # be silently ignored); binding to 0.0.0.0 is what the CLI
                # switch --force-ipv4 does.
                'source_address': '0.0.0.0',
                # Bypass SSL errors.
                # NOTE(review): this disables TLS certificate verification;
                # drop it once the certificate problem is solved properly.
                'nocheckcertificate': True,
                # Allow slow connections more time before giving up.
                'socket_timeout': 30,
                'quiet': True,
                'no_warnings': True,
                # LAYER 3: USER AGENT SPOOFING
                # Custom headers go through 'http_headers'; a top-level
                # 'user_agent' key is not read by the Python API.
                'http_headers': {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
                },
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                file_path = Path(ydl.prepare_filename(info))

            # Fallback check if the real filename differs from the predicted one.
            if not file_path.exists():
                file_path = self._newest_finished_file()
                if file_path is None:
                    return Result(False, error="Download finished but file not found.")

            return Result(True, file_path=str(file_path))
        except Exception as e:
            # Boundary handler: the caller expects a Result, never an exception.
            logger.error(f"Download Error: {e}")
            return Result(False, error=str(e))
class Processor:
    """Turn an uploaded file or a downloadable URL into TXT and SRT transcripts."""

    def __init__(self):
        """Create the working directory and the downloader; the model is loaded later."""
        self.tmp = Path(tempfile.mkdtemp())
        self.downloader = MediaDownloader(self.tmp / "download")
        # Lazy load whisper to save startup time
        self.model = None

    def load_model(self):
        """Load the faster-whisper model on first use; later calls do nothing."""
        if self.model is None:
            from faster_whisper import WhisperModel
            self.model = WhisperModel("medium", device="cpu", compute_type="int8")

    @staticmethod
    def _srt_timestamp(seconds):
        """Format an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
        # Work in whole milliseconds so the fractional part is kept, not dropped.
        remaining_ms = max(0, int(round(seconds * 1000)))
        hours, remaining_ms = divmod(remaining_ms, 3_600_000)
        minutes, remaining_ms = divmod(remaining_ms, 60_000)
        secs, millis = divmod(remaining_ms, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    def run(self, url, upload, lang, progress=gr.Progress()):
        """Transcribe the uploaded file, or else the media behind *url*.

        Args:
            url: Media URL; used only when no file was uploaded.
            upload: Uploaded file, either a path or an object whose ``.name``
                is the path. Takes precedence over *url*.
            lang: Language tag; only the part before the first ``-`` is used.
            progress: Gradio progress tracker.

        Returns:
            A ``(status message, txt path, srt path)`` tuple. On any failure
            both paths are ``None`` and the message describes the problem;
            this method does not raise.
        """
        try:
            # 1. Acquire Media
            if upload:
                # Depending on the Gradio version the File component hands
                # over a path string or a temp-file object exposing `.name`.
                if isinstance(upload, (str, os.PathLike)):
                    target_file = Path(upload)
                else:
                    target_file = Path(upload.name)
            elif url and url.strip():
                res = self.downloader.download(url.strip(), progress)
                if not res.success:
                    return f"❌ Error: {res.error}", None, None
                target_file = Path(res.file_path)
            else:
                return "Please provide URL or File", None, None

            # 2. Transcribe
            progress(0.3, "Loading Model...")
            self.load_model()
            progress(0.5, "Transcribing...")
            lang_code = lang.split("-")[0]
            segments, _ = self.model.transcribe(str(target_file), language=lang_code, beam_size=5)

            # Collect result
            text_parts = []
            srt_entries = []
            for index, seg in enumerate(segments, 1):
                # Strip once so joining with a space does not double spaces.
                text = seg.text.strip()
                text_parts.append(text)
                start = self._srt_timestamp(seg.start)
                end = self._srt_timestamp(seg.end)
                srt_entries.append(f"{index}\n{start} --> {end}\n{text}\n")
            text_str = " ".join(text_parts)
            srt_str = "\n".join(srt_entries)

            # Save files in a directory of their own so that overlapping runs
            # on this shared instance cannot overwrite each other's output.
            out_dir = Path(tempfile.mkdtemp(dir=self.tmp))
            out_txt = out_dir / "transcript.txt"
            out_srt = out_dir / "subs.srt"
            out_txt.write_text(text_str, encoding="utf-8")
            out_srt.write_text(srt_str, encoding="utf-8")
            return f"✅ Done! ({len(text_str)} chars)", str(out_txt), str(out_srt)
        except Exception as e:
            # Boundary handler for the UI: keep the traceback in the log and
            # show the user a short message instead of crashing the request.
            logger.exception("Transcription failed")
            return f"❌ Critical Error: {str(e)}", None, None
# UI Setup
# One Processor instance is created at import time and shared by every click.
proc = Processor()
with gr.Blocks(title="Transcriber Pro") as demo:
    gr.Markdown("## 🎙️ Media Transcriber Pro (IPv4 Fix)")
    # Inputs: a URL or an uploaded file.
    with gr.Row():
        url_in = gr.Textbox(label="YouTube URL")
        file_in = gr.File(label="Upload File")
    lang_in = gr.Dropdown(["ar", "en"], value="ar", label="Language")
    btn = gr.Button("Transcribe", variant="primary")
    # Outputs: a status line plus the two generated files.
    status = gr.Textbox(label="Status")
    with gr.Row():
        f1 = gr.File(label="TXT")
        f2 = gr.File(label="SRT")
    # proc.run receives (url, upload, lang) and returns (status, txt, srt).
    btn.click(proc.run, [url_in, file_in, lang_in], [status, f1, f2])
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)