import os import tempfile import asyncio from pathlib import Path import gradio as gr from huggingface_hub import InferenceClient import edge_tts from pydub import AudioSegment from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader from langchain_text_splitters import RecursiveCharacterTextSplitter # ================================================================= # 1. UI STYLING & PREMIUM MOVING ANIMATIONS # ================================================================= CUSTOM_CSS = """ .gradio-container { background: #ffffff; background-image: radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%), radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%), radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%), radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%); background-attachment: fixed; animation: meshFlow 20s ease-in-out infinite alternate; min-height: 100vh; overflow-x: hidden; } @keyframes meshFlow { 0% { background-size: 100% 100%; background-position: 0% 0%; } 50% { background-size: 140% 140%; background-position: 50% 50%; } 100% { background-size: 100% 100%; background-position: 100% 100%; } } .glass-panel { background: rgba(255, 255, 255, 0.5) !important; backdrop-filter: blur(25px) saturate(160%); -webkit-backdrop-filter: blur(25px) saturate(160%); border: 1px solid rgba(255, 255, 255, 0.4) !important; border-radius: 28px !important; padding: 30px !important; box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important; transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1); } .glass-panel:hover { transform: translateY(-8px); background: rgba(255, 255, 255, 0.65) !important; box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important; } .premium-btn { background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important; background-size: 200% auto !important; border: none !important; color: white !important; font-weight: 800 !important; text-transform: uppercase; letter-spacing: 1px; border-radius: 15px !important; box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important; transition: 0.5s all !important; } .premium-btn:hover { background-position: right center !important; transform: scale(1.04); box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important; } .gradio-container > * { animation: fadeIn 1.2s ease-out; } @keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } } """ SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN") client = InferenceClient(token=SURAJIT_HF_TOKEN) MODEL_ID = "HuggingFaceH4/zephyr-7b-beta" # ================================================================= # 2. CORE LOGIC # ================================================================= def process_multiple_documents(files) -> str: if not files: return "" combined_text = "" for file in files: ext = Path(file.name).suffix.lower() try: if ext == ".pdf": loader = PyPDFLoader(file.name) elif ext == ".docx": loader = Docx2txtLoader(file.name) else: loader = TextLoader(file.name) docs = loader.load() combined_text += " ".join([d.page_content for d in docs]) + "\n\n" except Exception as e: print(f"Error loading {file.name}: {e}") splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) chunks = splitter.split_text(combined_text) return " ".join(chunks)[:10000] def generate_timed_script(context: str, mode: str, duration: str): duration_map = { "1 Minute (Short)": "approx 150 words", "5 Minutes (Medium)": "approx 750 words", "10 Minutes (Detailed)": "approx 1500 words", "20 Minutes (Deep Dive)": "approx 3000 words" } target_len = duration_map.get(duration, "750 words") messages = [ {"role": "system", "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. Use 'Host:' and 'Expert:' for dialogue."}, {"role": "user", "content": f"Analyze these documents and write the script:\n\n{context}"} ] response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=2500) return response.choices[0].message.content async def create_audio(script: str, mode: str, voice: str, speed: float): rate_val = int((speed - 1.0) * 100) rate_str = f"{rate_val:+d}%" if mode == "Podcast": combined = AudioSegment.empty() for line in script.split('\n'): line = line.strip() if not line: continue # Determine which voice to use if ":" in line: current_voice = voice if "Host" in line else "en-GB-SoniaNeural" text_to_speak = line.split(":", 1)[1].strip() else: # If no colon, Host reads the line instead of skipping it current_voice = voice text_to_speak = line if text_to_speak: communicate = edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str) t_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name await communicate.save(t_path) combined += AudioSegment.from_mp3(t_path) + AudioSegment.silent(duration=600) os.remove(t_path) out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name combined.export(out, format="mp3") return out else: communicate = edge_tts.Communicate(script, voice, rate=rate_str) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") await communicate.save(tmp.name) return tmp.name # ================================================================= # 3. INTERFACE # ================================================================= with gr.Blocks() as app: gr.HTML("
