import os import tempfile import asyncio from pathlib import Path import gradio as gr from huggingface_hub import InferenceClient import edge_tts from pydub import AudioSegment from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader from langchain_text_splitters import RecursiveCharacterTextSplitter # ================================================================= # 1. UI STYLING & PREMIUM MOVING ANIMATIONS # ================================================================= CUSTOM_CSS = """ .gradio-container { background: #ffffff; background-image: radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%), radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%), radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%), radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%); background-attachment: fixed; animation: meshFlow 20s ease-in-out infinite alternate; min-height: 100vh; overflow-x: hidden; } @keyframes meshFlow { 0% { background-size: 100% 100%; background-position: 0% 0%; } 50% { background-size: 140% 140%; background-position: 50% 50%; } 100% { background-size: 100% 100%; background-position: 100% 100%; } } .glass-panel { background: rgba(255, 255, 255, 0.5) !important; backdrop-filter: blur(25px) saturate(160%); -webkit-backdrop-filter: blur(25px) saturate(160%); border: 1px solid rgba(255, 255, 255, 0.4) !important; border-radius: 28px !important; padding: 30px !important; box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important; transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1); } .glass-panel:hover { transform: translateY(-8px); background: rgba(255, 255, 255, 0.65) !important; box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important; } .premium-btn { background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important; background-size: 200% auto !important; border: none !important; color: white !important; font-weight: 800 !important; text-transform: uppercase; letter-spacing: 1px; border-radius: 15px !important; box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important; transition: 0.5s all !important; } .premium-btn:hover { background-position: right center !important; transform: scale(1.04); box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important; } .gradio-container > * { animation: fadeIn 1.2s ease-out; } @keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } } """ SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN") client = InferenceClient(token=SURAJIT_HF_TOKEN) MODEL_ID = "HuggingFaceH4/zephyr-7b-beta" # ================================================================= # 2. CORE LOGIC # ================================================================= def process_multiple_documents(files) -> str: if not files: return "" combined_text = "" for file in files: ext = Path(file.name).suffix.lower() try: if ext == ".pdf": loader = PyPDFLoader(file.name) elif ext == ".docx": loader = Docx2txtLoader(file.name) else: loader = TextLoader(file.name) docs = loader.load() combined_text += " ".join([d.page_content for d in docs]) + "\n\n" except Exception as e: print(f"Error loading {file.name}: {e}") splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) chunks = splitter.split_text(combined_text) return " ".join(chunks)[:10000] def generate_timed_script(context: str, mode: str, duration: str): duration_map = { "1 Minute (Short)": "approx 150 words", "5 Minutes (Medium)": "approx 750 words", "10 Minutes (Detailed)": "approx 1500 words", "20 Minutes (Deep Dive)": "approx 3000 words" } target_len = duration_map.get(duration, "750 words") messages = [ {"role": "system", "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. Use 'Host:' and 'Expert:' for dialogue."}, {"role": "user", "content": f"Analyze these documents and write the script:\n\n{context}"} ] response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=2500) return response.choices[0].message.content async def create_audio(script: str, mode: str, voice: str, speed: float): rate_val = int((speed - 1.0) * 100) rate_str = f"{rate_val:+d}%" if mode == "Podcast": combined = AudioSegment.empty() for line in script.split('\n'): line = line.strip() if not line: continue # Determine which voice to use if ":" in line: current_voice = voice if "Host" in line else "en-GB-SoniaNeural" text_to_speak = line.split(":", 1)[1].strip() else: # If no colon, Host reads the line instead of skipping it current_voice = voice text_to_speak = line if text_to_speak: communicate = edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str) t_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name await communicate.save(t_path) combined += AudioSegment.from_mp3(t_path) + AudioSegment.silent(duration=600) os.remove(t_path) out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name combined.export(out, format="mp3") return out else: communicate = edge_tts.Communicate(script, voice, rate=rate_str) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") await communicate.save(tmp.name) return tmp.name # ================================================================= # 3. INTERFACE # ================================================================= with gr.Blocks() as app: gr.HTML("

AI Multi-Doc Studio

") with gr.Row(): with gr.Column(scale=1): with gr.Group(elem_classes="glass-panel"): gr.HTML("

📚 Upload Documents

") file_input = gr.File(label=None, file_count="multiple", file_types=[".pdf", ".docx", ".txt"]) gr.HTML("

⏱️ Duration & Style

") duration_sel = gr.Dropdown( ["1 Minute (Short)", "5 Minutes (Medium)", "10 Minutes (Detailed)", "20 Minutes (Deep Dive)"], value="5 Minutes (Medium)", label="Target Audio Length" ) mode_sel = gr.Dropdown(["Podcast", "Storytelling", "Teaching", "Summary"], value="Podcast", label="Script Style") gr.HTML("

🗣️ Voice Settings

") voice_sel = gr.Dropdown([ ("Andrew (US - Male)", "en-US-AndrewNeural"), ("Ava (US - Female)", "en-US-AvaNeural"), ("Emma (UK - Female)", "en-GB-SoniaNeural"), ("Aditi (IN - Female)", "en-IN-NeerjaNeural") ], value="en-US-AndrewNeural", label="Voice Selection") speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace") btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn") with gr.Column(scale=1): with gr.Group(elem_classes="glass-panel"): gr.HTML("

📝 Generated Script

") out_txt = gr.Textbox(label=None, lines=15) gr.HTML("

🔊 Audio Output

") out_aud = gr.Audio(label=None) async def run_pipeline(files, dur, mode, voice, speed): if not files: return "Please upload at least one file.", None ctx = process_multiple_documents(files) sc = generate_timed_script(ctx, mode, dur) aud = await create_audio(sc, mode, voice, speed) return sc, aud btn.click(run_pipeline, inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld], outputs=[out_txt, out_aud]) if __name__ == "__main__": app.launch(css=CUSTOM_CSS)