Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import asyncio | |
| from pathlib import Path | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| import edge_tts | |
| from pydub import AudioSegment | |
| from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| # ================================================================= | |
| # 1. UI STYLING & PREMIUM MOVING ANIMATIONS | |
| # ================================================================= | |
# Stylesheet for the Gradio UI, handed to ``app.launch(css=...)`` at the
# bottom of this file. It defines:
#   * ``.gradio-container`` - white page with four corner radial gradients
#     animated by the ``meshFlow`` keyframes;
#   * ``.glass-panel``      - translucent, blurred card that lifts on hover
#     (applied through ``elem_classes`` on the two ``gr.Group`` panels);
#   * ``.premium-btn``      - orange/purple gradient button whose background
#     slides on hover (applied to the generate button);
#   * ``fadeIn``            - entrance animation for the container's direct
#     children.
CUSTOM_CSS = """
.gradio-container {
background: #ffffff;
background-image:
radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%),
radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%);
background-attachment: fixed;
animation: meshFlow 20s ease-in-out infinite alternate;
min-height: 100vh;
overflow-x: hidden;
}
@keyframes meshFlow {
0% { background-size: 100% 100%; background-position: 0% 0%; }
50% { background-size: 140% 140%; background-position: 50% 50%; }
100% { background-size: 100% 100%; background-position: 100% 100%; }
}
.glass-panel {
background: rgba(255, 255, 255, 0.5) !important;
backdrop-filter: blur(25px) saturate(160%);
-webkit-backdrop-filter: blur(25px) saturate(160%);
border: 1px solid rgba(255, 255, 255, 0.4) !important;
border-radius: 28px !important;
padding: 30px !important;
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important;
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.glass-panel:hover {
transform: translateY(-8px);
background: rgba(255, 255, 255, 0.65) !important;
box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important;
}
.premium-btn {
background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important;
background-size: 200% auto !important;
border: none !important;
color: white !important;
font-weight: 800 !important;
text-transform: uppercase;
letter-spacing: 1px;
border-radius: 15px !important;
box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important;
transition: 0.5s all !important;
}
.premium-btn:hover {
background-position: right center !important;
transform: scale(1.04);
box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important;
}
.gradio-container > * {
animation: fadeIn 1.2s ease-out;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
"""
# Chat model requested from the Hugging Face Inference API when writing scripts.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# The access token comes from the environment; the lookup yields None when
# the CLONE_SURAJIT_TOKEN variable is not set.
SURAJIT_HF_TOKEN = os.environ.get("CLONE_SURAJIT_TOKEN")

# Shared client reused by every script-generation request.
client = InferenceClient(token=SURAJIT_HF_TOKEN)
| # ================================================================= | |
| # 2. CORE LOGIC | |
| # ================================================================= | |
def process_multiple_documents(files) -> str:
    """Extract and merge the text of every uploaded document.

    A loader is chosen per file from its extension (``.pdf`` ->
    ``PyPDFLoader``, ``.docx`` -> ``Docx2txtLoader``, anything else ->
    ``TextLoader``). A file that fails to load is reported on stdout and
    skipped, so one bad upload does not abort the whole batch.

    Args:
        files: Uploaded files. Each item may be a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing its path as
            ``.name``. ``None`` or an empty sequence is accepted.

    Returns:
        The merged text, re-chunked and truncated to at most 10,000
        characters, or ``""`` when there is nothing to extract.
    """
    if not files:
        return ""

    sections = []
    for file in files:
        # Accept plain paths as well as wrapper objects carrying ``.name``;
        # the isinstance check comes first because pathlib paths also have a
        # ``.name`` attribute (the basename), which would be the wrong value.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name
        ext = Path(path).suffix.lower()
        try:
            if ext == ".pdf":
                loader = PyPDFLoader(path)
            elif ext == ".docx":
                loader = Docx2txtLoader(path)
            else:
                loader = TextLoader(path)
            docs = loader.load()
        except Exception as e:
            # Deliberately best-effort: report the failure, keep the other files.
            print(f"Error loading {path}: {e}")
            continue
        sections.append(" ".join(d.page_content for d in docs) + "\n\n")

    combined_text = "".join(sections)
    if not combined_text.strip():
        # Nothing was extracted, so there is nothing to split.
        return ""

    # NOTE(review): chunks overlap by up to 100 characters, so joining them
    # back together can repeat text at chunk boundaries - confirm intended.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(combined_text)
    # Hard cap on the amount of context forwarded to the language model.
    return " ".join(chunks)[:10000]
def generate_timed_script(context: str, mode: str, duration: str) -> str:
    """Ask the chat model for a script of the requested style and length.

    Args:
        context: Source text the script should be based on.
        mode: Script style inserted into the system prompt (for example
            ``"Podcast"``).
        duration: Duration label; it is mapped to an approximate word
            count, and unknown labels fall back to the medium length.

    Returns:
        The generated script, or ``""`` when the model returned no content.
    """
    duration_map = {
        "1 Minute (Short)": "approx 150 words",
        "5 Minutes (Medium)": "approx 750 words",
        "10 Minutes (Detailed)": "approx 1500 words",
        "20 Minutes (Deep Dive)": "approx 3000 words",
    }
    # The fallback is phrased like the mapped values so the prompt wording
    # does not depend on whether the label was recognised.
    target_len = duration_map.get(duration, "approx 750 words")
    messages = [
        {"role": "system", "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. Use 'Host:' and 'Expert:' for dialogue."},
        {"role": "user", "content": f"Analyze these documents and write the script:\n\n{context}"},
    ]
    # NOTE(review): a 2500-token reply presumably cannot hold the ~3000 words
    # requested by the longest option - confirm the backend's output limit
    # before raising max_tokens.
    response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=2500)
    # The message content is optional in the response schema; normalise a
    # missing value to "" so callers can always treat the result as text.
    return response.choices[0].message.content or ""
async def create_audio(script: str, mode: str, voice: str, speed: float) -> str:
    """Synthesize *script* to an MP3 file with edge-tts and return its path.

    In ``"Podcast"`` mode the script is rendered line by line: a line whose
    speaker label (the text before the first colon) mentions "host" uses
    *voice*, any other labelled line uses the expert voice, and a line
    without a colon is read in full by the host. Clips are concatenated with
    600 ms of silence after each one. In every other mode the whole script
    is synthesized in one pass with *voice*.

    Args:
        script: Text to speak.
        mode: Script style; only ``"Podcast"`` triggers two-voice rendering.
        voice: edge-tts voice name for the host / single narrator.
        speed: Pace multiplier, where ``1.0`` is the normal rate.

    Returns:
        Path of the generated MP3 file. The caller owns the file.
    """
    # round() instead of int(): (1.15 - 1.0) * 100 evaluates to 14.99...,
    # which truncation would turn into "+14%" rather than "+15%".
    rate_str = f"{round((speed - 1.0) * 100):+d}%"

    if mode != "Podcast":
        # mkstemp + close leaves no open handle behind while the path is
        # being written by edge-tts.
        fd, out_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        await edge_tts.Communicate(script, voice, rate=rate_str).save(out_path)
        return out_path

    expert_voice = "en-GB-SoniaNeural"
    if voice == expert_voice:
        # Keep the two speakers distinguishable when the user picked the
        # voice that is otherwise reserved for the expert.
        expert_voice = "en-US-AndrewNeural"

    combined = AudioSegment.empty()
    for raw_line in script.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        label, colon, spoken = line.partition(":")
        if colon:
            # Decide the speaker from the label only, so an expert line that
            # merely mentions the host is not read in the host's voice.
            current_voice = voice if "host" in label.lower() else expert_voice
            text_to_speak = spoken.strip()
        else:
            # No speaker label: the host reads the line rather than skipping it.
            current_voice = voice
            text_to_speak = line
        if not text_to_speak:
            continue

        fd, clip_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        try:
            await edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str).save(clip_path)
            combined += AudioSegment.from_mp3(clip_path) + AudioSegment.silent(duration=600)
        finally:
            # Remove the per-line clip even when synthesis or decoding fails.
            os.remove(clip_path)

    fd, out_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    combined.export(out_path, format="mp3")
    return out_path
| # ================================================================= | |
| # 3. INTERFACE | |
| # ================================================================= | |
# Two-column layout: inputs and settings on the left, generated script and
# audio on the right.
# NOTE(review): the emoji in the headings and button label were restored from
# mis-encoded text. The stopwatch and speaking-head glyphs are recoverable
# exactly; the remaining four are best guesses - adjust if the original
# design used different ones.
with gr.Blocks() as app:
    gr.HTML("<div style='text-align: center; padding: 20px;'><img src='https://cdn.pixabay.com/animation/2023/06/13/15/12/15-12-47-323_512.gif' style='width:50px;'><h1 style='color: #1f2937; font-weight: 900;'>AI Multi-Doc Studio</h1></div>")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📂 Upload Documents</h4>")
                file_input = gr.File(label=None, file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
                gr.HTML("<h4>⏱️ Duration & Style</h4>")
                duration_sel = gr.Dropdown(
                    ["1 Minute (Short)", "5 Minutes (Medium)", "10 Minutes (Detailed)", "20 Minutes (Deep Dive)"],
                    value="5 Minutes (Medium)", label="Target Audio Length"
                )
                mode_sel = gr.Dropdown(["Podcast", "Storytelling", "Teaching", "Summary"], value="Podcast", label="Script Style")
                gr.HTML("<h4>🗣️ Voice Settings</h4>")
                voice_sel = gr.Dropdown([
                    ("Andrew (US - Male)", "en-US-AndrewNeural"),
                    ("Ava (US - Female)", "en-US-AvaNeural"),
                    ("Emma (UK - Female)", "en-GB-SoniaNeural"),
                    ("Aditi (IN - Female)", "en-IN-NeerjaNeural")
                ], value="en-US-AndrewNeural", label="Voice Selection")
                speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace")
                btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn")
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📜 Generated Script</h4>")
                out_txt = gr.Textbox(label=None, lines=15)
                gr.HTML("<h4>🔊 Audio Output</h4>")
                out_aud = gr.Audio(label=None)

    async def run_pipeline(files, dur, mode, voice, speed):
        """Run upload -> text extraction -> script -> audio for one click.

        Returns a ``(script_or_message, audio_path_or_None)`` pair matching
        the ``[out_txt, out_aud]`` outputs. Each stage that yields nothing
        usable stops the pipeline with an explanatory message instead of
        passing empty input to the next stage.
        """
        if not files:
            return "Please upload at least one file.", None
        ctx = process_multiple_documents(files)
        if not ctx or not ctx.strip():
            # Every file failed to load or contained no text.
            return "Could not extract any text from the uploaded files.", None
        sc = generate_timed_script(ctx, mode, dur)
        if not sc or not sc.strip():
            # Nothing to speak; skip text-to-speech.
            return "The model returned an empty script. Please try again.", None
        aud = await create_audio(sc, mode, voice, speed)
        return sc, aud

    btn.click(run_pipeline, inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld], outputs=[out_txt, out_aud])

if __name__ == "__main__":
    # NOTE(review): passing css to launch() presumes a Gradio release whose
    # launch() accepts it (older releases take css on gr.Blocks) - confirm
    # against the pinned Gradio version.
    app.launch(css=CUSTOM_CSS)