Spaces:

surajit2839
/

VOICENEWSC

Sleeping

File size: 8,588 Bytes

491687c
 
243c4c7
fbde9e6
198493b
fbde9e6
243c4c7
3ad3bfb
491687c
 
999c1a2
198493b
98a52ab
198493b
3ad3bfb
 
98a52ab
 
 
 
 
 
 
 
4e4afae
98a52ab
fa0327e
98a52ab
 
 
 
 
d7c8932
98a52ab
3ad3bfb
98a52ab
 
 
c22dae2
98a52ab
 
 
 
 
 
 
 
 
 
c22dae2
98a52ab
4e4afae
98a52ab
 
2ab9e63
4e4afae
98a52ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198493b
6a87434
 
491687c
c19672a
3ad3bfb
999c1a2
198493b
4e4afae
198493b
 
4e4afae
 
3ad3bfb
4e4afae
 
 
 
 
 
 
 
 
 
ce2dcee
4e4afae
0bf8577
4e4afae
0cdc7c0
4e4afae
3ad3bfb
4e4afae
 
 
 
3ad3bfb
4e4afae
3ad3bfb
198493b
98a52ab
4e4afae
198493b
4e4afae
 
afa366c
4e4afae
243c4c7
 
4e4afae
 
 
98a52ab
4e4afae
98a52ab
 
 
 
4e4afae
 
98a52ab
 
 
 
 
 
 
 
 
 
 
 
4e4afae
 
 
 
 
 
 
 
 
999c1a2
198493b
4e4afae
198493b
fa0327e
4e4afae
3ad3bfb
 
 
2ab9e63
4e4afae
 
 
 
 
 
 
 
 
 
b8b543b
fa0327e
b8b543b
 
 
 
 
4e4afae
 
 
 
3ad3bfb
08a5937
4e4afae
 
 
 
08a5937
4e4afae
 
 
 
 
 
08a5937
4e4afae
999c1a2
198493b
4e4afae

import os
import tempfile
import asyncio
from pathlib import Path
import gradio as gr
from huggingface_hub import InferenceClient
import edge_tts
from pydub import AudioSegment
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# =================================================================
# 1. UI STYLING & PREMIUM MOVING ANIMATIONS
# =================================================================
# Stylesheet for the Gradio UI; it is passed to `app.launch(css=...)` at the
# bottom of this file. Rules defined here:
#   * `.gradio-container` - white page with four corner radial gradients,
#     animated by the `meshFlow` keyframes.
#   * `.glass-panel`      - translucent, blurred card with a hover lift; applied
#     through `elem_classes` to the two `gr.Group` panels.
#   * `.premium-btn`      - orange/purple gradient button with a hover effect;
#     applied through `elem_classes` to the generate button.
#   * `fadeIn`            - entrance animation for the container's direct children.
CUSTOM_CSS = """
.gradio-container {
    background: #ffffff;
    background-image: 
        radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
        radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%),
        radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
        radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%);
    background-attachment: fixed;
    animation: meshFlow 20s ease-in-out infinite alternate;
    min-height: 100vh;
    overflow-x: hidden;
}

@keyframes meshFlow {
    0% { background-size: 100% 100%; background-position: 0% 0%; }
    50% { background-size: 140% 140%; background-position: 50% 50%; }
    100% { background-size: 100% 100%; background-position: 100% 100%; }
}

.glass-panel {
    background: rgba(255, 255, 255, 0.5) !important;
    backdrop-filter: blur(25px) saturate(160%);
    -webkit-backdrop-filter: blur(25px) saturate(160%);
    border: 1px solid rgba(255, 255, 255, 0.4) !important;
    border-radius: 28px !important;
    padding: 30px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important;
    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}

.glass-panel:hover {
    transform: translateY(-8px);
    background: rgba(255, 255, 255, 0.65) !important;
    box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important;
}

.premium-btn {
    background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important;
    background-size: 200% auto !important;
    border: none !important;
    color: white !important;
    font-weight: 800 !important;
    text-transform: uppercase;
    letter-spacing: 1px;
    border-radius: 15px !important;
    box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important;
    transition: 0.5s all !important;
}

.premium-btn:hover {
    background-position: right center !important;
    transform: scale(1.04);
    box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important;
}

.gradio-container > * {
    animation: fadeIn 1.2s ease-out;
}

@keyframes fadeIn {
    from { opacity: 0; transform: translateY(20px); }
    to { opacity: 1; transform: translateY(0); }
}
"""

# Hugging Face API token read from the CLONE_SURAJIT_TOKEN environment
# variable. os.getenv returns None when the variable is unset, in which case
# the client below is constructed with token=None.
SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN")
# Module-level inference client shared by generate_timed_script().
client = InferenceClient(token=SURAJIT_HF_TOKEN)
# Model id sent with every chat-completion request.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# =================================================================
# 2. CORE LOGIC
# =================================================================

def process_multiple_documents(files, max_chars: int = 10000) -> str:
    """Extract the text of the uploaded documents and merge it into one string.

    Args:
        files: Uploaded files. Each item is either a filesystem path or an
            object exposing the path as ``.name``. ``None`` or an empty
            sequence yields an empty string.
        max_chars: Upper bound on the length of the returned text.

    Returns:
        The text of every document that could be loaded, passed through the
        recursive splitter and truncated to ``max_chars`` characters. An empty
        string is returned when no document produced any text.
    """
    if not files:
        return ""

    sections = []
    for file in files:
        # Accept plain path strings as well as objects carrying ``.name``.
        path = str(getattr(file, "name", file))
        ext = Path(path).suffix.lower()
        try:
            if ext == ".pdf":
                loader = PyPDFLoader(path)
            elif ext == ".docx":
                loader = Docx2txtLoader(path)
            else:
                loader = TextLoader(path)
            docs = loader.load()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"Error loading {path}: {e}")
            continue
        sections.append(" ".join(d.page_content for d in docs))

    combined_text = "\n\n".join(sections)
    if not combined_text.strip():
        # Nothing was extracted; skip the splitter entirely.
        return ""

    # The chunks are re-joined immediately, so any overlap between them would
    # only duplicate text and waste the character budget; hence overlap is 0.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_text(combined_text)
    return " ".join(chunks)[:max_chars]

def generate_timed_script(context: str, mode: str, duration: str):
    """Request a script of the chosen style and length from the chat model.

    Args:
        context: Document text embedded in the user message.
        mode: Script style, interpolated into the system prompt.
        duration: Duration label; it is mapped to a word-count hint, and an
            unrecognised label falls back to "750 words".

    Returns:
        The message content of the first choice in the completion response.
    """
    word_budgets = {
        "1 Minute (Short)": "approx 150 words",
        "5 Minutes (Medium)": "approx 750 words",
        "10 Minutes (Detailed)": "approx 1500 words",
        "20 Minutes (Deep Dive)": "approx 3000 words",
    }
    if duration in word_budgets:
        length_hint = word_budgets[duration]
    else:
        length_hint = "750 words"

    system_prompt = f"You are a master scriptwriter. Mode: {mode}. Length: {length_hint}. Use 'Host:' and 'Expert:' for dialogue."
    user_prompt = f"Analyze these documents and write the script:\n\n{context}"

    completion = client.chat_completion(
        model=MODEL_ID,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=2500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Voice used for every non-host speaker in Podcast mode.
_GUEST_VOICE = "en-GB-SoniaNeural"
# Substitute guest voice when the host already uses _GUEST_VOICE, so the two
# speakers remain distinguishable.
_ALT_GUEST_VOICE = "en-US-AvaNeural"
# Silence appended after each spoken line, in milliseconds.
_LINE_PAUSE_MS = 600


def _rate_string(speed: float) -> str:
    """Convert a pace multiplier (1.0 = normal) into a rate such as ``+10%``."""
    # round() instead of int(): 0.9 - 1.0 evaluates to -0.0999... in binary
    # floating point, which int() would truncate to -9% rather than -10%.
    return f"{round((speed - 1.0) * 100):+d}%"


def _split_speaker(line: str, host_voice: str, guest_voice: str):
    """Return ``(voice, text)`` for one non-empty script line."""
    label, sep, spoken = line.partition(":")
    if not sep:
        # No speaker label: the host reads the whole line.
        return host_voice, line
    # Decide from the label only, so a guest line that merely mentions the
    # word "Host" in its text is not read in the host voice.
    chosen = host_voice if "host" in label.casefold() else guest_voice
    # Labels written as Markdown bold ("**Host:** ...") leave stray asterisks
    # at the start of the spoken text; drop them.
    return chosen, spoken.strip().strip("*").strip()


def _new_mp3_path() -> str:
    """Create an empty temporary ``.mp3`` file and return its path.

    The handle is closed before returning so that other libraries can reopen
    the path and no file descriptor is leaked.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        return tmp.name


async def create_audio(script: str, mode: str, voice: str, speed: float) -> str:
    """Synthesize ``script`` to an MP3 file and return the file's path.

    Args:
        script: Text to speak. In Podcast mode it is processed line by line
            and a ``Label:`` prefix selects the speaker.
        mode: ``"Podcast"`` alternates a host and a guest voice; any other
            value reads the whole script with ``voice``.
        voice: Voice id for the host (Podcast) or for the entire script.
        speed: Pace multiplier where 1.0 is normal speed.

    Returns:
        Path of a temporary MP3 file. The caller owns the file.
    """
    rate_str = _rate_string(speed)

    if mode != "Podcast":
        out = _new_mp3_path()
        try:
            await edge_tts.Communicate(script, voice, rate=rate_str).save(out)
        except Exception:
            # Do not leave an empty temp file behind when synthesis fails.
            os.remove(out)
            raise
        return out

    guest_voice = _ALT_GUEST_VOICE if voice == _GUEST_VOICE else _GUEST_VOICE
    pause = AudioSegment.silent(duration=_LINE_PAUSE_MS)
    combined = AudioSegment.empty()

    for raw_line in script.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        current_voice, text_to_speak = _split_speaker(line, voice, guest_voice)
        if not text_to_speak:
            continue

        t_path = _new_mp3_path()
        try:
            communicate = edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str)
            await communicate.save(t_path)
            combined += AudioSegment.from_mp3(t_path) + pause
        finally:
            # Always remove the per-line clip, even when synthesis fails.
            os.remove(t_path)

    out = _new_mp3_path()
    try:
        combined.export(out, format="mp3")
    except Exception:
        os.remove(out)
        raise
    return out

# =================================================================
# 3. INTERFACE
# =================================================================
# Two-column layout: inputs (files, duration, style, voice, pace) on the left,
# generated script and audio on the right.
with gr.Blocks() as app:
    gr.HTML("<div style='text-align: center; padding: 20px;'><img src='https://cdn.pixabay.com/animation/2023/06/13/15/12/15-12-47-323_512.gif' style='width:50px;'><h1 style='color: #1f2937; font-weight: 900;'>AI Multi-Doc Studio</h1></div>")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📚 Upload Documents</h4>")
                file_input = gr.File(
                    label=None,
                    file_count="multiple",
                    file_types=[".pdf", ".docx", ".txt"],
                )

                gr.HTML("<h4>⏱️ Duration & Style</h4>")
                # These labels must match the keys of duration_map in
                # generate_timed_script().
                duration_sel = gr.Dropdown(
                    [
                        "1 Minute (Short)",
                        "5 Minutes (Medium)",
                        "10 Minutes (Detailed)",
                        "20 Minutes (Deep Dive)",
                    ],
                    value="5 Minutes (Medium)",
                    label="Target Audio Length",
                )
                mode_sel = gr.Dropdown(
                    ["Podcast", "Storytelling", "Teaching", "Summary"],
                    value="Podcast",
                    label="Script Style",
                )

                gr.HTML("<h4>🗣️ Voice Settings</h4>")
                # (display label, voice id) pairs; the id is what reaches
                # create_audio().
                voice_sel = gr.Dropdown(
                    [
                        ("Andrew (US - Male)", "en-US-AndrewNeural"),
                        ("Ava (US - Female)", "en-US-AvaNeural"),
                        ("Emma (UK - Female)", "en-GB-SoniaNeural"),
                        ("Aditi (IN - Female)", "en-IN-NeerjaNeural"),
                    ],
                    value="en-US-AndrewNeural",
                    label="Voice Selection",
                )
                speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace")

                btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn")

        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📝 Generated Script</h4>")
                out_txt = gr.Textbox(label=None, lines=15)
                gr.HTML("<h4>🔊 Audio Output</h4>")
                out_aud = gr.Audio(label=None)

    async def run_pipeline(files, dur, mode, voice, speed):
        """Run upload -> script -> audio and return ``(script, audio_path)``.

        When there is nothing to process, a user-facing message is returned in
        place of the script and the audio path is ``None``.
        """
        if not files:
            return "Please upload at least one file.", None

        # Document parsing and the inference request are blocking calls; run
        # them in the default executor so this coroutine does not stall the
        # event loop while they are in progress.
        loop = asyncio.get_running_loop()
        ctx = await loop.run_in_executor(None, process_multiple_documents, files)
        if not ctx.strip():
            # Avoid asking the model to write about empty input.
            return "No readable text could be extracted from the uploaded files.", None

        sc = await loop.run_in_executor(None, generate_timed_script, ctx, mode, dur)
        aud = await create_audio(sc, mode, voice, speed)
        return sc, aud

    btn.click(
        run_pipeline,
        inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld],
        outputs=[out_txt, out_aud],
    )

# Start the Gradio server only when this file is executed as a script,
# passing the stylesheet defined in CUSTOM_CSS.
# NOTE(review): accepting css= on launch() depends on the installed Gradio
# version (other releases take it on gr.Blocks instead) - confirm against the
# version this Space pins.
if __name__ == "__main__":
    app.launch(css=CUSTOM_CSS)