"""AI Multi-Doc Studio: turn uploaded documents into a narrated audio script.

Pipeline (wired together in the Gradio interface at the bottom of the file):

1. ``process_multiple_documents`` extracts text from uploaded PDF, DOCX and
   plain-text files and condenses it into one context string.
2. ``generate_timed_script`` asks a Hugging Face hosted chat model to write a
   script of the requested style and length from that context.
3. ``create_audio`` synthesizes the script to an MP3 file with edge-tts.
"""
import os
import tempfile
import asyncio
from pathlib import Path
import gradio as gr
from huggingface_hub import InferenceClient
import edge_tts
from pydub import AudioSegment
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# =================================================================
# 1. UI STYLING & PREMIUM MOVING ANIMATIONS
# =================================================================
# Stylesheet for the whole UI, passed to Gradio via app.launch(css=...) at the
# bottom of the file. It defines:
#   * .gradio-container  - white page with four corner radial gradients that
#                          drift via the "meshFlow" keyframes;
#   * .glass-panel       - translucent, blurred cards that lift on hover
#                          (attached to the gr.Group containers via elem_classes);
#   * .premium-btn       - the orange/purple gradient button (attached to the
#                          generate button via elem_classes);
#   * fadeIn             - entrance animation for the container's direct children.
CUSTOM_CSS = """
.gradio-container {
    background: #ffffff;
    background-image: 
        radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
        radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%),
        radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
        radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%);
    background-attachment: fixed;
    animation: meshFlow 20s ease-in-out infinite alternate;
    min-height: 100vh;
    overflow-x: hidden;
}

@keyframes meshFlow {
    0% { background-size: 100% 100%; background-position: 0% 0%; }
    50% { background-size: 140% 140%; background-position: 50% 50%; }
    100% { background-size: 100% 100%; background-position: 100% 100%; }
}

.glass-panel {
    background: rgba(255, 255, 255, 0.5) !important;
    backdrop-filter: blur(25px) saturate(160%);
    -webkit-backdrop-filter: blur(25px) saturate(160%);
    border: 1px solid rgba(255, 255, 255, 0.4) !important;
    border-radius: 28px !important;
    padding: 30px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important;
    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}

.glass-panel:hover {
    transform: translateY(-8px);
    background: rgba(255, 255, 255, 0.65) !important;
    box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important;
}

.premium-btn {
    background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important;
    background-size: 200% auto !important;
    border: none !important;
    color: white !important;
    font-weight: 800 !important;
    text-transform: uppercase;
    letter-spacing: 1px;
    border-radius: 15px !important;
    box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important;
    transition: 0.5s all !important;
}

.premium-btn:hover {
    background-position: right center !important;
    transform: scale(1.04);
    box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important;
}

.gradio-container > * {
    animation: fadeIn 1.2s ease-out;
}

@keyframes fadeIn {
    from { opacity: 0; transform: translateY(20px); }
    to { opacity: 1; transform: translateY(0); }
}
"""

# Hugging Face access token, read from the CLONE_SURAJIT_TOKEN environment
# variable; os.getenv returns None when the variable is not set.
SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN")
# Module-level inference client shared by every request; generate_timed_script
# uses it for chat completions.
client = InferenceClient(token=SURAJIT_HF_TOKEN)
# Identifier of the chat model requested in generate_timed_script.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# =================================================================
# 2. CORE LOGIC
# =================================================================

def process_multiple_documents(files, max_chars: int = 10000) -> str:
    """Extract the text of every uploaded file and condense it into one string.

    Args:
        files: Iterable of uploaded files. Each item may be a path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` or an empty collection yields ``""``.
        max_chars: Upper bound on the length of the returned context.

    Returns:
        The whitespace-normalized text of all files that could be read, in
        upload order, truncated to ``max_chars`` characters. Files that fail
        to load are reported on stdout and skipped; if nothing could be read
        the result is ``""``.
    """
    if not files:
        return ""

    texts = []
    for file in files:
        # Path objects also have a ``.name`` attribute, but it is only the
        # basename, so real paths are resolved with os.fspath first.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name
        ext = Path(path).suffix.lower()
        try:
            if ext == ".pdf":
                loader = PyPDFLoader(path)
            elif ext == ".docx":
                loader = Docx2txtLoader(path)
            else:
                loader = TextLoader(path)
            docs = loader.load()
        except Exception as e:
            # Best effort: one unreadable file must not discard the others.
            print(f"Error loading {path}: {e}")
            continue
        texts.append(" ".join(d.page_content for d in docs))

    combined_text = "".join(f"{text}\n\n" for text in texts)
    if not combined_text.strip():
        return ""

    # The chunks are joined straight back together, so any overlap between
    # neighbouring chunks would only duplicate text and eat into max_chars.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_text(combined_text)
    return " ".join(chunks)[:max_chars]

def generate_timed_script(context: str, mode: str, duration: str) -> str:
    """Ask the chat model to write a script about ``context``.

    Args:
        context: Source text the script should be based on.
        mode: Script style. ``"Podcast"`` requests a two-speaker dialogue with
            'Host:' / 'Expert:' labels; any other value requests unlabeled
            narration, because create_audio reads non-podcast scripts verbatim
            with a single voice.
        duration: One of the duration labels offered in the UI; an unknown
            label falls back to the 5-minute target.

    Returns:
        The generated script, or ``""`` if the model returned no content.
    """
    duration_map = {
        "1 Minute (Short)": "approx 150 words",
        "5 Minutes (Medium)": "approx 750 words",
        "10 Minutes (Detailed)": "approx 1500 words",
        "20 Minutes (Deep Dive)": "approx 3000 words",
    }
    target_len = duration_map.get(duration, "approx 750 words")

    if mode == "Podcast":
        format_rule = "Use 'Host:' and 'Expert:' for dialogue."
    else:
        # Speaker labels would be spoken aloud by the single-voice TTS path.
        format_rule = (
            "Write it as one narrator's continuous speech with no speaker "
            "labels, headings, or stage directions."
        )

    messages = [
        {
            "role": "system",
            "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. {format_rule}",
        },
        {
            "role": "user",
            "content": f"Analyze these documents and write the script:\n\n{context}",
        },
    ]
    # NOTE(review): a 2500-token cap is likely too small for the ~3000-word
    # "Deep Dive" target; confirm the endpoint's limits before raising it.
    response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=2500)
    content = response.choices[0].message.content
    return content or ""

# Voice of the second podcast speaker, and the stand-in used when the caller's
# host voice is that same voice (so the two speakers stay distinguishable).
_EXPERT_VOICE = "en-GB-SoniaNeural"
_ALT_EXPERT_VOICE = "en-US-AndrewNeural"
# A "Label:" prefix longer than this is treated as ordinary prose that merely
# contains a colon, not as a speaker label.
_MAX_SPEAKER_LABEL_CHARS = 30


def _new_mp3_path() -> str:
    """Create an empty temporary ``.mp3`` file and return its path, leaving no handle open."""
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    return path


def _parse_script_line(line: str, host_voice: str, expert_voice: str) -> tuple[str, str]:
    """Return ``(voice, text_to_speak)`` for one non-empty podcast script line."""
    label, sep, rest = line.partition(":")
    label = label.strip("*#_ \t")  # tolerate markdown such as "**Host:**"
    is_speaker_label = (
        bool(sep)
        and bool(label)
        and label[0].isalpha()
        and len(label) <= _MAX_SPEAKER_LABEL_CHARS
    )
    if not is_speaker_label:
        # No recognizable label: the host reads the whole line.
        return host_voice, line
    text = rest.strip().lstrip("*_").strip()
    # Only the label decides the speaker, so an Expert line that mentions
    # the word "Host" keeps the expert voice.
    chosen = host_voice if "host" in label.lower() else expert_voice
    return chosen, text


async def create_audio(script: str, mode: str, voice: str, speed: float):
    """Synthesize ``script`` to an MP3 file with edge-tts and return its path.

    Args:
        script: Text to speak. In Podcast mode it is processed line by line
            and a leading ``Label:`` selects the speaker.
        mode: ``"Podcast"`` alternates two voices with a 600 ms pause after
            every line; any other mode reads the whole script with ``voice``.
        voice: edge-tts voice name for the host / single narrator.
        speed: Playback pace where 1.0 is normal; converted to the signed
            percentage string edge-tts expects (1.2 -> "+20%").

    Returns:
        Path of a temporary ``.mp3`` file. The file is not deleted here.
    """
    # round() instead of int(): (1.15 - 1.0) * 100 evaluates to 14.99..., which
    # int() would truncate to 14.
    rate_str = f"{round((speed - 1.0) * 100):+d}%"

    if mode != "Podcast":
        out_path = _new_mp3_path()
        await edge_tts.Communicate(script, voice, rate=rate_str).save(out_path)
        return out_path

    expert_voice = _ALT_EXPERT_VOICE if voice == _EXPERT_VOICE else _EXPERT_VOICE
    pause = AudioSegment.silent(duration=600)
    combined = AudioSegment.empty()

    for raw_line in script.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        current_voice, text_to_speak = _parse_script_line(line, voice, expert_voice)
        if not text_to_speak:
            continue

        t_path = _new_mp3_path()
        try:
            await edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str).save(t_path)
            combined += AudioSegment.from_mp3(t_path) + pause
        finally:
            # Remove the per-line clip even when synthesis or decoding fails.
            os.remove(t_path)

    out = _new_mp3_path()
    combined.export(out, format="mp3")
    return out

# =================================================================
# 3. INTERFACE
# =================================================================
# Two-column layout: inputs (documents, duration/style, voice) on the left,
# generated script and audio on the right. The button runs run_pipeline.
with gr.Blocks() as app:
    gr.HTML("<div style='text-align: center; padding: 20px;'><img src='https://cdn.pixabay.com/animation/2023/06/13/15/12/15-12-47-323_512.gif' style='width:50px;'><h1 style='color: #1f2937; font-weight: 900;'>AI Multi-Doc Studio</h1></div>")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📚 Upload Documents</h4>")
                file_input = gr.File(label=None, file_count="multiple", file_types=[".pdf", ".docx", ".txt"])

                gr.HTML("<h4>⏱️ Duration & Style</h4>")
                duration_sel = gr.Dropdown(
                    ["1 Minute (Short)", "5 Minutes (Medium)", "10 Minutes (Detailed)", "20 Minutes (Deep Dive)"],
                    value="5 Minutes (Medium)", label="Target Audio Length"
                )
                mode_sel = gr.Dropdown(["Podcast", "Storytelling", "Teaching", "Summary"], value="Podcast", label="Script Style")

                gr.HTML("<h4>🗣️ Voice Settings</h4>")
                voice_sel = gr.Dropdown([
                    ("Andrew (US - Male)", "en-US-AndrewNeural"),
                    ("Ava (US - Female)", "en-US-AvaNeural"),
                    ("Emma (UK - Female)", "en-GB-SoniaNeural"),
                    ("Aditi (IN - Female)", "en-IN-NeerjaNeural")
                ], value="en-US-AndrewNeural", label="Voice Selection")
                speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace")

                btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn")

        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📝 Generated Script</h4>")
                out_txt = gr.Textbox(label=None, lines=15)
                gr.HTML("<h4>🔊 Audio Output</h4>")
                out_aud = gr.Audio(label=None)

    async def run_pipeline(files, dur, mode, voice, speed):
        """Run documents -> script -> audio and return ``(script_text, audio_path)``.

        On a recoverable problem (no upload, no extractable text, empty model
        reply) a message is returned for the script box and the audio is None.
        """
        if not files:
            return "Please upload at least one file.", None

        # Document parsing and the chat-completion request are blocking calls;
        # running them in worker threads keeps the event loop free for other
        # requests while they execute.
        ctx = await asyncio.to_thread(process_multiple_documents, files)
        if not ctx.strip():
            return "Could not extract any text from the uploaded files.", None

        sc = await asyncio.to_thread(generate_timed_script, ctx, mode, dur)
        if not sc or not sc.strip():
            return "The model returned an empty script. Please try again.", None

        aud = await create_audio(sc, mode, voice, speed)
        return sc, aud

    btn.click(run_pipeline, inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld], outputs=[out_txt, out_aud])

# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    # The stylesheet is supplied at launch time rather than on gr.Blocks().
    # NOTE(review): this presumably targets a Gradio release whose launch()
    # accepts `css` - confirm against the pinned Gradio version.
    app.launch(css=CUSTOM_CSS)