# NOTE: this file was captured from a Hugging Face Space file viewer. The page
# header ("Spaces: Sleeping", "File size: 8,588 Bytes") and the commit-hash /
# line-number gutter that preceded the first import were extraction residue,
# not part of the program, and have been removed.
import os
import tempfile
import asyncio
from pathlib import Path
import gradio as gr
from huggingface_hub import InferenceClient
import edge_tts
from pydub import AudioSegment
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# =================================================================
# 1. UI STYLING & PREMIUM MOVING ANIMATIONS
# =================================================================
# Stylesheet for the Gradio UI, handed to app.launch(css=...) at the bottom of
# this file. It defines:
#   * .gradio-container - white page with four radial gradients whose size and
#     position are animated by the `meshFlow` keyframes;
#   * .glass-panel      - translucent, blurred card used by the gr.Group
#     containers (lifts on hover);
#   * .premium-btn      - orange/purple gradient button used by the generate
#     button (gradient slides and button scales on hover);
#   * `fadeIn`          - entry animation applied to the container's direct
#     children.
# The text inside the literal is consumed at runtime, so it is left untouched.
CUSTOM_CSS = """
.gradio-container {
background: #ffffff;
background-image:
radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%),
radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%);
background-attachment: fixed;
animation: meshFlow 20s ease-in-out infinite alternate;
min-height: 100vh;
overflow-x: hidden;
}
@keyframes meshFlow {
0% { background-size: 100% 100%; background-position: 0% 0%; }
50% { background-size: 140% 140%; background-position: 50% 50%; }
100% { background-size: 100% 100%; background-position: 100% 100%; }
}
.glass-panel {
background: rgba(255, 255, 255, 0.5) !important;
backdrop-filter: blur(25px) saturate(160%);
-webkit-backdrop-filter: blur(25px) saturate(160%);
border: 1px solid rgba(255, 255, 255, 0.4) !important;
border-radius: 28px !important;
padding: 30px !important;
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important;
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.glass-panel:hover {
transform: translateY(-8px);
background: rgba(255, 255, 255, 0.65) !important;
box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important;
}
.premium-btn {
background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important;
background-size: 200% auto !important;
border: none !important;
color: white !important;
font-weight: 800 !important;
text-transform: uppercase;
letter-spacing: 1px;
border-radius: 15px !important;
box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important;
transition: 0.5s all !important;
}
.premium-btn:hover {
background-position: right center !important;
transform: scale(1.04);
box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important;
}
.gradio-container > * {
animation: fadeIn 1.2s ease-out;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
"""
# Hugging Face access token, read from the CLONE_SURAJIT_TOKEN environment
# variable. os.getenv returns None when the variable is not set, in which case
# the client below is constructed with token=None.
SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN")
# Shared inference client; generate_timed_script uses it for chat completions.
client = InferenceClient(token=SURAJIT_HF_TOKEN)
# Model identifier sent with every chat completion request.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
# =================================================================
# 2. CORE LOGIC
# =================================================================
def process_multiple_documents(files, max_chars: int = 10_000) -> str:
    """Load the uploaded documents and return their text as one bounded string.

    Each file is read with a loader chosen by extension (``.pdf`` ->
    ``PyPDFLoader``, ``.docx`` -> ``Docx2txtLoader``, anything else ->
    ``TextLoader``). The page contents of a file are joined with spaces, the
    files are separated by a blank line, and the combined text is passed
    through ``RecursiveCharacterTextSplitter`` (1000-character chunks,
    100-character overlap). The chunks are re-joined with single spaces and
    the result is truncated to ``max_chars`` characters.

    Args:
        files: Iterable of uploaded files. Items may be path strings or
            objects exposing the path as ``.name``; both are accepted.
        max_chars: Upper bound on the length of the returned text.

    Returns:
        The combined text, or ``""`` when ``files`` is empty/``None`` or no
        file could be read.

    Loading is best-effort: a file that fails to load is reported on stdout
    and skipped so the remaining files are still processed.
    """
    if not files:
        return ""

    sections = []
    for file in files:
        # NOTE(review): which shape Gradio delivers (plain path string vs. an
        # object with ``.name``) appears to depend on the Gradio version and
        # the File component's ``type`` - accept both rather than assume one.
        path = str(getattr(file, "name", file))
        ext = Path(path).suffix.lower()
        try:
            if ext == ".pdf":
                loader = PyPDFLoader(path)
            elif ext == ".docx":
                loader = Docx2txtLoader(path)
            else:
                loader = TextLoader(path)
            docs = loader.load()
            sections.append(" ".join(d.page_content for d in docs) + "\n\n")
        except Exception as e:
            # Deliberate best-effort boundary: one unreadable upload must not
            # abort the whole batch.
            print(f"Error loading {path}: {e}")

    combined_text = "".join(sections)

    # NOTE(review): because the chunks are re-joined, the 100-character
    # overlap presumably ends up duplicated in the output - confirm that this
    # is intended before relying on the text being a verbatim copy.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(combined_text)
    return " ".join(chunks)[:max_chars]
def generate_timed_script(context: str, mode: str, duration: str) -> str:
    """Ask the chat model for a script of the requested style and length.

    Args:
        context: Document text the script should be based on; it is embedded
            verbatim in the user message.
        mode: Script style (e.g. ``"Podcast"``); embedded in the system prompt.
        duration: One of the duration labels offered by the UI. It is mapped
            to an approximate word count; unknown labels fall back to the
            medium (5 minute) target.

    Returns:
        The text of the first completion choice, or ``""`` if the response
        carries no text.
    """
    duration_map = {
        "1 Minute (Short)": "approx 150 words",
        "5 Minutes (Medium)": "approx 750 words",
        "10 Minutes (Detailed)": "approx 1500 words",
        "20 Minutes (Deep Dive)": "approx 3000 words",
    }
    # Fall back to the medium entry itself so the prompt wording is the same
    # whether or not the label was recognised.
    target_len = duration_map.get(duration, duration_map["5 Minutes (Medium)"])

    messages = [
        {"role": "system", "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. Use 'Host:' and 'Expert:' for dialogue."},
        {"role": "user", "content": f"Analyze these documents and write the script:\n\n{context}"},
    ]

    # NOTE(review): max_tokens=2500 is probably too small for the 3000-word
    # "Deep Dive" target - confirm against the model's limits before raising.
    response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=2500)

    # NOTE(review): the message content may presumably be None; normalise to
    # "" so callers can always treat the result as a string.
    return response.choices[0].message.content or ""
def _new_temp_mp3() -> str:
    """Create an empty, already-closed temporary ``.mp3`` file; return its path."""
    fd, path = tempfile.mkstemp(suffix=".mp3")
    # Close our handle right away: the file is only written to by path later.
    os.close(fd)
    return path


def _podcast_voice_and_text(line: str, host_voice: str, expert_voice: str):
    """Map one non-empty script line to the ``(voice, text)`` used to speak it."""
    label, colon, remainder = line.partition(":")
    if not colon:
        # No speaker label: the host reads the whole line instead of skipping it.
        return host_voice, line
    # Only the label before the first colon decides the speaker, so an expert
    # line that merely mentions "Host" keeps the expert voice.
    speaker_voice = host_voice if "Host" in label else expert_voice
    return speaker_voice, remainder.strip()


async def create_audio(script: str, mode: str, voice: str, speed: float):
    """Synthesise ``script`` to an MP3 file with edge-tts and return its path.

    Args:
        script: Text to speak. In ``"Podcast"`` mode it is processed line by
            line; the text before the first colon is treated as the speaker
            label and is not spoken.
        mode: ``"Podcast"`` produces a two-voice recording with a 600 ms pause
            after every line; any other value reads the script in one voice.
        voice: edge-tts voice name for the host (Podcast) or the sole narrator.
        speed: Pace multiplier where ``1.0`` is normal; converted to the signed
            percentage string passed as ``rate`` (``1.2`` -> ``"+20%"``).

    Returns:
        Path of a temporary ``.mp3`` file. The file is not deleted here.
    """
    # round() rather than int(): float error would otherwise turn e.g. 1.15
    # into +14% instead of +15%.
    rate_val = round((speed - 1.0) * 100)
    rate_str = f"{rate_val:+d}%"

    if mode != "Podcast":
        out = _new_temp_mp3()
        await edge_tts.Communicate(script, voice, rate=rate_str).save(out)
        return out

    # Keep the two speakers distinguishable when the host already uses the
    # default expert voice.
    expert_voice = "en-GB-SoniaNeural"
    if voice == expert_voice:
        expert_voice = "en-US-AndrewNeural"

    combined = AudioSegment.empty()
    pause = AudioSegment.silent(duration=600)
    for raw_line in script.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        current_voice, text_to_speak = _podcast_voice_and_text(line, voice, expert_voice)
        if not text_to_speak:
            continue
        t_path = _new_temp_mp3()
        try:
            await edge_tts.Communicate(text_to_speak, current_voice, rate=rate_str).save(t_path)
            combined += AudioSegment.from_mp3(t_path) + pause
        finally:
            # Remove the per-line clip even when synthesis or decoding fails.
            os.remove(t_path)

    out = _new_temp_mp3()
    combined.export(out, format="mp3")
    return out
# =================================================================
# 3. INTERFACE
# =================================================================
# Builds the two-column UI (inputs on the left, results on the right) and wires
# the generate button to the document -> script -> audio pipeline.
# NOTE(review): the source lost its indentation; the nesting below (button
# inside the left panel, handler inside the Blocks context) is a reconstruction.
with gr.Blocks() as app:
    gr.HTML("<div style='text-align: center; padding: 20px;'><img src='https://cdn.pixabay.com/animation/2023/06/13/15/12/15-12-47-323_512.gif' style='width:50px;'><h1 style='color: #1f2937; font-weight: 900;'>AI Multi-Doc Studio</h1></div>")
    with gr.Row():
        # Left column: uploads and generation settings.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📂 Upload Documents</h4>")
                file_input = gr.File(label=None, file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
                gr.HTML("<h4>⏱️ Duration & Style</h4>")
                duration_sel = gr.Dropdown(
                    ["1 Minute (Short)", "5 Minutes (Medium)", "10 Minutes (Detailed)", "20 Minutes (Deep Dive)"],
                    value="5 Minutes (Medium)", label="Target Audio Length"
                )
                mode_sel = gr.Dropdown(["Podcast", "Storytelling", "Teaching", "Summary"], value="Podcast", label="Script Style")
                gr.HTML("<h4>🗣️ Voice Settings</h4>")
                # (display label, edge-tts voice name) pairs.
                voice_sel = gr.Dropdown([
                    ("Andrew (US - Male)", "en-US-AndrewNeural"),
                    ("Ava (US - Female)", "en-US-AvaNeural"),
                    ("Emma (UK - Female)", "en-GB-SoniaNeural"),
                    ("Aditi (IN - Female)", "en-IN-NeerjaNeural")
                ], value="en-US-AndrewNeural", label="Voice Selection")
                speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace")
                btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn")
        # Right column: generated script and audio player.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📝 Generated Script</h4>")
                out_txt = gr.Textbox(label=None, lines=15)
                gr.HTML("<h4>🔊 Audio Output</h4>")
                out_aud = gr.Audio(label=None)

    async def run_pipeline(files, dur, mode, voice, speed):
        """Run upload -> text -> script -> audio; return ``(script, audio_path)``.

        When nothing usable was uploaded, a message is returned in place of
        the script and the audio output is ``None``.
        """
        if not files:
            return "Please upload at least one file.", None
        # Document loading and the chat completion are synchronous calls; run
        # them in worker threads so this coroutine does not stall the event loop.
        ctx = await asyncio.to_thread(process_multiple_documents, files)
        if not ctx.strip():
            return "No readable text could be extracted from the uploaded files.", None
        sc = await asyncio.to_thread(generate_timed_script, ctx, mode, dur)
        aud = await create_audio(sc, mode, voice, speed)
        return sc, aud

    btn.click(run_pipeline, inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld], outputs=[out_txt, out_aud])

if __name__ == "__main__":
    # NOTE(review): passing css to launch() requires a Gradio release that
    # accepts it there - confirm against the pinned Gradio version.
    app.launch(css=CUSTOM_CSS)