# VOICENEWSC / app.py
# Author: surajit2839 — "Update app.py" (commit 98a52ab, verified)
import os
import tempfile
import asyncio
from pathlib import Path
import gradio as gr
from huggingface_hub import InferenceClient
import edge_tts
from pydub import AudioSegment
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# =================================================================
# 1. UI STYLING & PREMIUM MOVING ANIMATIONS
# =================================================================
# Custom stylesheet injected into the Gradio app: an animated pastel mesh
# background, frosted-glass panels (.glass-panel) and a gradient CTA button
# (.premium-btn). Must be passed to Gradio so it is inlined into the page.
CUSTOM_CSS = """
.gradio-container {
background: #ffffff;
background-image:
radial-gradient(at 0% 0%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 100% 0%, rgba(249, 115, 22, 0.12) 0px, transparent 50%),
radial-gradient(at 100% 100%, rgba(147, 51, 234, 0.15) 0px, transparent 50%),
radial-gradient(at 0% 100%, rgba(249, 115, 22, 0.12) 0px, transparent 50%);
background-attachment: fixed;
animation: meshFlow 20s ease-in-out infinite alternate;
min-height: 100vh;
overflow-x: hidden;
}
@keyframes meshFlow {
0% { background-size: 100% 100%; background-position: 0% 0%; }
50% { background-size: 140% 140%; background-position: 50% 50%; }
100% { background-size: 100% 100%; background-position: 100% 100%; }
}
.glass-panel {
background: rgba(255, 255, 255, 0.5) !important;
backdrop-filter: blur(25px) saturate(160%);
-webkit-backdrop-filter: blur(25px) saturate(160%);
border: 1px solid rgba(255, 255, 255, 0.4) !important;
border-radius: 28px !important;
padding: 30px !important;
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.03) !important;
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
.glass-panel:hover {
transform: translateY(-8px);
background: rgba(255, 255, 255, 0.65) !important;
box-shadow: 0 35px 70px rgba(147, 51, 234, 0.12) !important;
}
.premium-btn {
background: linear-gradient(135deg, #f97316 0%, #9333ea 50%, #f97316 100%) !important;
background-size: 200% auto !important;
border: none !important;
color: white !important;
font-weight: 800 !important;
text-transform: uppercase;
letter-spacing: 1px;
border-radius: 15px !important;
box-shadow: 0 10px 25px rgba(147, 51, 234, 0.35) !important;
transition: 0.5s all !important;
}
.premium-btn:hover {
background-position: right center !important;
transform: scale(1.04);
box-shadow: 0 15px 35px rgba(147, 51, 234, 0.5) !important;
}
.gradio-container > * {
animation: fadeIn 1.2s ease-out;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
"""
# Hugging Face Inference API token — presumably configured as a Space secret;
# os.getenv returns None if the variable is unset (requests then run
# unauthenticated and may be rate-limited). TODO confirm deployment config.
SURAJIT_HF_TOKEN = os.getenv("CLONE_SURAJIT_TOKEN")
# Shared Inference API client used by generate_timed_script().
client = InferenceClient(token=SURAJIT_HF_TOKEN)
# Chat model used for all script generation.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
# =================================================================
# 2. CORE LOGIC
# =================================================================
def process_multiple_documents(files) -> str:
    """Extract and concatenate text from the uploaded documents.

    Args:
        files: Gradio file objects (each exposing a ``.name`` path), or a
            falsy value when nothing was uploaded.

    Returns:
        The combined document text, capped at 10,000 characters to keep the
        downstream LLM prompt bounded. Empty string when there is no input.
    """
    if not files:
        return ""
    parts = []
    for file in files:
        ext = Path(file.name).suffix.lower()
        try:
            if ext == ".pdf":
                loader = PyPDFLoader(file.name)
            elif ext == ".docx":
                loader = Docx2txtLoader(file.name)
            else:
                # Anything else (.txt and friends) is read as plain text.
                loader = TextLoader(file.name)
            docs = loader.load()
            parts.append(" ".join(d.page_content for d in docs))
        except Exception as e:
            # Best-effort: one unreadable file must not abort the whole batch.
            print(f"Error loading {file.name}: {e}")
    combined_text = "\n\n".join(parts)
    # BUGFIX: the original split with chunk_overlap=100 and then re-joined the
    # chunks, which duplicated ~100 characters at every chunk boundary in the
    # prompt. Splitting without overlap keeps the text faithful when rejoined.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_text(combined_text)
    return " ".join(chunks)[:10000]
def generate_timed_script(context: str, mode: str, duration: str) -> str:
    """Ask the LLM to write a script of roughly the requested spoken length.

    Args:
        context: Concatenated document text (already truncated upstream).
        mode: Script style, e.g. "Podcast", "Teaching", "Summary".
        duration: One of the UI duration labels; unknown values fall back to
            the medium (~750 word) target.

    Returns:
        The generated script text.
    """
    # Map each duration to (length instruction, completion-token budget).
    # English runs ~1.3 tokens/word, so the budgets include headroom.
    # BUGFIX: the original fixed max_tokens=2500 for every duration, which
    # silently truncated the 10-minute (~1500 word / ~2000 token) and
    # 20-minute (~3000 word / ~4000 token) scripts.
    duration_map = {
        "1 Minute (Short)": ("approx 150 words", 512),
        "5 Minutes (Medium)": ("approx 750 words", 1500),
        "10 Minutes (Detailed)": ("approx 1500 words", 2800),
        "20 Minutes (Deep Dive)": ("approx 3000 words", 4096),
    }
    target_len, max_tokens = duration_map.get(duration, ("750 words", 1500))
    messages = [
        {"role": "system", "content": f"You are a master scriptwriter. Mode: {mode}. Length: {target_len}. Use 'Host:' and 'Expert:' for dialogue."},
        {"role": "user", "content": f"Analyze these documents and write the script:\n\n{context}"}
    ]
    response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=max_tokens)
    return response.choices[0].message.content
async def create_audio(script: str, mode: str, voice: str, speed: float):
    """Synthesize the script to an MP3 file with edge-tts.

    In "Podcast" mode, lines tagged "Host:" use the selected voice, lines
    tagged with any other speaker (e.g. "Expert:") use a fixed British voice,
    and untagged lines are narrated by the host. All other modes read the
    whole script with the single selected voice.

    Args:
        script: The generated script text.
        mode: Script style; only "Podcast" gets two-voice treatment.
        voice: edge-tts voice ID for the host / narrator.
        speed: Pace multiplier (1.0 = normal); converted to a signed
            percentage rate string, e.g. 1.2 -> "+20%".

    Returns:
        Path to the resulting temporary .mp3 file.
    """
    EXPERT_VOICE = "en-GB-SoniaNeural"
    rate_str = f"{int((speed - 1.0) * 100):+d}%"

    async def _synth_to_file(text: str, tts_voice: str) -> str:
        # Render one utterance to its own temp file and return the path.
        # Close the handle before edge-tts writes, so Windows can reopen it.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        tmp.close()
        await edge_tts.Communicate(text, tts_voice, rate=rate_str).save(tmp.name)
        return tmp.name

    if mode != "Podcast":
        return await _synth_to_file(script, voice)

    combined = AudioSegment.empty()
    for line in script.split('\n'):
        line = line.strip()
        if not line:
            continue
        if ":" in line:
            speaker, text_to_speak = (part.strip() for part in line.split(":", 1))
            # BUGFIX: decide the voice from the speaker TAG only. The old
            # check ("Host" in line) scanned the dialogue text too, so a
            # line like "Expert: the Host mentioned..." got the host voice.
            current_voice = voice if "Host" in speaker else EXPERT_VOICE
        else:
            # Untagged lines are read by the host instead of being dropped.
            current_voice = voice
            text_to_speak = line
        if text_to_speak:
            t_path = await _synth_to_file(text_to_speak, current_voice)
            try:
                # 600 ms of silence between turns keeps the dialogue natural.
                combined += AudioSegment.from_mp3(t_path) + AudioSegment.silent(duration=600)
            finally:
                os.remove(t_path)  # clean up per-line temp file even on error
    out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    out.close()
    combined.export(out.name, format="mp3")
    return out.name
# =================================================================
# 3. INTERFACE
# =================================================================
# BUGFIX: the stylesheet must be attached to gr.Blocks(css=...); Blocks.launch()
# has no `css` parameter, so the original `app.launch(css=CUSTOM_CSS)` never
# applied the theme (and raises TypeError on current Gradio versions).
with gr.Blocks(css=CUSTOM_CSS) as app:
    gr.HTML("<div style='text-align: center; padding: 20px;'><img src='https://cdn.pixabay.com/animation/2023/06/13/15/12/15-12-47-323_512.gif' style='width:50px;'><h1 style='color: #1f2937; font-weight: 900;'>AI Multi-Doc Studio</h1></div>")
    with gr.Row():
        # Left column: all user inputs (documents, duration, style, voice).
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📚 Upload Documents</h4>")
                file_input = gr.File(label=None, file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
                gr.HTML("<h4>⏱️ Duration & Style</h4>")
                duration_sel = gr.Dropdown(
                    ["1 Minute (Short)", "5 Minutes (Medium)", "10 Minutes (Detailed)", "20 Minutes (Deep Dive)"],
                    value="5 Minutes (Medium)", label="Target Audio Length"
                )
                mode_sel = gr.Dropdown(["Podcast", "Storytelling", "Teaching", "Summary"], value="Podcast", label="Script Style")
                gr.HTML("<h4>🗣️ Voice Settings</h4>")
                voice_sel = gr.Dropdown([
                    ("Andrew (US - Male)", "en-US-AndrewNeural"),
                    ("Ava (US - Female)", "en-US-AvaNeural"),
                    ("Emma (UK - Female)", "en-GB-SoniaNeural"),
                    ("Aditi (IN - Female)", "en-IN-NeerjaNeural")
                ], value="en-US-AndrewNeural", label="Voice Selection")
                speed_sld = gr.Slider(0.5, 1.5, value=1.0, label="Pace")
                btn = gr.Button("🚀 GENERATE STUDIO AUDIO", elem_classes="premium-btn")
        # Right column: generated script plus the rendered audio.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="glass-panel"):
                gr.HTML("<h4>📝 Generated Script</h4>")
                out_txt = gr.Textbox(label=None, lines=15)
                gr.HTML("<h4>🔊 Audio Output</h4>")
                out_aud = gr.Audio(label=None)

    async def run_pipeline(files, dur, mode, voice, speed):
        """Full pipeline: documents -> script -> audio.

        Returns a (script_text, audio_path) tuple for the two outputs;
        audio_path is None when no files were uploaded.
        """
        if not files:
            return "Please upload at least one file.", None
        ctx = process_multiple_documents(files)
        sc = generate_timed_script(ctx, mode, dur)
        aud = await create_audio(sc, mode, voice, speed)
        return sc, aud

    btn.click(run_pipeline, inputs=[file_input, duration_sel, mode_sel, voice_sel, speed_sld], outputs=[out_txt, out_aud])

if __name__ == "__main__":
    app.launch()