Spaces:

HaiderAUT
/

PodCastIt

Build error

App Files Files Community

PodCastIt / app.py

HaiderAUT

Update app.py

03ef672 verified 9 months ago

raw

history blame contribute delete

5.37 kB

	# =============================================================
	# Lecture → English Podcast Generator
	# • Script: HF Inference API (Qwen/Qwen2.5-Coder-32B-Instruct)
	# • Audio: MeloTTS (English)
	# =============================================================

	import io
	import re
	import tempfile
	import textwrap
	from pathlib import Path
	from typing import List

	import gradio as gr
	from PyPDF2 import PdfReader
	from huggingface_hub import InferenceClient

	import torch
	import nltk
	nltk.download('averaged_perceptron_tagger_eng')
	from melo.api import TTS

	# ────────────────────────────────────────────────────────────────────
	# 1) Setup HF client & MeloTTS for English
	# ────────────────────────────────────────────────────────────────────
	# Client used by generate_podcast for script generation. It is constructed
	# with no arguments; the model id is passed on each request instead.
	# NOTE(review): no token is passed explicitly here — whether requests end up
	# unauthenticated depends on the runtime environment; confirm.
	hf_client = InferenceClient() # anonymous/public access

	# Run the TTS model on the GPU when torch reports CUDA support, else on CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	# English MeloTTS model, created once at import time and shared by all requests.
	melo_en = TTS(language='EN', device=device)
	# Presumably a speaker-name -> speaker-id mapping exposed by the loaded
	# model's hyper-parameters — verify against the MeloTTS version in use.
	speaker_ids = melo_en.hps.data.spk2id
	# The first key the mapping yields is used as the voice for every podcast.
	default_speaker = next(iter(speaker_ids.keys()))

	# ────────────────────────────────────────────────────────────────────
	# 2) Prompt template
	# ────────────────────────────────────────────────────────────────────
	# Instruction template for the text-generation model. `{content}` is the only
	# placeholder and is filled with `PROMPT.format(content=...)`, so any literal
	# brace later added to this template must be doubled (`{{` / `}}`).
	PROMPT = textwrap.dedent("""
	You are producing a lively two-host educational podcast in English.
	Summarize the following lecture content into a dialogue of approximately 300 words.
	Make it engaging: hosts ask questions, clarify ideas with analogies,
	and wrap up with a concise recap. Preserve technical accuracy.
	Use Markdown for host names (e.g., Host 1:).

	### Lecture Content
	{content}
	""")

	# ────────────────────────────────────────────────────────────────────
	# 3) Helpers
	# ────────────────────────────────────────────────────────────────────
	def extract_pdf_text(pdf_path: str) -> str:
	    """Return the text of every page of the PDF at *pdf_path*, newline-joined.

	    A page whose ``extract_text()`` result is falsy (for example ``None``)
	    contributes an empty string, so one segment is joined per page.
	    """
	    page_texts = []
	    for page in PdfReader(pdf_path).pages:
	        extracted = page.extract_text()
	        page_texts.append(extracted if extracted else "")
	    return "\n".join(page_texts)

	def split_to_chunks(text: str, limit: int = 280) -> List[str]:
	    """Pack the sentences of *text* into chunks of at most *limit* characters.

	    Sentences are cut after ``.``, ``!`` or ``?`` followed by whitespace and
	    are joined with single spaces. A sentence is never split, so a single
	    sentence longer than *limit* becomes a chunk of its own that exceeds it.
	    """
	    sentences = []
	    for piece in re.split(r"(?<=[.!?])\s+", text):
	        piece = piece.strip()
	        if piece:
	            sentences.append(piece)

	    packed: List[str] = []
	    parts: List[str] = []  # sentences of the chunk being built
	    size = 0               # length of " ".join(parts)
	    for sentence in sentences:
	        if parts and size + len(sentence) + 1 > limit:
	            # Adding this sentence (plus the joining space) would overflow.
	            packed.append(" ".join(parts))
	            parts = [sentence]
	            size = len(sentence)
	            continue
	        size = size + 1 + len(sentence) if parts else len(sentence)
	        parts.append(sentence)

	    if parts:
	        packed.append(" ".join(parts))
	    return packed

	# ────────────────────────────────────────────────────────────────────
	# 4) Main generate function
	# ────────────────────────────────────────────────────────────────────
	def generate_podcast(lecture_pdf: gr.File, progress: gr.Progress = gr.Progress()):
	    """Turn an uploaded lecture PDF into a podcast script and spoken audio.

	    Args:
	        lecture_pdf: Value delivered by the ``gr.File`` input. A plain path
	            (``str``/``Path``) or an object exposing the path as ``.name``
	            is accepted.
	        progress: Gradio progress tracker. It is declared as a default
	            argument (the Gradio convention) so the framework can inject the
	            tracker bound to the current event; it is not a UI input.

	    Returns:
	        A ``(script, audio_bytes)`` tuple: the generated dialogue text and
	        the WAV-encoded speech synthesised from it.

	    Raises:
	        gr.Error: If no file was uploaded, the PDF yields no text, or the
	            model returns an empty script.
	    """
	    if not lecture_pdf:
	        raise gr.Error("Please upload a lecture PDF.")

	    # 1) Extract & prompt
	    if isinstance(lecture_pdf, (str, Path)):
	        pdf_path = str(lecture_pdf)
	    else:
	        pdf_path = lecture_pdf.name
	    raw = extract_pdf_text(pdf_path)
	    if not raw.strip():
	        # e.g. a scanned PDF without a text layer: nothing to summarise.
	        raise gr.Error("No extractable text was found in the PDF.")
	    prompt = PROMPT.format(content=raw)

	    # 2) HF text generation. The prompt is positional and generation options
	    # are direct keyword arguments of text_generation (it has no `inputs` or
	    # `parameters` keyword).
	    out = hf_client.text_generation(
	        prompt,
	        model="Qwen/Qwen2.5-Coder-32B-Instruct",
	        max_new_tokens=512,
	        temperature=0.5,
	    )
	    # InferenceClient returns a dict or a str depending on version
	    script = out.get("generated_text") if isinstance(out, dict) else out
	    if not script or not str(script).strip():
	        raise gr.Error("The model returned an empty script.")

	    # 3) MeloTTS audio, rendered into an in-memory buffer with the default
	    # English speaker so nothing is written to (or left behind on) disk.
	    bio = io.BytesIO()
	    melo_en.tts_to_file(
	        script,
	        speaker_ids[default_speaker],
	        bio,
	        speed=1.0,
	        pbar=progress.tqdm,
	        format="wav",
	    )

	    return script, bio.getvalue()

	# ────────────────────────────────────────────────────────────────────
	# 5) Gradio UI
	# ────────────────────────────────────────────────────────────────────
	# Single-page UI: upload a PDF, press the button, get the script and audio.
	with gr.Blocks() as demo:
	    gr.Markdown("## Lecture → English Podcast")
	    pdf_in = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
	    btn = gr.Button("Generate Podcast")
	    script_md = gr.Markdown(label="Podcast Script")
	    # `type` only accepts "numpy" or "filepath" and describes how an *input*
	    # audio value is handed to a function; "bytes" is rejected when the
	    # component is constructed. As an output, the component accepts the WAV
	    # bytes returned by generate_podcast without any `type` setting.
	    audio_out = gr.Audio(label="Podcast Audio")
	    btn.click(fn=generate_podcast, inputs=[pdf_in], outputs=[script_md, audio_out])

	demo.launch()