# =============================================================
# Lecture → English Podcast Generator
# • Script: HF Inference API (Qwen/Qwen2.5-Coder-32B-Instruct)
# • Audio: MeloTTS (English)
# =============================================================
import io
import re
import tempfile
import textwrap
from pathlib import Path
from typing import List

import gradio as gr
import nltk
import torch
from huggingface_hub import InferenceClient
from PyPDF2 import PdfReader

# Kept ahead of the melo import, as in the original ordering (presumably the
# tagger is needed by MeloTTS' English text processing -- TODO confirm).
nltk.download('averaged_perceptron_tagger_eng')
from melo.api import TTS
# ────────────────────────────────────────────────────────────────────
# 1) Setup HF client & MeloTTS for English
# ────────────────────────────────────────────────────────────────────
# Hugging Face Inference API client, created without explicit credentials.
hf_client = InferenceClient()

# Run MeloTTS on the GPU when CUDA is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
melo_en = TTS(language='EN', device=device)

# Speaker-name -> speaker-id mapping exposed by the loaded English model.
speaker_ids = melo_en.hps.data.spk2id
# The first listed speaker is used as the default voice.  `.keys()` is kept
# on purpose: the mapping object may not support direct iteration.
default_speaker = next(iter(speaker_ids.keys()))
# ────────────────────────────────────────────────────────────────────
# 2) Prompt template
# ────────────────────────────────────────────────────────────────────
# Instruction template sent to the language model.  `{content}` is replaced
# with the lecture text via `PROMPT.format(content=...)`.
PROMPT = textwrap.dedent("""
    You are producing a lively two-host educational podcast in English.
    Summarize the following lecture content into a dialogue of approximately 300 words.
    Make it engaging: hosts ask questions, clarify ideas with analogies,
    and wrap up with a concise recap. Preserve technical accuracy.
    Use Markdown for host names (e.g., **Host 1:**).
    ### Lecture Content
    {content}
""")
# ────────────────────────────────────────────────────────────────────
# 3) Helpers
# ────────────────────────────────────────────────────────────────────
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of the PDF at *pdf_path*.

    Page texts are joined with newlines.  A page for which
    ``extract_text()`` returns a falsy value (e.g. ``None`` or ``""``)
    contributes an empty string, so the result is always a ``str``.
    """
    reader = PdfReader(pdf_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def split_to_chunks(text: str, limit: int = 280) -> List[str]:
    """Group the sentences of *text* into chunks of at most *limit* characters.

    Sentences are delimited by whitespace that follows ``.``, ``!`` or ``?``.
    Consecutive sentences are joined with a single space for as long as the
    joined length stays within *limit*.  A single sentence that is itself
    longer than *limit* is never cut; it is emitted as its own over-long
    chunk.

    Args:
        text: Text to split.
        limit: Maximum chunk length in characters (joining spaces included).

    Returns:
        The chunks in their original order; an empty list when *text*
        contains no non-whitespace characters.
    """
    sents = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
    chunks: List[str] = []
    curr = ""
    for sent in sents:
        # +1 accounts for the space that would join `curr` and `sent`.
        if curr and len(curr) + len(sent) + 1 > limit:
            chunks.append(curr)
            curr = sent
        else:
            curr = f"{curr} {sent}" if curr else sent
    if curr:
        chunks.append(curr)
    return chunks
# ────────────────────────────────────────────────────────────────────
# 4) Main generate function
# ────────────────────────────────────────────────────────────────────
def generate_podcast(lecture_pdf: gr.File, progress: gr.Progress = gr.Progress()):
    """Turn an uploaded lecture PDF into a podcast script and spoken audio.

    Steps: extract the PDF text, ask the hosted language model for a
    two-host dialogue, then synthesize that dialogue with MeloTTS.

    Args:
        lecture_pdf: Value delivered by the ``gr.File`` input -- either a
            path string or an object exposing the path as ``.name``.
        progress: Gradio progress tracker.  It has to be a default-valued
            parameter for Gradio to connect it to the running event; it is
            not meant to be supplied by callers.

    Returns:
        A ``(script, audio_bytes)`` tuple: the generated Markdown script and
        the WAV-encoded audio as ``bytes``.

    Raises:
        gr.Error: If no file was uploaded, the PDF yields no text, or the
            model returns an empty script.
    """
    if not lecture_pdf:
        raise gr.Error("Please upload a lecture PDF.")

    # 1) Extract & prompt
    pdf_path = lecture_pdf if isinstance(lecture_pdf, str) else lecture_pdf.name
    raw = extract_pdf_text(pdf_path)
    if not raw.strip():
        raise gr.Error("No extractable text was found in the uploaded PDF.")
    prompt = PROMPT.format(content=raw)

    # 2) HF text generation: the prompt is positional and the generation
    #    settings are plain keyword arguments.
    out = hf_client.text_generation(
        prompt,
        model="Qwen/Qwen2.5-Coder-32B-Instruct",
        max_new_tokens=512,
        temperature=0.5,
    )
    # Tolerate a dict-shaped response as well as the plain string.
    script = out.get("generated_text") if isinstance(out, dict) else out
    if not script or not script.strip():
        raise gr.Error("The language model returned an empty script.")

    # 3) MeloTTS audio
    # The prompt asks for Markdown host labels (e.g. **Host 1:**); strip the
    # emphasis/heading markers so they are not handed to the synthesizer as
    # literal characters.  The displayed script keeps its Markdown.
    speech_text = re.sub(r"[*#]+", "", script)

    bio = io.BytesIO()
    # use the default English speaker
    melo_en.tts_to_file(
        speech_text,
        speaker_ids[default_speaker],
        bio,
        speed=1.0,
        pbar=progress.tqdm,
        format="wav",
    )
    return script, bio.getvalue()
# ────────────────────────────────────────────────────────────────────
# 5) Gradio UI
# ────────────────────────────────────────────────────────────────────
# Single-page UI: upload a PDF, press the button, then read the generated
# script and listen to the synthesized audio.
with gr.Blocks() as demo:
    gr.Markdown("## Lecture → English Podcast")
    pdf_in = gr.File(label="Upload Lecture PDF", file_types=[".pdf"])
    btn = gr.Button("Generate Podcast")
    script_md = gr.Markdown(label="Podcast Script")
    # `type` is left at its default: gr.Audio only accepts "numpy" or
    # "filepath" there, and "bytes" is rejected when the component is built.
    # An output Audio component can still be given raw bytes as its value.
    audio_out = gr.Audio(label="Podcast Audio")
    btn.click(fn=generate_podcast, inputs=[pdf_in], outputs=[script_md, audio_out])

demo.launch()