Spaces:
Runtime error
Runtime error
File size: 5,222 Bytes
68dff12 331f1ed 68dff12 198f8e5 68dff12 331f1ed 4863b2c 68dff12 198f8e5 68dff12 4863b2c 331f1ed 68dff12 198f8e5 331f1ed 198f8e5 331f1ed 198f8e5 331f1ed 68dff12 198f8e5 4863b2c 68dff12 198f8e5 4863b2c 198f8e5 331f1ed 4863b2c 41bab03 4863b2c 41bab03 68dff12 41bab03 331f1ed 4863b2c 41bab03 4863b2c 41bab03 198f8e5 68dff12 4863b2c 68dff12 41bab03 68dff12 198f8e5 4863b2c 41bab03 4863b2c 198f8e5 68dff12 198f8e5 41bab03 198f8e5 4863b2c 198f8e5 41bab03 4863b2c 198f8e5 4863b2c 331f1ed 4863b2c 331f1ed 41bab03 4863b2c 331f1ed 41bab03 198f8e5 4863b2c 331f1ed 198f8e5 331f1ed 41bab03 68dff12 4863b2c 331f1ed 198f8e5 41bab03 198f8e5 68dff12 198f8e5 41bab03 198f8e5 4863b2c 41bab03 198f8e5 41bab03 4863b2c 198f8e5 41bab03 198f8e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import gradio as gr
import google.generativeai as genai
from elevenlabs.client import ElevenLabs
import os
import json
from pypdf import PdfReader
from dotenv import load_dotenv
# 1️⃣ Load API Keys
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")

# Gemini is configured process-wide; skipped when no key is available.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Bind `client` unconditionally so code that references it later never hits a
# NameError; it is None when no ElevenLabs key is configured.
client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None
# 2️⃣ Podcast State
class PodcastState:
    """Mutable state of the podcast currently loaded in the app."""

    def __init__(self) -> None:
        # Dialogue turns of the generated script, in playback order.
        self.script: list = []
        # Index into `script` of the next turn to be played.
        self.current_index: int = 0
        # Name of the selected persona style.
        self.persona: str = "Serious Academic"
        # Text extracted from the uploaded PDF; empty until one is processed.
        self.full_text: str = ""


# Single module-level instance used by the callbacks in this file.
state = PodcastState()
# 3️⃣ PDF Extraction
def extract_text_from_pdf(pdf_file, max_pages=5):
    """Return the text of the leading pages of a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source argument.
        max_pages: Number of leading pages to read (default 5). ``None``
            reads every page.

    Returns:
        The text of each page that yielded any text, each followed by a
        newline. This function never raises: on any failure it returns the
        string ``"Error reading PDF: <exception>"`` instead, so callers must
        check for that prefix.
    """
    try:
        reader = PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages[:max_pages])
        # Pages with no extractable text are skipped.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        return f"Error reading PDF: {e}"
# 4️⃣ Generate Script
def _parse_script(raw_reply):
    """Extract and validate the list of dialogue turns from a model reply."""
    cleaned = raw_reply.replace("```json", "").replace("```", "").strip()
    # Keep only the outermost [...] in case the model wrapped the array in prose.
    start, end = cleaned.find("["), cleaned.rfind("]")
    if start != -1 and end > start:
        cleaned = cleaned[start:end + 1]
    script = json.loads(cleaned)
    if not isinstance(script, list) or not script:
        raise ValueError("expected a non-empty JSON list of dialogue turns")
    for turn in script:
        well_formed = (
            isinstance(turn, dict)
            and isinstance(turn.get("speaker"), str)
            and isinstance(turn.get("text"), str)
        )
        if not well_formed:
            raise ValueError("each turn needs string 'speaker' and 'text' fields")
    return script


def generate_script(pdf_file, persona_style):
    """Generate a dialogue script from the uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, passed straight to
            ``extract_text_from_pdf``.
        persona_style: Name of the persona that sets the dialogue tone.
            Unknown names fall back to the "Serious Academic" instructions.

    Returns:
        A ``(status_message, script)`` pair. On success ``script`` is the
        list of ``{"speaker", "text"}`` turns, which is also stored in
        ``state`` with playback reset to the first turn. On any failure
        ``script`` is ``{}`` and the stored script is left unchanged.
    """
    if not pdf_file:
        return "⚠️ Upload PDF first.", {}
    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return "⚠️ API Keys missing in HF Secrets!", {}

    pdf_text = extract_text_from_pdf(pdf_file)
    # Extraction reports failure as a string instead of raising; do not send
    # that message to the model as if it were the paper.
    if pdf_text.startswith("Error reading PDF:"):
        return pdf_text, {}
    if not pdf_text.strip():
        return "⚠️ No extractable text found in the PDF.", {}

    state.full_text = pdf_text
    state.persona = persona_style

    prompts = {
        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
        "Gossip Columnist": "Two gossip hosts reacting dramatically.",
    }
    # Fall back to the default so an unknown persona never puts "None" in the prompt.
    persona_intro = prompts.get(persona_style, prompts["Serious Academic"])

    system_prompt = (
        f"\n{persona_intro}\n"
        "Based on the paper:\n"
        f'"{pdf_text[:4000]}..."\n'
        "Generate a short 4-turn dialogue script.\n"
        "Return JSON ONLY:\n"
        "[\n"
        '{"speaker":"Host A","text":"..."},\n'
        '{"speaker":"Host B","text":"..."}\n'
        "]\n"
    )

    try:
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(system_prompt)
        script = _parse_script(response.text)
    except Exception as e:
        return f"Error generating script: {e}", {}

    state.script = script
    state.current_index = 0
    return "✅ Script ready!", script
# 5️⃣ Play next line
def play_next_chunk():
    """Synthesize and return the next turn of the current script.

    Returns:
        A ``(audio_path, transcript)`` pair. ``audio_path`` is the MP3 written
        for this turn, or ``None`` when the script is finished or synthesis
        failed; in those cases ``transcript`` carries the status message.
        The playback index advances only after the audio is fully written.
    """
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast finished."

    line = state.script[state.current_index]
    try:
        # Looked up by name so an unconfigured client gives a clear message
        # instead of a NameError.
        tts = globals().get("client")
        if tts is None:
            raise RuntimeError("ElevenLabs client is not configured")

        # Read inside the try so a malformed turn is reported, not raised.
        speaker, text = line["speaker"], line["text"]

        # Every turn uses the first voice except Host B under the
        # "Gossip Columnist" persona.
        # NOTE(review): both "Serious Academic" hosts share one voice — confirm intended.
        voice_id = "nPczCjz82tPNOwVbpGE2"
        if state.persona == "Gossip Columnist" and speaker == "Host B":
            voice_id = "21m00Tcm4TlvDq8ikWAM"

        # Newer ElevenLabs SDKs expose text_to_speech.convert and drop the
        # older generate() helper; prefer the former and fall back.
        if hasattr(tts, "text_to_speech"):
            audio_stream = tts.text_to_speech.convert(
                voice_id=voice_id,
                text=text,
                model_id="eleven_monolingual_v1",
            )
        else:
            audio_stream = tts.generate(
                text=text,
                voice=voice_id,
                model="eleven_monolingual_v1",
            )

        save_path = f"temp_{state.current_index}.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)

        state.current_index += 1
        return save_path, f"{speaker}: {text}"
    except Exception as e:
        return None, f"Audio error: {e}"
# 6️⃣ Interrupt & Ask
def interrupt_and_ask(question):
    """Answer a listener question in the current persona and voice it.

    Args:
        question: The question text typed by the user.

    Returns:
        A ``(audio_path, answer_text)`` pair. ``audio_path`` is
        ``"interrupt.mp3"`` on success, or ``None`` when no PDF text is
        loaded, the question is empty, or generation/synthesis failed; in
        those cases the second element is the message to show.
    """
    if not state.full_text:
        return None, "Upload a PDF first."

    question = (question or "").strip()
    if not question:
        return None, "Type a question first."

    prompt = (
        f"\nPersona: {state.persona}\n"
        f"Context: {state.full_text[:1000]}\n"
        f"User Question: {question}\n"
        'Answer briefly, then say "Anyway, back to the paper..."\n'
    )

    try:
        # Model construction is inside the try so setup errors are reported
        # in the UI like any other failure.
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(prompt)
        answer = response.text

        # Looked up by name so an unconfigured client gives a clear message
        # instead of a NameError.
        tts = globals().get("client")
        if tts is None:
            raise RuntimeError("ElevenLabs client is not configured")

        # Newer ElevenLabs SDKs expose text_to_speech.convert and drop the
        # older generate() helper; prefer the former and fall back.
        if hasattr(tts, "text_to_speech"):
            audio_stream = tts.text_to_speech.convert(
                voice_id="nPczCjz82tPNOwVbpGE2",
                text=answer,
                model_id="eleven_monolingual_v1",
            )
        else:
            audio_stream = tts.generate(
                text=answer,
                voice="nPczCjz82tPNOwVbpGE2",
                model="eleven_monolingual_v1",
            )

        save_path = "interrupt.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
        return save_path, answer
    except Exception as e:
        return None, f"Error: {e}"
# 7️⃣ Build Gradio 5.7 UI
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")

    with gr.Row():
        # Left column: inputs and script generation.
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            persona = gr.Dropdown(
                ["Serious Academic", "Gossip Columnist"],
                value="Serious Academic",
                label="Persona Style",
            )
            btn_gen = gr.Button("Generate Podcast Script")
            status = gr.Textbox(label="Status")
            script_display = gr.JSON(label="Generated Script")

        # Right column: playback and listener questions.
        with gr.Column():
            # autoplay starts each returned clip as soon as it arrives.
            player = gr.Audio(label="Audio Output", autoplay=True)
            transcript = gr.Textbox(label="Transcript")
            btn_play = gr.Button("▶️ Play Next Line")
            q_input = gr.Textbox(label="Ask a Question")
            btn_interrupt = gr.Button("✋ Interrupt Podcast")

    # Bind events (inside the Blocks context, where the components are defined).
    btn_gen.click(generate_script, [pdf_input, persona], [status, script_display])
    btn_play.click(play_next_chunk, [], [player, transcript])
    # Interrupt answers reuse the same player and transcript outputs.
    btn_interrupt.click(interrupt_and_ask, [q_input], [player, transcript])
# 8️⃣ Launch
# Start the Gradio server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|