Spaces:

Emeritus-21
/

research-podquery

Runtime error

App Files Files Community

Emeritus-21 committed on Nov 29, 2025

Commit

41bab03

verified ·

1 Parent(s): 78049e5

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -122

app.py CHANGED Viewed

@@ -3,177 +3,174 @@ import google.generativeai as genai
 from elevenlabs.client import ElevenLabs
 import os
 import json
-import time
 from pypdf import PdfReader
 from dotenv import load_dotenv
-# 1. Load Keys
 load_dotenv()
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
-# 2. Configure APIs
 if GEMINI_API_KEY:
     genai.configure(api_key=GEMINI_API_KEY)
-if ELEVEN_API_KEY:
-    client = ElevenLabs(api_key=ELEVEN_API_KEY)
-# 3. State Management
 class PodcastState:
     def __init__(self):
         self.script = []
-        self.current_index = 0
-        self.persona = "Serious Academic"
         self.full_text = ""
 state = PodcastState()
-# 4. Helper Functions
-def extract_text_from_pdf(pdf_path):
-    """Real PDF Text Extraction"""
     try:
-        reader = PdfReader(pdf_path)
         text = ""
-        # Read first 5 pages max to save tokens for demo
         for page in reader.pages[:5]:
             text += page.extract_text() + "\n"
         return text
     except Exception as e:
         return f"Error reading PDF: {e}"
-def generate_script(pdf_file, persona_style):
     if not pdf_file:
-        return "⚠️ Please upload a PDF first.", []
-    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
-        return "⚠️ API Keys missing! Check Settings -> Secrets.", []
-    # Read PDF
     pdf_text = extract_text_from_pdf(pdf_file)
     state.full_text = pdf_text
-    state.persona = persona_style
-    model = genai.GenerativeModel('gemini-2.0-flash')
-    prompts = {
-        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual, precise.",
-        "Gossip Columnist": "Two drama-loving gossip columnists reading this paper like it's a scandal. Tone: Shocked, slang-heavy.",
-        "Explain Like I'm 5": "A gentle teacher and a curious student. Tone: Simple analogies, enthusiastic."
     }
     system_prompt = f"""
-    {prompts.get(persona_style)}
-    Based on the following text from a research paper:
-    "{pdf_text[:4000]}..."
-    Generate a short 4-turn dialogue script (2 turns each) summarizing the key point.
-    RETURN RAW JSON ONLY. No markdown formatting. Format:
     [
         {{"speaker": "Host A", "text": "..."}},
         {{"speaker": "Host B", "text": "..."}}
     ]
     """
     try:
-        response = model.generate_content(system_prompt)
-        clean_json = response.text.replace("```json", "").replace("```", "").strip()
-        script_data = json.loads(clean_json)
-        state.script = script_data
-        state.current_index = 0
-        return "✅ Script Generated! Click 'Play' to start.", script_data
     except Exception as e:
-        return f"Error: {str(e)}", []
-def play_next_chunk():
-    if state.current_index >= len(state.script):
-        return None, "🎉 Podcast Ended."
-    line = state.script[state.current_index]
-    # Voice Selection
-    voice_id = "nPczCjz82tPNOwVbpGE2" # Default Male
-    if state.persona == "Gossip Columnist" and line["speaker"] == "Host B":
-         voice_id = "21m00Tcm4TlvDq8ikWAM" # Female
-    elif line["speaker"] == "Host B":
-         voice_id = "EXAVITQu4vr4xnSDxMaL" # Female Generic
     try:
-        audio_stream = client.generate(
-            text=line["text"],
-            voice=voice_id,
-            model="eleven_monolingual_v1"
-        )
-        save_path = f"temp_{state.current_index}.mp3"
-        with open(save_path, "wb") as f:
-            for chunk in audio_stream:
                 f.write(chunk)
-        state.current_index += 1
-        return save_path, f"🎙️ {line['speaker']}: {line['text']}"
     except Exception as e:
-        return None, f"Audio Error: {str(e)}"
-def interrupt_and_ask(user_question):
     if not state.full_text:
         return None, "Upload a PDF first."
-    model = genai.GenerativeModel('gemini-2.0-flash')
     prompt = f"""
-    You are a podcast host ({state.persona}).
     Context: {state.full_text[:1000]}
-    User Question: "{user_question}"
-    1. Answer the question briefly.
     2. Say "Anyway, back to the paper..."
     """
-    response = model.generate_content(prompt)
-    answer = response.text
-    audio_stream = client.generate(
-        text=answer,
-        voice="nPczCjz82tPNOwVbpGE2",
-        model="eleven_monolingual_v1"
-    )
-    save_path = "interrupt.mp3"
-    with open(save_path, "wb") as f:
-        for chunk in audio_stream:
             f.write(chunk)
-    return save_path, f"💡 Host: {answer}"
-# 5. Build Interface (THEME ERROR FIXED HERE)
-# We use 'theme=gr.themes.Soft()' which requires gradio>=4.0
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎧 PodQuery: The Interactive Paper")
-    with gr.Row():
-        with gr.Column():
-            pdf_input = gr.File(label="Upload PDF")
-            style = gr.Dropdown(["Serious Academic", "Gossip Columnist"], value="Serious Academic", label="Persona")
-            btn_gen = gr.Button("Generate Script", variant="primary")
-            status = gr.Textbox(label="Status")
-        with gr.Column():
-            player = gr.Audio(autoplay=True, label="Stream")
-            transcript = gr.Markdown()
-            btn_play = gr.Button("▶️ Play Next Line")
-            gr.Markdown("### Interrupt")
-            q_input = gr.Textbox(label="Question")
-            btn_ask = gr.Button("✋ Interrupt")
-    btn_gen.click(generate_script, [pdf_input, style], [status, transcript])
-    btn_play.click(play_next_chunk, [], [player, transcript])
-    btn_ask.click(interrupt_and_ask, [q_input], [player, transcript])
-if __name__ == "__main__":
-    # MCP Server Mode Enabled
-    demo.launch(mcp_server=True)

 from elevenlabs.client import ElevenLabs
 import os
 import json
 from pypdf import PdfReader
 from dotenv import load_dotenv
+# ---------------------------------------
+# Load API Keys
+# ---------------------------------------
 load_dotenv()
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
 if GEMINI_API_KEY:
     genai.configure(api_key=GEMINI_API_KEY)
+client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None
+# ---------------------------------------
+# Podcast State
+# ---------------------------------------
 class PodcastState:
+    """Mutable session state shared by the callbacks in this module."""
     def __init__(self):
+        # Dialogue entries stored by generate_script(); play_next() reads each
+        # entry's "speaker" and "text" keys.
         self.script = []
+        # Position in `script` of the next entry play_next() will voice.
+        self.index = 0
+        # Persona name recorded by generate_script(); read by play_next() for
+        # voice selection and by interrupt() for its prompt.
+        self.persona = ""
+        # Text extracted from the uploaded PDF; empty until generate_script() runs.
         self.full_text = ""
 state = PodcastState()
+# ---------------------------------------
+# Helpers
+# ---------------------------------------
+def extract_text_from_pdf(file):
     try:
+        reader = PdfReader(file)
         text = ""
         for page in reader.pages[:5]:
             text += page.extract_text() + "\n"
         return text
     except Exception as e:
         return f"Error reading PDF: {e}"
+def generate_script(pdf_file, persona):
     if not pdf_file:
+        return "⚠️ Upload a PDF first.", []
     pdf_text = extract_text_from_pdf(pdf_file)
     state.full_text = pdf_text
+    state.persona = persona
+    model = genai.GenerativeModel("gemini-2.0-flash")
+    persona_prompts = {
+        "Serious Academic": "Two intelligent professors discussing the paper.",
+        "Gossip Columnist": "Two gossip hosts reacting dramatically.",
     }
     system_prompt = f"""
+    {persona_prompts.get(persona)}
+    Based on the text:
+    "{pdf_text[:4000]}"
+    Produce raw JSON only:
     [
         {{"speaker": "Host A", "text": "..."}},
         {{"speaker": "Host B", "text": "..."}}
     ]
     """
     try:
+        res = model.generate_content(system_prompt)
+        clean = res.text.replace("```json", "").replace("```", "")
+        script = json.loads(clean)
+        state.script = script
+        state.index = 0
+        return "✅ Script generated!", script
     except Exception as e:
+        return f"Error: {e}", []
+def play_next():
+    if state.index >= len(state.script):
+        return None, "🎉 Podcast complete."
+    line = state.script[state.index]
+    text = line["text"]
+    # choose voice
+    voice = "nPczCjz82tPNOwVbpGE2"  # default
+    if state.persona == "Gossip Columnist":
+        voice = "EXAVITQu4vr4xnSDxMaL" if line["speaker"] == "Host B" else voice
+    audio_path = f"audio_{state.index}.mp3"
     try:
+        stream = client.generate(text=text, voice=voice, model="eleven_monolingual_v1")
+        with open(audio_path, "wb") as f:
+            for chunk in stream:
                 f.write(chunk)
+        state.index += 1
+        return audio_path, f"🎙️ {line['speaker']}: {text}"
     except Exception as e:
+        return None, f"Audio error: {e}"
+def interrupt(question):
     if not state.full_text:
         return None, "Upload a PDF first."
+    model = genai.GenerativeModel("gemini-2.0-flash")
     prompt = f"""
+    You are a podcast host ({state.persona}).
     Context: {state.full_text[:1000]}
+    User asked: "{question}"
+    1. Answer the question.
     2. Say "Anyway, back to the paper..."
     """
+    ans = model.generate_content(prompt).text
+    audio_path = "interrupt.mp3"
+    stream = client.generate(text=ans, voice="nPczCjz82tPNOwVbpGE2", model="eleven_monolingual_v1")
+    with open(audio_path, "wb") as f:
+        for chunk in stream:
             f.write(chunk)
+    return audio_path, ans
+# ---------------------------------------
+# GRADIO 6 APP
+# ---------------------------------------
+with gr.App() as app:
+    gr.Markdown("# 🎧 PodQuery (Gradio 6 Edition)")
+    pdf_input = gr.File(label="Upload PDF")
+    persona = gr.Dropdown(
+        ["Serious Academic", "Gossip Columnist"],
+        value="Serious Academic",
+        label="Persona"
+    )
+    btn_gen = gr.Button("Generate Script")
+    status = gr.Textbox(label="Status")
+    script_box = gr.JSON(label="Generated Script")
+    player = gr.Audio(label="Audio", autoplay=True)
+    transcript = gr.Textbox(label="Transcript")
+    btn_play = gr.Button("▶️ Play Next Line")
+    q_input = gr.Textbox(label="Interrupt question")
+    btn_interrupt = gr.Button("✋ Interrupt Podcast")
+    # wiring events
+    btn_gen.subscribe(generate_script, [pdf_input, persona], [status, script_box])
+    btn_play.subscribe(play_next, [], [player, transcript])
+    btn_interrupt.subscribe(interrupt, [q_input], [player, transcript])
+app.launch()