Emeritus-21 committed on
Commit
331f1ed
·
verified ·
1 Parent(s): 32d3d6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -129
app.py CHANGED
@@ -4,208 +4,176 @@ from elevenlabs.client import ElevenLabs
4
  import os
5
  import json
6
  import time
 
7
  from dotenv import load_dotenv
8
 
 
9
  load_dotenv()
10
-
11
- # --- CONFIGURATION ---
12
- # Get these keys from your .env file or Hugging Face Secrets
13
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
14
  ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
15
 
16
- # Configure APIs
17
- genai.configure(api_key=GEMINI_API_KEY)
18
- client = ElevenLabs(api_key=ELEVEN_API_KEY)
 
 
19
 
20
- # --- STATE MANAGEMENT ---
21
- # We use a global state to track where we are in the podcast
22
- # In a real production app, this would be per-user session state
23
  class PodcastState:
24
  def __init__(self):
25
  self.script = []
26
  self.current_index = 0
27
- self.pdf_context = ""
28
  self.persona = "Serious Academic"
 
29
 
30
  state = PodcastState()
31
 
32
- # --- HELPER FUNCTIONS ---
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def generate_script(pdf_file, persona_style):
35
- """
36
- 1. Uploads PDF to Gemini 2.0 Flash
37
- 2. Generates a dialogue script based on the chosen Persona
38
- """
39
  if not pdf_file:
40
- return "Please upload a PDF first.", []
41
-
42
- print(f"Processing PDF... Style: {persona_style}")
43
 
44
- # Upload file to Gemini
 
 
 
 
 
 
 
45
  model = genai.GenerativeModel('gemini-2.0-flash')
46
 
47
- # Define Persona Prompts
48
  prompts = {
49
- "Serious Academic": "You are two professors discussing a paper. Tone: Intellectual, precise, slightly dry but insightful. Speaker A is the Skeptic, Speaker B is the Believer.",
50
- "Gossip Columnist": "You are two drama-loving gossip columnists reading this paper like it's a scandal. Tone: Shocked, slang-heavy, 'Tea spilling'. Speaker A is hyper, Speaker B is sarcastic.",
51
- "Explain Like I'm 5": "You are a gentle teacher and a curious student. Tone: Simple analogies, very clear, enthusiastic."
52
  }
53
 
54
- selected_prompt = prompts.get(persona_style, prompts["Serious Academic"])
55
-
56
- # This prompt forces a JSON structure for easier parsing
57
- prompt = f"""
58
- {selected_prompt}
59
 
60
- Analyze the attached PDF research paper.
61
- Generate a 6-turn dialogue script (3 turns each) summarizing the key findings.
62
 
63
- RETURN JSON ONLY in this format:
 
64
  [
65
  {{"speaker": "Host A", "text": "..."}},
66
  {{"speaker": "Host B", "text": "..."}}
67
  ]
68
  """
69
 
70
- # In a real deployment, we would use the File API.
71
- # For this hackathon demo (files < 50 pages), we can pass text or use the upload API.
72
- # Here we assume text extraction or direct PDF support if the SDK allows.
73
- # For simplicity/reliability in this snippet, we will simulate the file read:
74
- # (Note: To make this robust, use `genai.upload_file` in production)
75
-
76
- # MOCKING THE FILE READ FOR THE DEMO (Replace this with actual Gemini File API call)
77
- # We will just send the prompt to Gemini without the file content if file acts up,
78
- # but normally you do: sample_file = genai.upload_file(path=pdf_file, display_name="Paper")
79
-
80
- # Actual Call (Simulated for speed in demo code):
81
- response = model.generate_content(prompt)
82
-
83
- # Clean up JSON (Gemini sometimes adds ```json ... ```)
84
- clean_json = response.text.replace("```json", "").replace("```", "").strip()
85
-
86
  try:
 
 
87
  script_data = json.loads(clean_json)
 
88
  state.script = script_data
89
  state.current_index = 0
90
- state.persona = persona_style
91
- # Store context for Q&A later
92
- state.pdf_context = "User uploaded a paper. (Context stored)."
93
 
94
  return "✅ Script Generated! Click 'Play' to start.", script_data
95
  except Exception as e:
96
- return f"Error parsing script: {str(e)}", []
97
 
98
  def play_next_chunk():
99
- """
100
- Generates Audio for the NEXT line in the script.
101
- """
102
  if state.current_index >= len(state.script):
103
  return None, "🎉 Podcast Ended."
104
 
105
  line = state.script[state.current_index]
106
- speaker = line["speaker"]
107
- text = line["text"]
108
 
109
- # Voice Selection Logic
110
- # Voices: 'Adam' (Deep/Male), 'Nicole' (Crisp/Female), 'Mimi' (Childlike - for ELI5)
111
  voice_id = "nPczCjz82tPNOwVbpGE2" # Default Male
112
-
113
- if state.persona == "Gossip Columnist":
114
- voice_id = "nPczCjz82tPNOwVbpGE2" if speaker == "Host A" else "21m00Tcm4TlvDq8ikWAM" # Rachel
115
- elif state.persona == "Explain Like I'm 5":
116
- voice_id = "nPczCjz82tPNOwVbpGE2" if speaker == "Host A" else "MF3mGyEYCl7XYWbV9V6O" # Childlike
117
- else:
118
- # Academic
119
- voice_id = "nPczCjz82tPNOwVbpGE2" if speaker == "Host A" else "EXAVITQu4vr4xnSDxMaL"
 
 
 
120
 
121
- # Generate Audio
122
- audio_stream = client.generate(
123
- text=text,
124
- voice=voice_id,
125
- model="eleven_monolingual_v1"
126
- )
127
-
128
- # Save to temp file
129
- save_path = f"temp_{state.current_index}.mp3"
130
- with open(save_path, "wb") as f:
131
- for chunk in audio_stream:
132
- f.write(chunk)
133
-
134
- state.current_index += 1
135
-
136
- return save_path, f"🎙️ {speaker}: {text}"
137
 
138
  def interrupt_and_ask(user_question):
139
- """
140
- The 'Hero' Feature:
141
- 1. Pauses context.
142
- 2. Answers question.
143
- 3. Bridges back to the podcast.
144
- """
145
  model = genai.GenerativeModel('gemini-2.0-flash')
146
 
147
- last_line = state.script[state.current_index - 1]["text"] if state.current_index > 0 else "the start"
148
-
149
  prompt = f"""
150
  You are a podcast host ({state.persona}).
151
- You were just interrupted by a listener asking: "{user_question}"
152
-
153
- The last thing you said was: "{last_line}"
154
 
155
- 1. Answer the question directly but keep the persona.
156
- 2. Seamlessly transition back to the podcast topic.
157
  """
158
 
159
  response = model.generate_content(prompt)
160
- answer_text = response.text
161
 
162
- # Generate Answer Audio
163
  audio_stream = client.generate(
164
- text=answer_text,
165
- voice="nPczCjz82tPNOwVbpGE2", # Main Host Voice
166
  model="eleven_monolingual_v1"
167
  )
168
 
169
- save_path = "temp_interrupt.mp3"
170
  with open(save_path, "wb") as f:
171
  for chunk in audio_stream:
172
  f.write(chunk)
173
 
174
- return save_path, f"💡 Host: {answer_text}"
175
 
176
- # --- UI LAYOUT ---
 
177
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
178
  gr.Markdown("# 🎧 PodQuery: The Interactive Paper")
179
- gr.Markdown("Don't just listen. **Interrupt.** Ask questions. Learn actively.")
180
 
181
  with gr.Row():
182
- with gr.Column(scale=1):
183
- pdf_input = gr.File(label="1. Upload Research Paper (PDF)")
184
- style_dropdown = gr.Dropdown(
185
- ["Serious Academic", "Gossip Columnist", "Explain Like I'm 5"],
186
- label="2. Select Host Persona",
187
- value="Serious Academic"
188
- )
189
- gen_btn = gr.Button("🚀 Generate Podcast", variant="primary")
190
- status_box = gr.Textbox(label="Status", interactive=False)
191
-
192
- with gr.Column(scale=2):
193
- # The Player
194
- audio_player = gr.Audio(label="Podcast Stream", autoplay=True, type="filepath")
195
- transcript_box = gr.Markdown("### Transcript appears here...")
196
- next_btn = gr.Button("▶️ Play Next Segment", size="lg")
197
 
198
- # The Interrupt Interaction
199
- gr.Markdown("---")
200
- gr.Markdown("### Wait, I have a question!")
201
- with gr.Row():
202
- q_input = gr.Textbox(label="Ask the host...", placeholder="e.g., What does 'Stochastic' mean?", scale=4)
203
- ask_btn = gr.Button("Ask", scale=1)
204
-
205
- # Wiring
206
- gen_btn.click(fn=generate_script, inputs=[pdf_input, style_dropdown], outputs=[status_box, transcript_box])
207
- next_btn.click(fn=play_next_chunk, inputs=[], outputs=[audio_player, transcript_box])
208
- ask_btn.click(fn=interrupt_and_ask, inputs=[q_input], outputs=[audio_player, transcript_box])
209
 
210
  if __name__ == "__main__":
 
211
  demo.launch(mcp_server=True)
 
4
  import os
5
  import json
6
  import time
7
+ from pypdf import PdfReader
8
  from dotenv import load_dotenv
9
 
10
+ # 1. Load Keys
11
  load_dotenv()
 
 
 
12
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
13
  ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
14
 
15
+ # 2. Configure APIs
16
+ if GEMINI_API_KEY:
17
+ genai.configure(api_key=GEMINI_API_KEY)
18
+ if ELEVEN_API_KEY:
19
+ client = ElevenLabs(api_key=ELEVEN_API_KEY)
20
 
21
+ # 3. State Management
 
 
22
class PodcastState:
    """Global session state shared by all Gradio callbacks.

    NOTE(review): a single module-level instance is shared across every
    connected user; per-session state would be needed for multi-user use.
    """

    def __init__(self):
        # Raw text extracted from the uploaded PDF (filled by generate_script).
        self.full_text = ""
        # Dialogue turns as dicts of the form {"speaker": ..., "text": ...}.
        self.script = []
        # Index of the next line play_next_chunk will synthesize.
        self.current_index = 0
        # Active host persona; drives both prompt tone and voice selection.
        self.persona = "Serious Academic"
28
 
29
  state = PodcastState()
30
 
31
+ # 4. Helper Functions
32
+
33
def extract_text_from_pdf(pdf_path):
    """Extract plain text from the first pages of a PDF.

    Reads at most the first 5 pages to keep the downstream prompt small.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        The concatenated page text (one "\n" after each page), or an
        "Error reading PDF: ..." string if the file cannot be parsed.
    """
    try:
        reader = PdfReader(pdf_path)
        pages = []
        # Read first 5 pages max to save tokens for the demo.
        for page in reader.pages[:5]:
            # extract_text() returns None on image-only pages; treat that as
            # empty instead of raising TypeError on concatenation.
            pages.append(page.extract_text() or "")
        # join instead of repeated += (avoids quadratic string building).
        return "".join(text + "\n" for text in pages)
    except Exception as e:
        return f"Error reading PDF: {e}"
44
 
45
def generate_script(pdf_file, persona_style):
    """Turn an uploaded PDF into a short two-host dialogue script.

    Extracts text from the PDF, asks Gemini for a 4-turn JSON dialogue in
    the selected persona's tone, and stores the result in the global state.

    Args:
        pdf_file: Path of the uploaded PDF (from gr.File), or None.
        persona_style: Persona name; unknown values fall back to
            "Serious Academic".

    Returns:
        (status_message, script_list); script_list is [] on any error.
    """
    if not pdf_file:
        return "⚠️ Please upload a PDF first.", []

    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return "⚠️ API Keys missing! Check Settings -> Secrets.", []

    # Read the PDF up front; extract_text_from_pdf returns an error string
    # on failure, so surface that instead of prompting Gemini with it.
    pdf_text = extract_text_from_pdf(pdf_file)
    if pdf_text.startswith("Error reading PDF:"):
        return f"⚠️ {pdf_text}", []
    state.full_text = pdf_text
    state.persona = persona_style

    model = genai.GenerativeModel('gemini-2.0-flash')

    prompts = {
        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual, precise.",
        "Gossip Columnist": "Two drama-loving gossip columnists reading this paper like it's a scandal. Tone: Shocked, slang-heavy.",
        "Explain Like I'm 5": "A gentle teacher and a curious student. Tone: Simple analogies, enthusiastic."
    }

    # Fall back to the academic persona for unknown styles; without a
    # default, .get() would inject the literal string "None" into the prompt.
    persona_prompt = prompts.get(persona_style, prompts["Serious Academic"])

    system_prompt = f"""
    {persona_prompt}

    Based on the following text from a research paper:
    "{pdf_text[:4000]}..."

    Generate a short 4-turn dialogue script (2 turns each) summarizing the key point.
    RETURN RAW JSON ONLY. No markdown formatting. Format:
    [
        {{"speaker": "Host A", "text": "..."}},
        {{"speaker": "Host B", "text": "..."}}
    ]
    """

    try:
        response = model.generate_content(system_prompt)
        # Gemini often wraps JSON in ```json fences despite instructions.
        clean_json = response.text.replace("```json", "").replace("```", "").strip()
        script_data = json.loads(clean_json)

        state.script = script_data
        state.current_index = 0
        return "✅ Script Generated! Click 'Play' to start.", script_data
    except Exception as e:
        return f"Error: {str(e)}", []
90
 
91
def play_next_chunk():
    """Synthesize and return audio for the next line of the script.

    Returns:
        (audio_path, caption) — audio_path is the saved mp3 for the current
        line, or None when the podcast is finished or an error occurred.
    """
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast Ended."

    line = state.script[state.current_index]

    # Guard malformed entries (Gemini does not always obey the JSON schema);
    # previously line["speaker"] was read outside the try block, so a bad
    # entry raised an uncaught KeyError/TypeError into the Gradio callback.
    if not isinstance(line, dict) or "speaker" not in line or "text" not in line:
        state.current_index += 1  # skip the bad line so playback can continue
        return None, "⚠️ Skipped a malformed script line."

    # Voice selection: Host A keeps the default voice, Host B's voice
    # depends on the active persona.
    voice_id = "nPczCjz82tPNOwVbpGE2"  # Default Male
    if state.persona == "Gossip Columnist" and line["speaker"] == "Host B":
        voice_id = "21m00Tcm4TlvDq8ikWAM"  # Female
    elif line["speaker"] == "Host B":
        voice_id = "EXAVITQu4vr4xnSDxMaL"  # Female Generic

    try:
        audio_stream = client.generate(
            text=line["text"],
            voice=voice_id,
            model="eleven_monolingual_v1"
        )

        save_path = f"temp_{state.current_index}.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)

        state.current_index += 1
        return save_path, f"🎙️ {line['speaker']}: {line['text']}"
    except Exception as e:
        return None, f"Audio Error: {str(e)}"
 
 
 
 
 
 
 
120
 
121
def interrupt_and_ask(user_question):
    """Answer a listener question in-persona, then steer back to the paper.

    Args:
        user_question: Free-text question typed into the UI.

    Returns:
        (audio_path, caption) — the synthesized answer mp3 and its
        transcript line, or (None, error_message) on failure.
    """
    if not state.full_text:
        return None, "Upload a PDF first."

    model = genai.GenerativeModel('gemini-2.0-flash')

    prompt = f"""
    You are a podcast host ({state.persona}).
    Context: {state.full_text[:1000]}
    User Question: "{user_question}"

    1. Answer the question briefly.
    2. Say "Anyway, back to the paper..."
    """

    # Mirror play_next_chunk's error handling: an API failure should surface
    # as a status message, not crash the Gradio callback (previously there
    # was no try/except here at all).
    try:
        response = model.generate_content(prompt)
        answer = response.text

        audio_stream = client.generate(
            text=answer,
            voice="nPczCjz82tPNOwVbpGE2",
            model="eleven_monolingual_v1"
        )

        save_path = "interrupt.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)

        return save_path, f"💡 Host: {answer}"
    except Exception as e:
        return None, f"Error: {str(e)}"
151
 
152
# 5. Build Interface
# 'theme=gr.themes.Soft()' requires gradio>=4.0.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎧 PodQuery: The Interactive Paper")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF")
            # Offer every persona that generate_script's prompt table
            # supports (the dropdown previously omitted "Explain Like I'm 5").
            style = gr.Dropdown(
                ["Serious Academic", "Gossip Columnist", "Explain Like I'm 5"],
                value="Serious Academic",
                label="Persona"
            )
            btn_gen = gr.Button("Generate Script", variant="primary")
            status = gr.Textbox(label="Status")

        with gr.Column():
            player = gr.Audio(autoplay=True, label="Stream")
            transcript = gr.Markdown()
            btn_play = gr.Button("▶️ Play Next Line")

    gr.Markdown("### Interrupt")
    q_input = gr.Textbox(label="Question")
    btn_ask = gr.Button("✋ Interrupt")

    # Wire UI events to the callbacks defined above.
    btn_gen.click(generate_script, [pdf_input, style], [status, transcript])
    btn_play.click(play_next_chunk, [], [player, transcript])
    btn_ask.click(interrupt_and_ask, [q_input], [player, transcript])

if __name__ == "__main__":
    # MCP Server Mode Enabled
    demo.launch(mcp_server=True)