Emeritus-21 committed on
Commit
41bab03
·
verified ·
1 Parent(s): 78049e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -122
app.py CHANGED
@@ -3,177 +3,174 @@ import google.generativeai as genai
3
  from elevenlabs.client import ElevenLabs
4
  import os
5
  import json
6
- import time
7
  from pypdf import PdfReader
8
  from dotenv import load_dotenv
9
 
10
# 1. Load Keys
# load_dotenv() runs first so that values from a local .env file (if any)
# are visible to the os.getenv lookups below.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")

# 2. Configure APIs
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Always bind `client`, even without a key, so later references see None
# instead of raising NameError on an undefined module global.
client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None
20
 
21
# 3. State Management
class PodcastState:
    """Mutable playback state shared by the handler functions in this module."""

    def __init__(self):
        # Text extracted from the most recently processed PDF.
        self.full_text = ""
        # Persona used for the current script.
        self.persona = "Serious Academic"
        # Position of the next script line to voice.
        self.current_index = 0
        # Dialogue turns produced by the model.
        self.script = []


# Single module-level instance read and written by every handler.
state = PodcastState()
30
 
31
- # 4. Helper Functions
32
 
33
def extract_text_from_pdf(pdf_path, max_pages=5):
    """Return the text of the first ``max_pages`` pages of a PDF.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts for the uploaded file.
        max_pages: Upper bound on the number of pages read. Defaults to 5
            to keep the amount of text (and therefore tokens) small.

    Returns:
        Each page's text followed by a newline, concatenated in page order.
        On any failure the string ``"Error reading PDF: <reason>"`` is
        returned instead of raising.
    """
    try:
        reader = PdfReader(pdf_path)
        # `or ""` guards against a page yielding None instead of a string,
        # which would otherwise break the concatenation.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages[:max_pages]
        )
    except Exception as e:
        return f"Error reading PDF: {e}"
44
 
45
def generate_script(pdf_file, persona_style):
    """Generate a short two-host dialogue script from an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF, passed straight to ``extract_text_from_pdf``.
        persona_style: Persona name; names without a prompt fall back to
            "Serious Academic".

    Returns:
        ``(status_message, script)``. ``script`` is a list of
        ``{"speaker": ..., "text": ...}`` dicts, or ``[]`` on any failure.

    Side effects:
        Once the PDF text is extracted, stores it and the persona on the
        module-level ``state``. When the model reply parses, also stores the
        script and resets ``state.current_index`` to 0.
    """
    if not pdf_file:
        return "⚠️ Please upload a PDF first.", []

    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return "⚠️ API Keys missing! Check Settings -> Secrets.", []

    # Read PDF
    pdf_text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf reports failures as a string; do not send that
    # message to the model or keep it as if it were the paper's text.
    if pdf_text.startswith("Error reading PDF:"):
        return pdf_text, []
    if not pdf_text.strip():
        return "Error: no text could be extracted from the PDF.", []

    state.full_text = pdf_text
    state.persona = persona_style

    model = genai.GenerativeModel('gemini-2.0-flash')

    prompts = {
        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual, precise.",
        "Gossip Columnist": "Two drama-loving gossip columnists reading this paper like it's a scandal. Tone: Shocked, slang-heavy.",
        "Explain Like I'm 5": "A gentle teacher and a curious student. Tone: Simple analogies, enthusiastic.",
    }
    # An unknown persona would otherwise put the literal text "None" in the prompt.
    persona_prompt = prompts.get(persona_style, prompts["Serious Academic"])

    system_prompt = f"""
    {persona_prompt}

    Based on the following text from a research paper:
    "{pdf_text[:4000]}..."

    Generate a short 4-turn dialogue script (2 turns each) summarizing the key point.
    RETURN RAW JSON ONLY. No markdown formatting. Format:
    [
        {{"speaker": "Host A", "text": "..."}},
        {{"speaker": "Host B", "text": "..."}}
    ]
    """

    try:
        response = model.generate_content(system_prompt)
        # The model may still wrap the JSON in a Markdown code fence.
        clean_json = response.text.replace("```json", "").replace("```", "").strip()
        script_data = json.loads(clean_json)

        # Playback indexes every turn by "speaker" and "text"; reject any
        # other shape here rather than failing later during playback.
        if not isinstance(script_data, list) or not all(
            isinstance(turn, dict) and "speaker" in turn and "text" in turn
            for turn in script_data
        ):
            raise ValueError("model reply is not a list of speaker/text turns")

        state.script = script_data
        state.current_index = 0

        return "✅ Script Generated! Click 'Play' to start.", script_data
    except Exception as e:
        return f"Error: {e}", []
90
-
91
def play_next_chunk():
    """Voice the next script line and advance the playback position.

    Returns:
        ``(audio_path, message)``. ``audio_path`` is the written mp3 file, or
        ``None`` when the script is exhausted or synthesis failed; ``message``
        is the spoken line, the end-of-podcast notice, or the error text.
    """
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast Ended."

    line = state.script[state.current_index]

    try:
        # Read both fields inside the try so a malformed script entry comes
        # back as an error message instead of an unhandled exception.
        speaker = line["speaker"]
        text = line["text"]

        # Voice Selection
        voice_id = "nPczCjz82tPNOwVbpGE2"  # Default Male
        if speaker == "Host B":
            if state.persona == "Gossip Columnist":
                voice_id = "21m00Tcm4TlvDq8ikWAM"  # Female
            else:
                voice_id = "EXAVITQu4vr4xnSDxMaL"  # Female Generic

        audio_stream = client.generate(
            text=text,
            voice=voice_id,
            model="eleven_monolingual_v1",
        )

        save_path = f"temp_{state.current_index}.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)

        # Advance only after the audio was written, so a failed line can be retried.
        state.current_index += 1
        return save_path, f"🎙️ {speaker}: {text}"
    except Exception as e:
        return None, f"Audio Error: {e}"
 
120
 
121
def interrupt_and_ask(user_question):
    """Answer a listener's question in the host's voice.

    Args:
        user_question: Free-text question typed by the user.

    Returns:
        ``(audio_path, message)``. ``audio_path`` is ``"interrupt.mp3"`` on
        success, or ``None`` when no PDF has been processed, the question is
        blank, or the model / speech call failed; ``message`` then explains why.
    """
    if not state.full_text:
        return None, "Upload a PDF first."
    if not user_question or not user_question.strip():
        return None, "Type a question first."

    prompt = f"""
    You are a podcast host ({state.persona}).
    Context: {state.full_text[:1000]}
    User Question: "{user_question}"

    1. Answer the question briefly.
    2. Say "Anyway, back to the paper..."
    """

    # Report failures as a message, the same way play_next_chunk does,
    # instead of letting the exception escape the handler.
    try:
        model = genai.GenerativeModel('gemini-2.0-flash')
        answer = model.generate_content(prompt).text

        audio_stream = client.generate(
            text=answer,
            voice="nPczCjz82tPNOwVbpGE2",
            model="eleven_monolingual_v1",
        )

        save_path = "interrupt.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
    except Exception as e:
        return None, f"Error: {e}"

    return save_path, f"💡 Host: {answer}"
151
-
152
# 5. Build Interface
# We use 'theme=gr.themes.Soft()' which requires gradio>=4.0
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎧 PodQuery: The Interactive Paper")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF")
            style = gr.Dropdown(["Serious Academic", "Gossip Columnist"], value="Serious Academic", label="Persona")
            btn_gen = gr.Button("Generate Script", variant="primary")
            status = gr.Textbox(label="Status")
            # generate_script returns the script as a list of dicts. A Markdown
            # component expects a string, so show the script in a JSON viewer.
            script_view = gr.JSON(label="Generated Script")

        with gr.Column():
            player = gr.Audio(autoplay=True, label="Stream")
            transcript = gr.Markdown()
            btn_play = gr.Button("▶️ Play Next Line")

            gr.Markdown("### Interrupt")
            q_input = gr.Textbox(label="Question")
            btn_ask = gr.Button("✋ Interrupt")

    # Event wiring: handler, input components, output components.
    btn_gen.click(generate_script, [pdf_input, style], [status, script_view])
    btn_play.click(play_next_chunk, [], [player, transcript])
    btn_ask.click(interrupt_and_ask, [q_input], [player, transcript])

if __name__ == "__main__":
    # MCP Server Mode Enabled
    demo.launch(mcp_server=True)
 
 
 
 
 
 
3
  from elevenlabs.client import ElevenLabs
4
  import os
5
  import json
 
6
  from pypdf import PdfReader
7
  from dotenv import load_dotenv
8
 
9
+
10
# ---------------------------------------
# API keys and service clients
# ---------------------------------------
# load_dotenv() runs before the environment lookups below.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
ELEVEN_API_KEY = os.environ.get("ELEVEN_API_KEY")

# Gemini is configured only when a key is present.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# `client` is always bound: an ElevenLabs client when a key exists, else None.
if ELEVEN_API_KEY:
    client = ElevenLabs(api_key=ELEVEN_API_KEY)
else:
    client = None
21
+
22
+
23
# ---------------------------------------
# Podcast State
# ---------------------------------------
class PodcastState:
    """Mutable playback state shared by the handler functions in this module."""

    def __init__(self):
        # Text extracted from the most recently processed PDF.
        self.full_text = ""
        # Persona chosen for the current script (empty until one is generated).
        self.persona = ""
        # Position of the next script line to voice.
        self.index = 0
        # Dialogue turns produced by the model.
        self.script = []


# Single module-level instance read and written by every handler.
state = PodcastState()
34
 
 
35
 
36
+ # ---------------------------------------
37
+ # Helpers
38
+ # ---------------------------------------
39
def extract_text_from_pdf(file, max_pages=5):
    """Return the text of the first ``max_pages`` pages of a PDF.

    Args:
        file: Whatever ``PdfReader`` accepts for the uploaded file.
        max_pages: Upper bound on the number of pages read (default 5).

    Returns:
        Each page's text followed by a newline, concatenated in page order.
        On any failure the string ``"Error reading PDF: <reason>"`` is
        returned instead of raising.
    """
    try:
        reader = PdfReader(file)
        # `or ""` guards against a page yielding None instead of a string,
        # which would otherwise break the concatenation.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages[:max_pages]
        )
    except Exception as e:
        return f"Error reading PDF: {e}"
48
 
49
+
50
def generate_script(pdf_file, persona):
    """Generate a two-host dialogue script from an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF, passed straight to ``extract_text_from_pdf``.
        persona: Persona name; names without a prompt fall back to
            "Serious Academic".

    Returns:
        ``(status_message, script)``. ``script`` is a list of
        ``{"speaker": ..., "text": ...}`` dicts, or ``[]`` on any failure.

    Side effects:
        Once the PDF text is extracted, stores it and the persona on the
        module-level ``state``. When the model reply parses, also stores the
        script and resets ``state.index`` to 0.
    """
    if not pdf_file:
        return "⚠️ Upload a PDF first.", []

    pdf_text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf reports failures as a string; do not send that
    # message to the model or keep it as if it were the paper's text.
    if pdf_text.startswith("Error reading PDF:"):
        return pdf_text, []
    if not pdf_text.strip():
        return "Error: no text could be extracted from the PDF.", []

    state.full_text = pdf_text
    state.persona = persona

    persona_prompts = {
        "Serious Academic": "Two intelligent professors discussing the paper.",
        "Gossip Columnist": "Two gossip hosts reacting dramatically.",
    }
    # An unknown persona would otherwise put the literal text "None" in the prompt.
    persona_prompt = persona_prompts.get(persona, persona_prompts["Serious Academic"])

    system_prompt = f"""
    {persona_prompt}

    Based on the text:
    "{pdf_text[:4000]}"

    Produce raw JSON only:
    [
        {{"speaker": "Host A", "text": "..."}},
        {{"speaker": "Host B", "text": "..."}}
    ]
    """

    try:
        model = genai.GenerativeModel("gemini-2.0-flash")
        res = model.generate_content(system_prompt)
        # The model may still wrap the JSON in a Markdown code fence.
        clean = res.text.replace("```json", "").replace("```", "").strip()
        script = json.loads(clean)

        # Playback indexes every turn by "speaker" and "text"; reject any
        # other shape here rather than failing later during playback.
        if not isinstance(script, list) or not all(
            isinstance(turn, dict) and "speaker" in turn and "text" in turn
            for turn in script
        ):
            raise ValueError("model reply is not a list of speaker/text turns")

        state.script = script
        state.index = 0

        return "✅ Script generated!", script

    except Exception as e:
        return f"Error: {e}", []
90
+
91
+
92
def play_next():
    """Voice the next script line and advance the playback position.

    Returns:
        ``(audio_path, message)``. ``audio_path`` is the written mp3 file, or
        ``None`` when the script is exhausted, no speech client is configured,
        or synthesis failed; ``message`` is the spoken line or the status text.
    """
    if state.index >= len(state.script):
        return None, "🎉 Podcast complete."

    # `client` is None when no ElevenLabs key was provided at start-up.
    if client is None:
        return None, "Audio error: ElevenLabs API key is not configured."

    line = state.script[state.index]
    audio_path = f"audio_{state.index}.mp3"

    try:
        # Read both fields inside the try so a malformed script entry comes
        # back as an error message instead of an unhandled exception.
        speaker = line["speaker"]
        text = line["text"]

        # choose voice
        voice = "nPczCjz82tPNOwVbpGE2"  # default
        if state.persona == "Gossip Columnist" and speaker == "Host B":
            voice = "EXAVITQu4vr4xnSDxMaL"

        stream = client.generate(text=text, voice=voice, model="eleven_monolingual_v1")
        with open(audio_path, "wb") as f:
            for chunk in stream:
                f.write(chunk)

        # Advance only after the audio was written, so a failed line can be retried.
        state.index += 1
        return audio_path, f"🎙️ {speaker}: {text}"

    except Exception as e:
        return None, f"Audio error: {e}"
117
+
118
 
119
def interrupt(question):
    """Answer a listener's question in the host's voice.

    Args:
        question: Free-text question typed by the user.

    Returns:
        ``(audio_path, message)``. On success ``audio_path`` is
        ``"interrupt.mp3"`` and ``message`` is the model's answer. Otherwise
        ``audio_path`` is ``None`` and ``message`` explains what went wrong.
    """
    if not state.full_text:
        return None, "Upload a PDF first."
    if not question or not question.strip():
        return None, "Type a question first."
    # `client` is None when no ElevenLabs key was provided at start-up.
    if client is None:
        return None, "Audio error: ElevenLabs API key is not configured."

    prompt = f"""
    You are a podcast host ({state.persona}).
    Context: {state.full_text[:1000]}
    User asked: "{question}"

    1. Answer the question.
    2. Say "Anyway, back to the paper..."
    """

    audio_path = "interrupt.mp3"

    # Report failures as a message, the same way play_next does, instead of
    # letting the exception escape the handler.
    try:
        model = genai.GenerativeModel("gemini-2.0-flash")
        ans = model.generate_content(prompt).text

        stream = client.generate(text=ans, voice="nPczCjz82tPNOwVbpGE2", model="eleven_monolingual_v1")
        with open(audio_path, "wb") as f:
            for chunk in stream:
                f.write(chunk)
    except Exception as e:
        return None, f"Error: {e}"

    return audio_path, ans
143
+
144
+
145
# ---------------------------------------
# GRADIO 6 APP
# ---------------------------------------
# gr.Blocks is Gradio's layout container and context manager; components
# created inside the `with` body are attached to it.
with gr.Blocks() as app:

    gr.Markdown("# 🎧 PodQuery (Gradio 6 Edition)")

    pdf_input = gr.File(label="Upload PDF")
    persona = gr.Dropdown(
        ["Serious Academic", "Gossip Columnist"],
        value="Serious Academic",
        label="Persona",
    )
    btn_gen = gr.Button("Generate Script")
    status = gr.Textbox(label="Status")
    script_box = gr.JSON(label="Generated Script")

    player = gr.Audio(label="Audio", autoplay=True)
    transcript = gr.Textbox(label="Transcript")

    btn_play = gr.Button("▶️ Play Next Line")

    q_input = gr.Textbox(label="Interrupt question")
    btn_interrupt = gr.Button("✋ Interrupt Podcast")

    # wiring events: Button exposes the `click` event listener
    # (handler, input components, output components).
    btn_gen.click(generate_script, [pdf_input, persona], [status, script_box])
    btn_play.click(play_next, [], [player, transcript])
    btn_interrupt.click(interrupt, [q_input], [player, transcript])


app.launch()