Emeritus-21 committed on
Commit
198f8e5
·
verified ·
1 Parent(s): 1a78858

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -79
app.py CHANGED
@@ -6,18 +6,17 @@ import json
6
  from pypdf import PdfReader
7
  from dotenv import load_dotenv
8
 
9
- # 1. Load Keys
10
  load_dotenv()
11
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
  ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
 
14
- # 2. Configure APIs
15
  if GEMINI_API_KEY:
16
  genai.configure(api_key=GEMINI_API_KEY)
17
  if ELEVEN_API_KEY:
18
  client = ElevenLabs(api_key=ELEVEN_API_KEY)
19
 
20
- # 3. State Management
21
  class PodcastState:
22
  def __init__(self):
23
  self.script = []
@@ -27,35 +26,31 @@ class PodcastState:
27
 
28
  state = PodcastState()
29
 
30
- # =========================
31
- # PDF TEXT EXTRACTION
32
- # =========================
33
- def extract_text_from_pdf(pdf):
34
  try:
35
- reader = PdfReader(pdf)
36
  text = ""
37
  for page in reader.pages[:5]:
38
- text += page.extract_text() + "\n"
 
 
39
  return text
40
  except Exception as e:
41
  return f"Error reading PDF: {e}"
42
 
43
- # =========================
44
- # SCRIPT GENERATION
45
- # =========================
46
  def generate_script(pdf_file, persona_style):
47
  if not pdf_file:
48
- return "⚠️ Upload a PDF first.", None
49
 
50
  if not GEMINI_API_KEY or not ELEVEN_API_KEY:
51
- return "⚠️ Missing API Keys in HF Secrets.", None
52
 
53
  pdf_text = extract_text_from_pdf(pdf_file)
54
  state.full_text = pdf_text
55
  state.persona = persona_style
56
 
57
- model = genai.GenerativeModel("gemini-2.0-flash")
58
-
59
  prompts = {
60
  "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
61
  "Gossip Columnist": "Two gossip hosts reacting dramatically.",
@@ -67,8 +62,8 @@ def generate_script(pdf_file, persona_style):
67
  Based on the paper:
68
  "{pdf_text[:4000]}..."
69
 
70
- Generate a 4-line dialogue script.
71
- RETURN JSON ONLY:
72
  [
73
  {{"speaker":"Host A","text":"..."}},
74
  {{"speaker":"Host B","text":"..."}}
@@ -76,25 +71,23 @@ def generate_script(pdf_file, persona_style):
76
  """
77
 
78
  try:
 
79
  response = model.generate_content(system_prompt)
80
  clean_json = response.text.replace("```json", "").replace("```", "").strip()
81
  script = json.loads(clean_json)
82
  state.script = script
83
  state.current_index = 0
84
- return "✅ Script ready.", script
85
  except Exception as e:
86
- return f"Error: {e}", None
87
 
88
- # =========================
89
- # PLAY NEXT LINE
90
- # =========================
91
  def play_next_chunk():
92
  if state.current_index >= len(state.script):
93
- return None, "🎉 Podcast complete."
94
 
95
  line = state.script[state.current_index]
96
-
97
- voice_id = "nPczCjz82tPNOwVbpGE2" # Default male
98
  if state.persona == "Gossip Columnist" and line["speaker"] == "Host B":
99
  voice_id = "21m00Tcm4TlvDq8ikWAM"
100
 
@@ -112,74 +105,66 @@ def play_next_chunk():
112
 
113
  state.current_index += 1
114
  return save_path, f"{line['speaker']}: {line['text']}"
115
-
116
  except Exception as e:
117
  return None, f"Audio error: {e}"
118
 
119
- # =========================
120
- # INTERRUPT + QUESTION
121
- # =========================
122
  def interrupt_and_ask(question):
123
  if not state.full_text:
124
- return None, "Upload PDF first."
125
 
126
  model = genai.GenerativeModel("gemini-2.0-flash")
127
-
128
  prompt = f"""
129
  Persona: {state.persona}
130
  Context: {state.full_text[:1000]}
131
- User asked: {question}
132
 
133
- Respond briefly, then say:
134
- "Anyway, back to the paper..."
135
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- response = model.generate_content(prompt)
138
- answer = response.text
139
-
140
- audio_stream = client.generate(
141
- text=answer,
142
- voice="nPczCjz82tPNOwVbpGE2",
143
- model="eleven_monolingual_v1"
144
- )
145
-
146
- save_path = "interrupt.mp3"
147
- with open(save_path, "wb") as f:
148
- for chunk in audio_stream:
149
- f.write(chunk)
150
-
151
- return save_path, answer
152
-
153
- # =========================
154
- # GRADIO 6 UI
155
- # =========================
156
- with gr.App() as app:
157
-
158
  gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")
159
 
160
- pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
161
- persona = gr.Dropdown(
162
- ["Serious Academic", "Gossip Columnist"],
163
- value="Serious Academic",
164
- label="Persona Style"
165
- )
166
- btn_gen = gr.Button("Generate Podcast Script")
167
-
168
- status = gr.Textbox(label="Status")
169
- script_display = gr.JSON(label="Generated Script")
170
-
171
- player = gr.Audio(label="Audio Output", autoplay=True)
172
- transcript = gr.Textbox(label="Transcript")
173
-
174
- btn_play = gr.Button("▶️ Play Next Line")
175
-
176
- q_input = gr.Textbox(label="Ask a Question (Interrupt)")
177
- btn_interrupt = gr.Button("✋ Interrupt Podcast")
178
 
179
  # Bind events
180
- btn_gen.subscribe(generate_script, [pdf_input, persona], [status, script_display])
181
- btn_play.subscribe(play_next_chunk, [], [player, transcript])
182
- btn_interrupt.subscribe(interrupt_and_ask, [q_input], [player, transcript])
183
 
184
- # DO NOT CALL app.launch()
185
- # Hugging Face Spaces auto-launches Gradio 6 apps.
 
 
6
  from pypdf import PdfReader
7
  from dotenv import load_dotenv
8
 
9
+ # 1️⃣ Load API Keys
10
  load_dotenv()
11
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
  ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
 
 
14
  if GEMINI_API_KEY:
15
  genai.configure(api_key=GEMINI_API_KEY)
16
  if ELEVEN_API_KEY:
17
  client = ElevenLabs(api_key=ELEVEN_API_KEY)
18
 
19
+ # 2️⃣ Podcast State
20
  class PodcastState:
21
  def __init__(self):
22
  self.script = []
 
26
 
27
  state = PodcastState()
28
 
29
+ # 3️⃣ PDF Extraction
30
+ def extract_text_from_pdf(pdf_file):
 
 
31
  try:
32
+ reader = PdfReader(pdf_file)
33
  text = ""
34
  for page in reader.pages[:5]:
35
+ page_text = page.extract_text()
36
+ if page_text:
37
+ text += page_text + "\n"
38
  return text
39
  except Exception as e:
40
  return f"Error reading PDF: {e}"
41
 
42
+ # 4️⃣ Generate Script
 
 
43
  def generate_script(pdf_file, persona_style):
44
  if not pdf_file:
45
+ return "⚠️ Upload PDF first.", {}
46
 
47
  if not GEMINI_API_KEY or not ELEVEN_API_KEY:
48
+ return "⚠️ API Keys missing in HF Secrets!", {}
49
 
50
  pdf_text = extract_text_from_pdf(pdf_file)
51
  state.full_text = pdf_text
52
  state.persona = persona_style
53
 
 
 
54
  prompts = {
55
  "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
56
  "Gossip Columnist": "Two gossip hosts reacting dramatically.",
 
62
  Based on the paper:
63
  "{pdf_text[:4000]}..."
64
 
65
+ Generate a short 4-turn dialogue script.
66
+ Return JSON ONLY:
67
  [
68
  {{"speaker":"Host A","text":"..."}},
69
  {{"speaker":"Host B","text":"..."}}
 
71
  """
72
 
73
  try:
74
+ model = genai.GenerativeModel("gemini-2.0-flash")
75
  response = model.generate_content(system_prompt)
76
  clean_json = response.text.replace("```json", "").replace("```", "").strip()
77
  script = json.loads(clean_json)
78
  state.script = script
79
  state.current_index = 0
80
+ return "✅ Script ready!", script
81
  except Exception as e:
82
+ return f"Error generating script: {e}", {}
83
 
84
+ # 5️⃣ Play next line
 
 
85
  def play_next_chunk():
86
  if state.current_index >= len(state.script):
87
+ return None, "🎉 Podcast finished."
88
 
89
  line = state.script[state.current_index]
90
+ voice_id = "nPczCjz82tPNOwVbpGE2"
 
91
  if state.persona == "Gossip Columnist" and line["speaker"] == "Host B":
92
  voice_id = "21m00Tcm4TlvDq8ikWAM"
93
 
 
105
 
106
  state.current_index += 1
107
  return save_path, f"{line['speaker']}: {line['text']}"
 
108
  except Exception as e:
109
  return None, f"Audio error: {e}"
110
 
111
+ # 6️⃣ Interrupt & Ask
 
 
112
  def interrupt_and_ask(question):
113
  if not state.full_text:
114
+ return None, "Upload a PDF first."
115
 
116
  model = genai.GenerativeModel("gemini-2.0-flash")
 
117
  prompt = f"""
118
  Persona: {state.persona}
119
  Context: {state.full_text[:1000]}
120
+ User Question: {question}
121
 
122
+ Answer briefly, then say "Anyway, back to the paper..."
 
123
  """
124
+ try:
125
+ response = model.generate_content(prompt)
126
+ answer = response.text
127
+ audio_stream = client.generate(
128
+ text=answer,
129
+ voice="nPczCjz82tPNOwVbpGE2",
130
+ model="eleven_monolingual_v1"
131
+ )
132
+ save_path = "interrupt.mp3"
133
+ with open(save_path, "wb") as f:
134
+ for chunk in audio_stream:
135
+ f.write(chunk)
136
+ return save_path, answer
137
+ except Exception as e:
138
+ return None, f"Error: {e}"
139
 
140
+ # 7️⃣ Build Gradio 5.7 UI
141
+ with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")
143
 
144
+ with gr.Row():
145
+ with gr.Column():
146
+ pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
147
+ persona = gr.Dropdown(
148
+ ["Serious Academic", "Gossip Columnist"],
149
+ value="Serious Academic",
150
+ label="Persona Style"
151
+ )
152
+ btn_gen = gr.Button("Generate Podcast Script")
153
+ status = gr.Textbox(label="Status")
154
+ script_display = gr.JSON(label="Generated Script")
155
+
156
+ with gr.Column():
157
+ player = gr.Audio(label="Audio Output", autoplay=True)
158
+ transcript = gr.Textbox(label="Transcript")
159
+ btn_play = gr.Button("▶️ Play Next Line")
160
+ q_input = gr.Textbox(label="Ask a Question")
161
+ btn_interrupt = gr.Button("✋ Interrupt Podcast")
162
 
163
  # Bind events
164
+ btn_gen.click(generate_script, [pdf_input, persona], [status, script_display])
165
+ btn_play.click(play_next_chunk, [], [player, transcript])
166
+ btn_interrupt.click(interrupt_and_ask, [q_input], [player, transcript])
167
 
168
+ # 8️⃣ Launch
169
+ if __name__ == "__main__":
170
+ demo.launch()