ChatBotsTA committed on
Commit
d93d7bb
·
verified ·
1 Parent(s): 6401b0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -267
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py
2
  import os
3
  import re
4
  import tempfile
@@ -24,33 +24,15 @@ HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_API_KEY", st.secrets.get("HUGGINGFACE_A
24
  HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")
25
 
26
  # ============ HELPERS ============
27
- # (rest of the helper functions from your original code are here, unchanged)
28
  def clean_text(text: str) -> str:
29
  return re.sub(r"\s+", " ", text or "").strip()
30
 
31
  def extract_text_from_pdf(uploaded_file) -> str:
32
  reader = PdfReader(uploaded_file)
33
- parts = []
34
- for page in reader.pages:
35
- t = page.extract_text()
36
- if t:
37
- parts.append(t)
38
  return clean_text(" ".join(parts))
39
 
40
- def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
41
- if not text:
42
- return []
43
- chunks = []
44
- start = 0
45
- while start < len(text):
46
- end = start + chunk_size
47
- chunks.append(text[start:end])
48
- start = max(end - overlap, end)
49
- return chunks
50
-
51
- # ============ OpenRouter wrapper (safe) ============
52
- def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tokens: int = 800, temperature: float = 0.2):
53
- """Return tuple (success: bool, text_or_error: str)."""
54
  if not OPENROUTER_KEY:
55
  return False, "OPENROUTER_API_KEY not set"
56
  url = "https://api.openrouter.ai/v1/chat/completions"
@@ -60,294 +42,183 @@ def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tok
60
  resp = requests.post(url, json=payload, headers=headers, timeout=30)
61
  resp.raise_for_status()
62
  data = resp.json()
63
- choices = data.get("choices", [])
64
- if choices:
65
- c = choices[0]
66
- if "message" in c and isinstance(c["message"], dict):
67
- content = c["message"].get("content")
68
- if isinstance(content, dict) and "content" in content:
69
- return True, content["content"]
70
- elif isinstance(content, str):
71
- return True, content
72
- if "text" in c:
73
- return True, c["text"]
74
- if "text" in data:
75
- return True, data["text"]
76
- return False, "OpenRouter responded with unexpected shape"
77
  except Exception as e:
78
- return False, f"OpenRouter request failed: {repr(e)}"
79
 
80
- # ============ Local extractive summarizer (offline) ============
81
  def extractive_summary(text: str, num_sentences: int = 6) -> str:
82
  if not text:
83
  return ""
84
  sentences = re.split(r'(?<=[.!?])\s+', text)
85
  words = re.findall(r'\w+', text.lower())
86
- stopwords = set(["the","and","is","in","to","of","a","that","it","for","on","with","as","are","was","be","by","an","or"])
87
- freq = {}
88
- for w in words:
89
- if w in stopwords or len(w) < 2:
90
- continue
91
- freq[w] = freq.get(w, 0) + 1
92
- if not freq:
93
- return "Unable to summarize (text too short)."
94
- sent_scores = []
95
- for s in sentences:
96
- s_words = re.findall(r'\w+', s.lower())
97
- score = sum(freq.get(w, 0) for w in s_words)
98
- sent_scores.append((score, s))
99
- sent_scores.sort(reverse=True, key=lambda x: x[0])
100
- chosen = [s for _, s in sent_scores[:num_sentences]]
101
- chosen_sorted = sorted(chosen, key=lambda s: text.find(s))
102
- bullets = "\n".join(f"- {clean_text(s)}" for s in chosen_sorted if s.strip())
103
- return bullets if bullets else clean_text(" ".join(chosen_sorted))
104
-
105
- # ============ ElevenLabs TTS (remote) ============
106
- def eleven_tts_bytes(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy"):
107
  if not ELEVEN_API_KEY:
108
  return False, "ELEVEN_API_KEY not set"
109
- url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
110
  headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
111
- data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}}
112
  try:
113
  r = requests.post(url, json=data, headers=headers, timeout=30)
114
- if r.ok:
115
- return True, r.content
116
- return False, f"ElevenLabs returned {r.status_code}: {r.text[:300]}"
117
  except Exception as e:
118
- return False, f"ElevenLabs request failed: {repr(e)}"
119
 
120
- # ============ Local TTS fallback (pyttsx3) ============
121
  def pyttsx3_tts_file(text: str):
122
  if not HAS_PYTTSX3:
123
  return False, "pyttsx3 not installed"
124
  try:
125
  engine = pyttsx3.init()
126
- tf = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
127
- tf_name = tf.name
128
- tf.close()
129
- engine.save_to_file(text, tf_name)
130
  engine.runAndWait()
131
- with open(tf_name, "rb") as f:
132
- b = f.read()
133
- return True, b
134
  except Exception as e:
135
- return False, f"pyttsx3 TTS failed: {repr(e)}"
136
-
137
- # ============ Hugging Face mermaid (optional) ============
138
- def call_hf_mermaid(prompt: str, model: str = HF_MERMAID_MODEL):
139
- if not HUGGINGFACE_KEY:
140
- return False, "HUGGINGFACE_API_KEY not set"
141
- url = f"https://api-inference.huggingface.co/models/{model}"
142
- headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}", "Accept": "application/json"}
143
- payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512, "temperature": 0.2}}
144
- try:
145
- r = requests.post(url, headers=headers, json=payload, timeout=40)
146
- if not r.ok:
147
- return False, f"HuggingFace returned {r.status_code}: {r.text[:300]}"
148
- j = r.json()
149
- if isinstance(j, list) and len(j) > 0 and isinstance(j[0], dict) and "generated_text" in j[0]:
150
- return True, j[0]["generated_text"]
151
- if isinstance(j, str):
152
- return True, j
153
- if isinstance(j, dict):
154
- for k in ("generated_text", "output", "text"):
155
- if k in j:
156
- return True, j[k]
157
- return False, "HF: unexpected response shape"
158
- except Exception as e:
159
- return False, f"HuggingFace request failed: {repr(e)}"
160
 
161
  def generate_mermaid_from_summary(summary: str):
162
- prompt = ("Given the following concise summary, produce a Mermaid flowchart (use 'flowchart TD' syntax). "
163
- "Output only the Mermaid code block. Summary:\n\n" + summary)
164
- ok, hf_out = call_hf_mermaid(prompt)
165
- if ok:
166
- m = re.search(r"```(?:mermaid)?\n([\s\S]+?)```", hf_out, re.IGNORECASE)
167
- return hf_out if m is None else m.group(1).strip()
168
- lines = re.split(r"\n+|-{1,}\s*|•\s*", summary)
169
- nodes = [clean_text(l) for l in lines if clean_text(l)]
170
- nodes = nodes[:8]
 
 
 
 
 
 
 
 
 
171
  if not nodes:
172
- nodes = ["Summary empty"]
173
- mermaid = "flowchart TD\n"
174
- for i, n in enumerate(nodes):
175
- node_text = n.replace('"', "'")[:80]
176
- mermaid += f' A{i}["{node_text}"]\n'
177
- for i in range(len(nodes) - 1):
178
- mermaid += f" A{i} --> A{i+1}\n"
179
- return mermaid
180
-
181
- def render_mermaid(mermaid_code: str, height: int = 420):
182
- html = f"""
183
- <div id="mermaid-target">
184
- <pre class="mermaid">
185
- {mermaid_code}
186
- </pre>
187
  </div>
188
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
189
- <script>
190
- mermaid.initialize({{startOnLoad:true}});
191
- </script>
192
  """
193
- st.components.v1.html(html, height=height, scrolling=True)
194
 
195
  # ============ STREAMLIT UI ============
196
- st.set_page_config(page_title="PDF Q&A resilient", layout="wide")
197
- st.title("📄 PDF Q&A — resilient (OpenRouter → local fallback)")
198
-
199
- # Session state initialization
200
- if 'text_data' not in st.session_state:
201
- st.session_state.text_data = None
202
- if 'summary' not in st.session_state:
203
- st.session_state.summary = None
204
- if 'mermaid' not in st.session_state:
205
- st.session_state.mermaid = None
206
- if 'diagnostics' not in st.session_state:
207
- st.session_state.diagnostics = {"summary_source": None, "mermaid_source": None, "tts_source": None}
208
-
209
- def process_pdf():
210
- uploaded_file = st.session_state.uploaded_file
211
- if uploaded_file:
212
- try:
213
- with st.spinner("Extracting text from PDF..."):
214
- raw_text = extract_text_from_pdf(uploaded_file)
215
- st.session_state.text_data = raw_text
216
- st.success(f"Extracted {len(raw_text)} characters")
217
- except Exception as e:
218
- st.error(f"PDF extraction failed: {e}")
219
- st.session_state.text_data = None
220
- else:
221
- st.session_state.text_data = None
222
-
223
- def generate_outputs():
224
- raw_text = st.session_state.text_data
225
- if not raw_text:
226
- st.error("No text available to process. Please upload a PDF.")
227
- return
228
-
229
- used = st.session_state.diagnostics
230
-
231
- # Summarize logic
232
- with st.spinner("Trying OpenRouter summarization..."):
233
- prompt = f"Summarize the following text in 5-8 concise bullets:\n\n{raw_text[:15000]}"
234
- messages = [{"role": "system", "content": "You are a concise summarizer."},
235
- {"role": "user", "content": prompt}]
236
- ok, out = openrouter_chat(messages, max_tokens=400, model=OPENROUTER_MODEL)
237
- if ok:
238
- used["summary_source"] = "openrouter"
239
- summary = out
240
  else:
241
- used["summary_source"] = f"fallback_local (reason: {out})"
242
- summary = extractive_summary(raw_text, num_sentences=6)
243
- st.session_state.summary = summary
244
- st.session_state.diagnostics = used
245
 
246
- # Mermaid logic
247
- with st.spinner("Generating Mermaid diagram (HF → local fallback)..."):
248
- mermaid = generate_mermaid_from_summary(summary)
249
- used["mermaid_source"] = "huggingface" if HUGGINGFACE_KEY and mermaid.strip().startswith(("flowchart","graph")) else "local"
250
- st.session_state.mermaid = mermaid
251
- st.session_state.diagnostics = used
252
-
253
- st.success("Summary and Diagram generated!")
 
 
 
 
 
254
 
255
- # UI layout
256
- c1, c2, c3 = st.columns(3)
257
- with c1:
258
- st.write("OpenRouter:")
259
- st.success("Key present") if OPENROUTER_KEY else st.error("Key missing — will use local summarizer/Q&A fallback")
260
- with c2:
261
- st.write("Hugging Face:")
262
- st.success("Key present (optional)") if HUGGINGFACE_KEY else st.info("Key missing — using local Mermaid fallback")
263
- with c3:
264
- st.write("Audio:")
265
- if ELEVEN_API_KEY:
266
- st.success("ElevenLabs key present (preferred)")
267
- elif HAS_PYTTSX3:
268
- st.info("Using local pyttsx3 fallback TTS")
269
- else:
270
- st.info("No ElevenLabs key and pyttsx3 not available")
271
-
272
- st.file_uploader("Upload a PDF", type=["pdf"], key='uploaded_file', on_change=process_pdf)
273
 
274
- if st.session_state.text_data:
275
- st.button("Summarize & Diagram", on_click=generate_outputs)
276
-
277
- if st.session_state.summary:
278
- st.subheader("📌 Summary")
279
- st.write(st.session_state.summary)
280
- st.markdown(f"**Summary source:** {st.session_state.diagnostics['summary_source']}")
281
-
282
- st.subheader("🗺️ Summary Diagram")
283
- render_mermaid(st.session_state.mermaid, height=460)
284
- st.code(st.session_state.mermaid, language="mermaid")
285
- st.markdown(f"**Mermaid source:** {st.session_state.diagnostics['mermaid_source']}")
286
-
287
- st.write("### TTS Audio")
288
- if st.checkbox("Generate audio for summary"):
289
- with st.spinner("Attempting TTS..."):
290
  audio_bytes = None
291
- if ELEVEN_API_KEY:
 
292
  ok, out = eleven_tts_bytes(st.session_state.summary)
293
  if ok:
294
- st.session_state.diagnostics["tts_source"] = "elevenlabs"
295
  audio_bytes = out
 
 
 
 
 
 
296
  else:
297
- st.session_state.diagnostics["tts_source"] = f"elevenlabs_failed ({out})"
298
- if HAS_PYTTSX3:
299
- ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
300
- if ok2:
301
- st.session_state.diagnostics["tts_source"] = "pyttsx3"
302
- audio_bytes = out2
303
- elif HAS_PYTTSX3:
304
- ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
305
- if ok2:
306
- st.session_state.diagnostics["tts_source"] = "pyttsx3"
307
- audio_bytes = out2
308
 
309
  if audio_bytes:
310
- if st.session_state.diagnostics["tts_source"] == "elevenlabs":
311
- st.audio(audio_bytes, format="audio/mp3")
312
- else:
313
- st.audio(audio_bytes, format="audio/wav")
314
  else:
315
- st.error("Audio generation failed. Check your API keys and local setup.")
316
- st.markdown(f"**TTS source:** {st.session_state.diagnostics['tts_source']}")
317
-
318
- st.write("### Diagnostics")
319
- st.json(st.session_state.diagnostics)
320
 
321
  st.markdown("---")
322
- st.subheader("❓ Q&A")
323
- query = st.text_input("Ask a question about the PDF:")
324
- if query:
325
- with st.spinner("Processing your question..."):
326
- if OPENROUTER_KEY:
327
- prompt = f"Context:\n{st.session_state.text_data[:15000]}\n\nQuestion: {query}\nAnswer concisely."
328
- messages = [{"role": "system", "content": "You are a helpful assistant."},
329
- {"role": "user", "content": prompt}]
330
- ok, out = openrouter_chat(messages, max_tokens=600, model=OPENROUTER_MODEL)
 
 
 
 
 
 
 
 
331
  if ok:
332
- st.info("Answer from OpenRouter:")
333
- st.write(out)
334
- else:
335
- st.warning(f"OpenRouter failed: {out}\nFalling back to local Q&A.")
336
- sentences = re.split(r'(?<=[.!?])\s+', st.session_state.text_data)
337
- q_words = re.findall(r'\w+', query.lower())[:3]
338
- matches = [s for s in sentences if all(w in s.lower() for w in q_words)]
339
- if matches:
340
- st.info("Answer from local fallback:")
341
- st.write(matches[:3])
342
- else:
343
- st.info("No good local match found.")
344
- else:
345
- st.info("OpenRouter key missing. Using local Q&A fallback.")
346
- sentences = re.split(r'(?<=[.!?])\s+', st.session_state.text_data)
347
- q_words = re.findall(r'\w+', query.lower())[:3]
348
- matches = [s for s in sentences if all(w in s.lower() for w in q_words)]
349
- if matches:
350
- st.info("Answer from local fallback:")
351
- st.write(matches[:3])
352
  else:
353
- st.info("No good local match found.")
 
 
 
 
 
 
 
1
+ # app.py - Corrected and Simplified
2
  import os
3
  import re
4
  import tempfile
 
24
  HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")
25
 
26
  # ============ HELPERS ============
 
27
  def clean_text(text: str) -> str:
28
  return re.sub(r"\s+", " ", text or "").strip()
29
 
30
  def extract_text_from_pdf(uploaded_file) -> str:
31
  reader = PdfReader(uploaded_file)
32
+ parts = [page.extract_text() for page in reader.pages if page.extract_text()]
 
 
 
 
33
  return clean_text(" ".join(parts))
34
 
35
+ def openrouter_chat(messages: List[dict], model: str, max_tokens: int, temperature: float):
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  if not OPENROUTER_KEY:
37
  return False, "OPENROUTER_API_KEY not set"
38
  url = "https://api.openrouter.ai/v1/chat/completions"
 
42
  resp = requests.post(url, json=payload, headers=headers, timeout=30)
43
  resp.raise_for_status()
44
  data = resp.json()
45
+ content = data['choices'][0]['message']['content']
46
+ return True, content
 
 
 
 
 
 
 
 
 
 
 
 
47
  except Exception as e:
48
+ return False, f"OpenRouter request failed: {e}"
49
 
 
50
  def extractive_summary(text: str, num_sentences: int = 6) -> str:
51
  if not text:
52
  return ""
53
  sentences = re.split(r'(?<=[.!?])\s+', text)
54
  words = re.findall(r'\w+', text.lower())
55
+ stopwords = set(["the", "and", "is", "in", "to", "of", "a", "that", "it", "for"])
56
+ freq = {w: words.count(w) for w in words if w not in stopwords and len(w) > 1}
57
+ sent_scores = [(sum(freq.get(w, 0) for w in re.findall(r'\w+', s.lower())), s) for s in sentences]
58
+ sent_scores.sort(reverse=True)
59
+ chosen_sentences = sorted([s for _, s in sent_scores[:num_sentences]], key=text.find)
60
+ return "\n".join(f"- {clean_text(s)}" for s in chosen_sentences if s.strip())
61
+
62
+ def eleven_tts_bytes(text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  if not ELEVEN_API_KEY:
64
  return False, "ELEVEN_API_KEY not set"
65
+ url = "https://api.elevenlabs.io/v1/text-to-speech/pnCWbS8Aqipqqr5wzjuy"
66
  headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
67
+ data = {"text": text, "model_id": "eleven_multilingual_v2"}
68
  try:
69
  r = requests.post(url, json=data, headers=headers, timeout=30)
70
+ r.raise_for_status()
71
+ return True, r.content
 
72
  except Exception as e:
73
+ return False, f"ElevenLabs request failed: {e}"
74
 
 
75
  def pyttsx3_tts_file(text: str):
76
  if not HAS_PYTTSX3:
77
  return False, "pyttsx3 not installed"
78
  try:
79
  engine = pyttsx3.init()
80
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
81
+ engine.save_to_file(text, temp_file.name)
 
 
82
  engine.runAndWait()
83
+ with open(temp_file.name, "rb") as f:
84
+ return True, f.read()
 
85
  except Exception as e:
86
+ return False, f"pyttsx3 TTS failed: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  def generate_mermaid_from_summary(summary: str):
89
+ prompt = ("Create a concise Mermaid flowchart ('flowchart TD') from the following summary. "
90
+ "Output only the Mermaid code block. Summary:\n" + summary)
91
+ if HUGGINGFACE_KEY:
92
+ url = f"https://api-inference.huggingface.co/models/{HF_MERMAID_MODEL}"
93
+ headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}"}
94
+ payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512}}
95
+ try:
96
+ response = requests.post(url, headers=headers, json=payload, timeout=40)
97
+ if response.ok and response.json():
98
+ text = response.json()[0]['generated_text']
99
+ match = re.search(r"```(?:mermaid)?\n([\s\S]+?)```", text)
100
+ if match:
101
+ return match.group(1).strip()
102
+ except Exception:
103
+ pass # Fallback to local
104
+
105
+ # Local fallback logic
106
+ nodes = [re.sub(r'^- ', '', line).strip() for line in summary.split('\n') if line.strip()]
107
  if not nodes:
108
+ return "graph TD\n A[Summary Empty]"
109
+ mermaid_code = "graph TD\n"
110
+ for i, node_text in enumerate(nodes[:8]):
111
+ mermaid_code += f' A{i}["{node_text.replace('"', "'")[:60]}"]\n'
112
+ for i in range(len(nodes[:8]) - 1):
113
+ mermaid_code += f" A{i} --> A{i+1}\n"
114
+ return mermaid_code
115
+
116
+ def render_mermaid(mermaid_code: str):
117
+ html_code = f"""
118
+ <div class="mermaid">
119
+ {mermaid_code}
 
 
 
120
  </div>
121
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
122
+ <style>
123
+ .mermaid-container {{ height: 420px; border: 1px solid #ddd; padding: 10px; border-radius: 8px; }}
124
+ </style>
125
  """
126
+ st.components.v1.html(html_code, height=450, scrolling=True)
127
 
128
  # ============ STREAMLIT UI ============
129
+ st.set_page_config(page_title="PDF Assistant", layout="wide")
130
+ st.title("📄 PDF Assistant: Summary, Diagram, Q&A")
131
+ st.markdown("---")
132
+
133
+ st.session_state.setdefault('raw_text', None)
134
+ st.session_state.setdefault('summary', None)
135
+ st.session_state.setdefault('mermaid_code', None)
136
+ st.session_state.setdefault('chat_history', [])
137
+
138
+ with st.sidebar:
139
+ st.header("🔑 API Status")
140
+ st.markdown(f"**OpenRouter:** {'✅ Key present' if OPENROUTER_KEY else '❌ Key missing. Q&A will be local.'}")
141
+ st.markdown(f"**Hugging Face:** {'✅ Key present' if HUGGINGFACE_KEY else '❌ Key missing. Diagram will be local.'}")
142
+ st.markdown(f"**ElevenLabs:** {'✅ Key present' if ELEVEN_API_KEY else '❌ Key missing. TTS will be local.'}")
143
+ if not HAS_PYTTSX3:
144
+ st.warning("pyttsx3 not installed. Local audio disabled.")
145
+
146
+ uploaded_file = st.file_uploader("1. Upload a PDF", type=["pdf"])
147
+ if uploaded_file and st.session_state.raw_text is None:
148
+ with st.spinner("Extracting text..."):
149
+ st.session_state.raw_text = extract_text_from_pdf(uploaded_file)
150
+ if st.session_state.raw_text:
151
+ st.success("Text extracted successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  else:
153
+ st.warning("No text extracted from PDF. Is it a scanned image?")
 
 
 
154
 
155
+ if st.session_state.raw_text:
156
+ st.markdown("---")
157
+ if st.button("2. Generate Summary & Diagram"):
158
+ with st.spinner("Generating summary and diagram..."):
159
+ # Generate Summary
160
+ prompt = f"Summarize the text in 5-8 concise bullet points:\n\n{st.session_state.raw_text[:15000]}"
161
+ ok, out = openrouter_chat([{"role": "user", "content": prompt}], OPENROUTER_MODEL, 400, 0.2)
162
+ st.session_state.summary = out if ok else extractive_summary(st.session_state.raw_text)
163
+ st.session_state.mermaid_code = generate_mermaid_from_summary(st.session_state.summary)
164
+
165
+ if st.session_state.summary:
166
+ st.header("📌 Summary")
167
+ st.markdown(st.session_state.summary)
168
 
169
+ st.header("🗺️ Diagram")
170
+ render_mermaid(st.session_state.mermaid_code)
171
+ st.code(st.session_state.mermaid_code, language="mermaid")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ st.header("🔊 Audio")
174
+ if st.button("Generate Audio"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  audio_bytes = None
176
+ audio_source = "None"
177
+ with st.spinner("Generating audio..."):
178
  ok, out = eleven_tts_bytes(st.session_state.summary)
179
  if ok:
 
180
  audio_bytes = out
181
+ audio_source = "ElevenLabs"
182
+ elif HAS_PYTTSX3:
183
+ ok2, out2 = pyttsx3_tts_file(st.session_state.summary)
184
+ if ok2:
185
+ audio_bytes = out2
186
+ audio_source = "pyttsx3"
187
  else:
188
+ st.error("Audio generation failed: No API key and pyttsx3 not available.")
 
 
 
 
 
 
 
 
 
 
189
 
190
  if audio_bytes:
191
+ st.audio(audio_bytes, format="audio/mpeg" if audio_source == "ElevenLabs" else "audio/wav")
192
+ st.info(f"Audio generated using: **{audio_source}**")
 
 
193
  else:
194
+ st.error("Audio generation failed. Check your API key and local setup.")
 
 
 
 
195
 
196
  st.markdown("---")
197
+ st.header("💬 Q&A Chatbot")
198
+ for chat_message in st.session_state.chat_history:
199
+ role, content = chat_message
200
+ with st.chat_message(role):
201
+ st.markdown(content)
202
+
203
+ prompt = st.chat_input("Ask a question about the PDF")
204
+ if prompt:
205
+ st.session_state.chat_history.append(("user", prompt))
206
+ with st.chat_message("user"):
207
+ st.markdown(prompt)
208
+
209
+ with st.chat_message("assistant"):
210
+ with st.spinner("Thinking..."):
211
+ qa_prompt = f"Context:\n{st.session_state.raw_text[:15000]}\n\nQuestion: {prompt}\nAnswer concisely."
212
+ ok, out = openrouter_chat([{"role": "user", "content": qa_prompt}], OPENROUTER_MODEL, 600, 0.2)
213
+
214
  if ok:
215
+ st.markdown(out)
216
+ st.session_state.chat_history.append(("assistant", out))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  else:
218
+ # Naive local fallback for Q&A
219
+ st.warning("OpenRouter failed. Using local fallback.")
220
+ sentences = re.split(r'(?<=[.!?])\s+', st.session_state.raw_text)
221
+ matches = [s for s in sentences if all(w in s.lower() for w in re.findall(r'\w+', prompt.lower())[:3])]
222
+ fallback_answer = " ".join(matches[:3]) if matches else "I couldn't find a relevant answer in the document."
223
+ st.markdown(fallback_answer)
224
+ st.session_state.chat_history.append(("assistant", fallback_answer))