ChatBotsTA committed on
Commit
6db8576
·
verified ·
1 Parent(s): cca24fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -133
app.py CHANGED
@@ -1,193 +1,284 @@
1
- # streamlit_pdf_qa.py
2
  import os
 
 
 
3
  import streamlit as st
4
- import openai
5
  from PyPDF2 import PdfReader
6
- import requests
7
- import re
8
  from typing import List, Optional
9
 
10
  # ============ CONFIG =============
11
- openai.api_key = os.getenv("OPENAI_API_KEY")
 
12
  ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
- # optional: allow switching model by env or fallback
14
- OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") # fallback to what's set
15
 
16
  # ============ HELPERS ============
17
  def clean_text(text: str) -> str:
18
- text = re.sub(r"\s+", " ", text)
19
- return text.strip()
20
 
21
- @st.cache_data(show_spinner=False)
22
  def extract_text_from_pdf(uploaded_file) -> str:
23
- """
24
- Extract all text from a PDF UploadFile (or file-like)
25
- """
26
  reader = PdfReader(uploaded_file)
27
- text_parts = []
28
  for page in reader.pages:
29
- page_text = page.extract_text()
30
- if page_text:
31
- text_parts.append(page_text)
32
- return clean_text(" ".join(text_parts))
33
 
34
  def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
35
- """
36
- Chunk text by character length. Overlap helps keep context across chunks.
37
- """
38
  chunks = []
39
  start = 0
40
- text_len = len(text)
41
- while start < text_len:
42
  end = start + chunk_size
43
  chunks.append(text[start:end])
44
  start = max(end - overlap, end)
45
  return chunks
46
 
47
- def call_openai_chat(messages: list, max_tokens: int = 1000, temperature: float = 0.2) -> str:
48
- if not openai.api_key:
49
- raise RuntimeError("OPENAI_API_KEY not set")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
- response = openai.ChatCompletion.create(
52
- model=OPENAI_MODEL,
53
- messages=messages,
54
- max_tokens=max_tokens,
55
- temperature=temperature,
56
- )
57
- # robust extraction of content
58
- content = None
59
- if response and "choices" in response and len(response["choices"]) > 0:
60
- choice = response["choices"][0]
61
- # choice may contain 'message' dict
62
- if "message" in choice and "content" in choice["message"]:
63
- content = choice["message"]["content"]
64
- # fallback
65
- elif "text" in choice:
66
- content = choice["text"]
67
- return content or ""
68
  except Exception as e:
69
- # raise the exception upward so UI can show it
70
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- def ask_gpt(question: str, context: str) -> str:
73
- prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
74
  messages = [
75
- {"role": "system", "content": "You are a helpful assistant."},
76
  {"role": "user", "content": prompt},
77
  ]
78
- return call_openai_chat(messages, max_tokens=600)
79
 
80
- def summarize_text(text: str) -> str:
81
- prompt = f"Summarize the following text in a clear, concise way:\n\n{text}"
82
  messages = [
83
- {"role": "system", "content": "You are a concise summarizer."},
84
  {"role": "user", "content": prompt},
85
  ]
86
- return call_openai_chat(messages, max_tokens=400)
87
 
 
88
  def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
89
- """
90
- Send text to ElevenLabs text-to-speech API.
91
- Returns raw audio bytes or None on failure.
92
- """
93
  if not ELEVEN_API_KEY:
94
  return None
95
  url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
96
- headers = {
97
- "Accept": "audio/mpeg",
98
- "xi-api-key": ELEVEN_API_KEY,
99
- "Content-Type": "application/json"
100
- }
101
- data = {
102
- "text": text,
103
- "model_id": "eleven_multilingual_v2",
104
- "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}
105
- }
106
- try:
107
- resp = requests.post(url, json=data, headers=headers, timeout=30)
108
- if resp.ok:
109
- return resp.content
110
- else:
111
- st.warning(f"ElevenLabs TTS failed: {resp.status_code} {resp.text[:300]}")
112
- return None
113
- except Exception as e:
114
- st.warning(f"ElevenLabs TTS error: {e}")
115
  return None
116
 
117
- # ============ STREAMLIT APP ============
118
- st.set_page_config(page_title="PDF Q&A", layout="wide")
119
- st.title("πŸ“„ PDF Q&A with Summarization + Audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- # show API key status
122
- col1, col2 = st.columns(2)
123
- with col1:
124
- if openai.api_key:
125
- st.success("OpenAI key detected βœ…")
126
  else:
127
- st.error("OPENAI_API_KEY is not set. Chat features will not work.")
128
- with col2:
 
 
 
 
 
129
  if ELEVEN_API_KEY:
130
  st.success("ElevenLabs key detected βœ…")
131
  else:
132
- st.info("ELEVEN_API_KEY not set. Audio playback will be disabled.")
133
 
134
- uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
135
- if uploaded_file is not None:
136
  try:
137
- with st.spinner("Extracting text from PDF..."):
138
  raw_text = extract_text_from_pdf(uploaded_file)
139
  except Exception as e:
140
- st.error(f"Failed to extract PDF text: {e}")
141
  raw_text = ""
142
 
143
  if not raw_text:
144
- st.warning("No text was extracted from this PDF. It may be scanned images (OCR needed).")
145
  else:
146
- st.success("PDF loaded successfully βœ…")
147
- st.markdown(f"**Extracted text length:** {len(raw_text)} characters")
 
 
 
 
 
 
 
148
 
149
- # Summarize button
150
- if st.button("Summarize Document"):
151
- with st.spinner("Summarizing..."):
152
- try:
153
- # limit input size for summarization
154
- to_summarize = raw_text[:15000]
155
- summary = summarize_text(to_summarize)
156
- st.subheader("πŸ“Œ Summary")
157
- st.write(summary)
158
-
159
- audio_bytes = text_to_speech_eleven(summary)
160
- if audio_bytes:
161
- st.audio(audio_bytes, format="audio/mp3")
162
- elif ELEVEN_API_KEY is None:
163
- st.info("TTS skipped because ELEVEN_API_KEY is not set.")
164
- except Exception as e:
165
- st.error(f"Summarization failed: {e}")
166
 
167
- # Q&A textbox
168
- query = st.text_input("Ask a question about the PDF:")
169
  if query:
170
- with st.spinner("Thinking..."):
 
 
171
  try:
172
- chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
173
- # keep a small number of chunks to control cost/time
174
- answers = []
175
- max_chunks = 3
176
- for i, c in enumerate(chunks[:max_chunks]):
177
- ans = ask_gpt(query, c)
178
- answers.append(ans)
179
- final_answer = "\n\n".join([a for a in answers if a])
180
- if not final_answer.strip():
181
- st.warning("No answer returned from the model.")
182
- else:
183
- st.subheader("πŸ’‘ Answer")
184
- st.write(final_answer)
185
-
186
- audio = text_to_speech_eleven(final_answer)
187
- if audio:
188
- st.audio(audio, format="audio/mp3")
189
- elif ELEVEN_API_KEY is None:
190
- st.info("TTS skipped because ELEVEN_API_KEY is not set.")
191
  except Exception as e:
192
  st.error(f"Q&A failed: {e}")
193
  else:
 
1
+ # app.py
2
  import os
3
+ import re
4
+ import json
5
+ import requests
6
  import streamlit as st
 
7
  from PyPDF2 import PdfReader
 
 
8
  from typing import List, Optional
9
 
# ============ CONFIG =============
# All settings are read from the environment once, at import time.
OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY")
# OpenRouter model ids are namespaced by provider ("openai/...", "anthropic/...").
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "openai/gpt-4o-mini")  # change if you prefer
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_API_KEY")  # optional: if set, we'll call a HF mermaid model
HF_MERMAID_MODEL = os.getenv("HF_MERMAID_MODEL", "TroyDoesAI/MermaidStable3B")  # example community model
16
 
17
  # ============ HELPERS ============
def clean_text(text: str) -> str:
    """Collapse each whitespace run in *text* to one space and trim both ends.

    A falsy *text* (``None`` or ``""``) yields an empty string.
    """
    source = text or ""
    collapsed = re.sub(r"\s+", " ", source)
    return collapsed.strip()
 
20
 
 
def extract_text_from_pdf(uploaded_file) -> str:
    """Return the whitespace-normalized text of every page in a PDF.

    *uploaded_file* is handed straight to ``PdfReader``. Pages whose
    ``extract_text()`` result is falsy are skipped; the remaining page texts
    are joined with single spaces and passed through ``clean_text``.
    """
    pdf = PdfReader(uploaded_file)
    page_texts = (page.extract_text() for page in pdf.pages)
    combined = " ".join(content for content in page_texts if content)
    return clean_text(combined)
29
 
def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
    """Split *text* into character chunks that share *overlap* characters.

    Each chunk starts ``chunk_size - overlap`` characters after the previous
    one, so consecutive chunks repeat the last *overlap* characters and
    context is not lost at chunk boundaries.

    Args:
        text: Text to split. Empty or ``None`` yields ``[]``.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared between neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size`` so every step makes progress.

    Returns:
        The chunks in document order. The last chunk may be shorter.

    Raises:
        ValueError: If *chunk_size* or *overlap* is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    if not text:
        return []

    step = chunk_size - overlap
    total = len(text)
    chunks = []
    for start in range(0, total, step):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Stop once the end of the text is covered; otherwise the next
        # iteration would emit a chunk made only of already-seen overlap.
        if end >= total:
            break
    return chunks
40
 
41
+ # ---------- OpenRouter chat (replacement for openai.ChatCompletion) ----------
42
def openrouter_chat(messages: List[dict], model: str = OPENROUTER_MODEL, max_tokens: int = 800, temperature: float = 0.2) -> str:
    """Send OpenAI-style chat messages to OpenRouter and return the reply text.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        model: OpenRouter model identifier; defaults to ``OPENROUTER_MODEL``.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.

    Returns:
        The text of the first choice, or ``""`` when the response carries no
        usable choice.

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is not set, the request cannot
            be sent, the API answers with an error status, or the response
            body is not valid JSON.
    """
    if not OPENROUTER_KEY:
        raise RuntimeError("OPENROUTER_API_KEY not set")

    # OpenRouter serves its OpenAI-compatible API under openrouter.ai/api/v1.
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {"Authorization": f"Bearer {OPENROUTER_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    # Connection errors and timeouts are reported the same way as HTTP errors
    # so callers only ever have to handle RuntimeError.
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=60)
    except requests.RequestException as e:
        raise RuntimeError(f"OpenRouter request failed: {e}") from e

    if not resp.ok:
        # Truncate the body so an HTML error page does not flood the UI.
        raise RuntimeError(f"OpenRouter API error: {resp.status_code} {resp.text[:500]}")

    try:
        data = resp.json()
    except ValueError as e:
        raise RuntimeError("OpenRouter returned a non-JSON response") from e

    # The response mirrors OpenAI's shape: choices[0].message.content, with
    # choices[0].text as the legacy completion-style fallback.
    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices or not isinstance(choices[0], dict):
        return ""
    first = choices[0]
    message = first.get("message")
    if isinstance(message, dict) and message.get("content"):
        return message["content"]
    return first.get("text") or ""
79
 
80
def ask_model_for_summary(text: str) -> str:
    """Ask the chat model for a short bullet-point summary of *text*.

    Sends a system message and a user message through ``openrouter_chat`` with
    a 400-token limit and returns whatever text that call yields.
    """
    system_message = {"role": "system", "content": "You are a concise summarizer."}
    user_message = {
        "role": "user",
        "content": f"Summarize the following text clearly and concisely (bullet points, 5-8 bullets max):\n\n{text}",
    }
    return openrouter_chat([system_message, user_message], max_tokens=400)
87
 
88
def ask_model_question(question: str, context: str) -> str:
    """Ask the chat model to answer *question* using *context* as background.

    Sends a system message and a user message through ``openrouter_chat`` with
    a 600-token limit and returns whatever text that call yields.
    """
    system_message = {"role": "system", "content": "You are a helpful assistant."}
    user_message = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion: {question}\nAnswer in a concise helpful way.",
    }
    return openrouter_chat([system_message, user_message], max_tokens=600)
95
 
96
+ # ---------- ElevenLabs TTS ----------
def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
    """Convert *text* to speech with the ElevenLabs text-to-speech API.

    Audio is an optional extra, so every failure is reported with
    ``st.warning`` and turned into ``None`` instead of an exception. This way
    a TTS outage cannot abort the summary or answer that was already produced.

    Args:
        text: Text to synthesize.
        voice_id: ElevenLabs voice identifier used in the request URL.

    Returns:
        The raw response bytes (requested as ``audio/mpeg``), or ``None`` when
        ``ELEVEN_API_KEY`` is unset, *text* is empty, or the request fails.
    """
    if not ELEVEN_API_KEY:
        return None
    if not text or not text.strip():
        # Nothing to synthesize; skip the network round trip.
        return None

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {"xi-api-key": ELEVEN_API_KEY, "Accept": "audio/mpeg", "Content-Type": "application/json"}
    data = {"text": text, "model_id": "eleven_multilingual_v2", "voice_settings": {"stability": 0.5, "similarity_boost": 0.5}}

    try:
        r = requests.post(url, json=data, headers=headers, timeout=30)
    except requests.RequestException as e:
        st.warning(f"ElevenLabs TTS error: {e}")
        return None

    if r.ok:
        return r.content
    st.warning(f"ElevenLabs TTS failed: {r.status_code} {r.text[:300]}")
    return None
109
 
110
+ # ---------- Mermaid generation (Hugging Face model optional) ----------
111
def call_hf_mermaid(prompt: str, model: str = HF_MERMAID_MODEL) -> Optional[str]:
    """Ask a Hugging Face Inference API model to generate Mermaid code.

    The call is best-effort: any failure is reported with ``st.warning`` and
    results in ``None`` so the caller can fall back to a locally built diagram.

    Args:
        prompt: Full prompt sent as the model input.
        model: Hugging Face model id; defaults to ``HF_MERMAID_MODEL``.

    Returns:
        The generated text, or ``None`` when ``HUGGINGFACE_API_KEY`` is unset,
        the request fails, or no text can be found in the response.
    """
    if not HUGGINGFACE_KEY:
        return None

    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}", "Accept": "application/json"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512, "temperature": 0.2}}

    try:
        r = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        st.warning(f"Hugging Face model call failed: {e}")
        return None
    if not r.ok:
        st.warning(f"Hugging Face model call failed: {r.status_code} {r.text[:300]}")
        return None

    try:
        j = r.json()
    except ValueError:
        st.warning("Hugging Face model returned a non-JSON response.")
        return None

    if isinstance(j, dict) and "error" in j:
        st.warning(f"Hugging Face error: {j['error']}")
        return None

    # The response shape varies by model: a list of result dicts, a bare
    # string, or a single dict. Only string payloads are accepted.
    if isinstance(j, str):
        return j
    candidate = j[0] if isinstance(j, list) and j else j
    if isinstance(candidate, dict):
        for k in ("generated_text", "output", "text"):
            value = candidate.get(k)
            if isinstance(value, str):
                return value
    return None
141
+
142
def generate_mermaid_from_summary(summary: str) -> str:
    """Return Mermaid flowchart code that visualizes *summary*.

    The Hugging Face model is tried first via ``call_hf_mermaid``. Its output
    is used only if a flowchart definition can be found in it; otherwise a
    simple sequential flowchart is built locally from the summary's
    bullets or sentences.
    """
    prompt = (
        "Given the following concise summary, produce a Mermaid flowchart (use 'graph TD' or 'flowchart TD' syntax). "
        "Output only the Mermaid code block (no extra explanation). Summary:\n\n" + summary
    )
    hf_output = call_hf_mermaid(prompt)
    if hf_output:
        extracted = _extract_mermaid(hf_output)
        if extracted:
            return extracted
    return _local_mermaid_flowchart(summary)


def _extract_mermaid(model_output: str) -> str:
    """Pull the flowchart definition out of raw model output, or return ``""``."""
    # Prefer the contents of a ```mermaid ... ``` (or plain ```) fence.
    fenced = re.search(r"```(?:mermaid)?\s*\n([\s\S]+?)```", model_output, re.IGNORECASE)
    candidate = fenced.group(1) if fenced else model_output
    # Drop any chatter (or echoed prompt) before the diagram header line.
    header = re.search(r"^[ \t]*(?:graph|flowchart)\b", candidate, re.IGNORECASE | re.MULTILINE)
    if not header:
        return ""
    return candidate[header.start():].strip()


def _local_mermaid_flowchart(summary: str, max_nodes: int = 8, max_label: int = 80) -> str:
    """Build a top-down flowchart linking the summary's points in order."""
    labels = []
    for raw_line in (summary or "").splitlines():
        # Strip a leading bullet ("-", "*", U+2022) or list number only, so
        # hyphens inside words and ranges ("5-8") are left intact.
        stripped = re.sub(r"^\s*(?:[-*\u2022]+|\d+[.)])\s+", "", raw_line)
        stripped = " ".join(stripped.split())
        if stripped:
            labels.append(stripped)

    if len(labels) == 1:
        # A single paragraph: fall back to one node per sentence.
        labels = [s for s in re.split(r"(?<=[.!?])\s+", labels[0]) if s]

    labels = labels[:max_nodes] or ["Summary empty"]

    lines = ["flowchart TD"]
    for i, label in enumerate(labels):
        # Double quotes would terminate the quoted Mermaid label.
        safe = label[:max_label].replace('"', "'")
        lines.append(f'    A{i}["{safe}"]')
    lines.extend(f"    A{i} --> A{i + 1}" for i in range(len(labels) - 1))
    return "\n".join(lines) + "\n"
177
+
178
+ # ---------- Render mermaid in browser ----------
179
def render_mermaid(mermaid_code: str, height: int = 400):
    """Render a Mermaid diagram client-side inside a Streamlit HTML component.

    Args:
        mermaid_code: Mermaid source to draw.
        height: Height of the embedded component in pixels.

    The diagram source may come from a language model or from PDF text, so it
    is HTML-escaped before being placed in the page. This stops stray ``<``,
    ``&`` or ``</pre>`` sequences from breaking the markup or injecting script.
    """
    from html import escape  # local import keeps this block self-contained

    # NOTE(review): relies on Mermaid decoding HTML entities when it reads the
    # element's content, so escaped arrows ("--&gt;") should still parse —
    # confirm against the pinned mermaid@10 build.
    safe_code = escape(mermaid_code or "", quote=False)

    # Load mermaid.js from the CDN and let it render every ".mermaid" element.
    page = f"""
<div id="mermaid-target">
<pre class="mermaid">
{safe_code}
</pre>
</div>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
<script>
mermaid.initialize({{startOnLoad:true}});
</script>
"""
    st.components.v1.html(page, height=height, scrolling=True)
196
+
197
# ============ STREAMLIT UI ============
st.set_page_config(page_title="PDF Q&A + Summary Diagram", layout="wide")
st.title("📄 PDF Q&A + Summary Diagram + Audio")

# API status: one column per external service, so missing keys are visible
# before the user uploads anything.
c1, c2, c3 = st.columns(3)
with c1:
    if OPENROUTER_KEY:
        st.success("OpenRouter key detected ✅")
    else:
        st.error("OPENROUTER_API_KEY not set — summarization and Q&A will not work.")
with c2:
    if HUGGINGFACE_KEY:
        st.success("Hugging Face key detected (will try HF mermaid model) ✅")
    else:
        st.info("No HUGGINGFACE_API_KEY — app will use local Mermaid fallback.")
with c3:
    if ELEVEN_API_KEY:
        st.success("ElevenLabs key detected ✅")
    else:
        st.info("No ELEVEN_API_KEY — audio disabled.")
218
 
219
+ uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
220
+ if uploaded_file:
221
  try:
222
+ with st.spinner("Extracting text..."):
223
  raw_text = extract_text_from_pdf(uploaded_file)
224
  except Exception as e:
225
+ st.error(f"PDF extraction failed: {e}")
226
  raw_text = ""
227
 
228
  if not raw_text:
229
+ st.warning("No text extracted. If the PDF is scanned images you need OCR (Tesseract) or an OCR service.")
230
  else:
231
+ st.success(f"Extracted {len(raw_text)} characters")
232
+ if st.button("Summarize and generate diagram"):
233
+ try:
234
+ with st.spinner("Summarizing with OpenRouter..."):
235
+ # limit to avoid huge inputs
236
+ to_sum = raw_text[:15000]
237
+ summary = ask_model_for_summary(to_sum)
238
+ st.subheader("πŸ“Œ Summary")
239
+ st.write(summary)
240
 
241
+ # TTS summary
242
+ audio = text_to_speech_eleven(summary)
243
+ if audio:
244
+ st.audio(audio, format="audio/mp3")
245
+ elif not ELEVEN_API_KEY:
246
+ st.info("TTS not available (ELEVEN_API_KEY missing).")
247
+
248
+ # produce mermaid
249
+ mermaid_code = generate_mermaid_from_summary(summary)
250
+ st.subheader("πŸ—ΊοΈ Summary Diagram (Mermaid)")
251
+ render_mermaid(mermaid_code, height=480)
252
+ # also show the raw mermaid code for copy/paste
253
+ st.markdown("**Mermaid code (copy/paste):**")
254
+ st.code(mermaid_code, language="mermaid")
255
+
256
+ except Exception as e:
257
+ st.error(f"Summarize/diagram generation failed: {e}")
258
 
259
+ # Q&A box
260
+ query = st.text_input("Ask a question about the PDF (use Enter):")
261
  if query:
262
+ if not OPENROUTER_KEY:
263
+ st.error("Cannot answer β€” OPENROUTER_API_KEY is not set.")
264
+ else:
265
  try:
266
+ with st.spinner("Answering via OpenRouter..."):
267
+ chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
268
+ answers = []
269
+ for c in chunks[:3]: # limit to 3 chunks
270
+ a = ask_model_question(query, c)
271
+ if a:
272
+ answers.append(a)
273
+ final = "\n\n".join(answers).strip()
274
+ if not final:
275
+ st.warning("No answer returned from model.")
276
+ else:
277
+ st.subheader("πŸ’‘ Answer")
278
+ st.write(final)
279
+ audio2 = text_to_speech_eleven(final)
280
+ if audio2:
281
+ st.audio(audio2, format="audio/mp3")
 
 
 
282
  except Exception as e:
283
  st.error(f"Q&A failed: {e}")
284
  else: