ChatBotsTA commited on
Commit
b824d13
·
verified ·
1 Parent(s): 06f01a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -133
app.py CHANGED
@@ -1,145 +1,194 @@
 
1
  import os
2
- import gradio as gr
3
- import fitz # PyMuPDF
4
- import pinecone
5
- import qdrant_client
6
- from openai import OpenAI
7
- import graphviz
8
  import requests
9
-
10
- # =================== CONFIG ===================
11
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
12
- PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
13
- QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
14
- ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
15
-
16
- client = OpenAI(api_key=OPENAI_API_KEY)
17
- VECTOR_DB = "qdrant" # change to "pinecone" if you want Pinecone
18
-
19
-
20
- # =================== HELPERS ===================
21
- def extract_text_from_pdf(pdf_path):
22
- doc = fitz.open(pdf_path)
23
- text = ""
24
- for page in doc:
25
- text += page.get_text()
26
- return text
27
-
28
-
29
- def ingest_text(text, doc_name="doc"):
30
- if VECTOR_DB == "qdrant":
31
- qclient = qdrant_client.QdrantClient(":memory:")
32
- qclient.recreate_collection(
33
- collection_name=doc_name,
34
- vectors_config={"size": 1536, "distance": "Cosine"}
35
- )
36
- qclient.upload_points(
37
- collection_name=doc_name,
38
- points=[
39
- {"id": 0, "vector": [0.0]*1536, "payload": {"text": text}}
40
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
- elif VECTOR_DB == "pinecone":
43
- pinecone.init(api_key=PINECONE_API_KEY, environment="gcp-starter")
44
- if doc_name not in pinecone.list_indexes():
45
- pinecone.create_index(doc_name, dimension=1536, metric="cosine")
46
- index = pinecone.Index(doc_name)
47
- index.upsert([(str(0), [0.0]*1536, {"text": text})])
48
- return f"Ingested {len(text.split())} words."
49
-
50
-
51
- def summarize_text(text):
52
- resp = client.chat.completions.create(
53
- model="gpt-3.5-turbo",
54
- messages=[{"role": "system", "content": "Summarize clearly."},
55
- {"role": "user", "content": text[:4000]}]
56
- )
57
- return resp.choices[0].message.content
58
-
59
-
60
- def generate_audio(summary_text):
61
- if not ELEVENLABS_API_KEY:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return None
63
-
64
- url = "https://api.elevenlabs.io/v1/text-to-speech/pnYgVoCjYp9s9v1sXlKS" # default voice
65
  headers = {
66
- "xi-api-key": ELEVENLABS_API_KEY,
 
67
  "Content-Type": "application/json"
68
  }
69
  data = {
70
- "text": summary_text,
71
- "voice_settings": {"stability": 0.5, "similarity_boost": 0.7}
 
72
  }
73
- response = requests.post(url, headers=headers, json=data)
74
-
75
- if response.status_code == 200:
76
- audio_path = "summary_audio.mp3"
77
- with open(audio_path, "wb") as f:
78
- f.write(response.content)
79
- return audio_path
80
- else:
 
81
  return None
82
 
 
 
 
83
 
84
- def generate_diagram(text):
85
- dot = graphviz.Digraph()
86
- dot.node("A", "PDF Content")
87
- dot.node("B", "Summary")
88
- dot.node("C", "Key Ideas")
89
- dot.edges([("A", "B"), ("B", "C")])
90
- return dot.pipe(format="png")
91
-
92
-
93
- def chat_with_pdf(text, question):
94
- resp = client.chat.completions.create(
95
- model="gpt-3.5-turbo",
96
- messages=[
97
- {"role": "system", "content": "You are a helpful assistant with access to the document."},
98
- {"role": "user", "content": f"Document:\n{text[:3000]}\n\nQuestion: {question}"}
99
- ]
100
- )
101
- return resp.choices[0].message.content
102
-
103
-
104
- # =================== GRADIO APP ===================
105
- with gr.Blocks(theme="soft") as demo:
106
- gr.Markdown("# πŸ“š PDF Assistant β€” Summarize, Diagram, Audio, Chat")
107
-
108
- with gr.Row():
109
- pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
110
- doc_name = gr.Textbox(label="Doc name", value="mydoc")
111
-
112
- ingest_btn = gr.Button("πŸš€ Ingest PDF")
113
- ingest_status = gr.Markdown("")
114
-
115
- summary_btn = gr.Button("πŸ“– Summarize")
116
- summary_output = gr.Textbox(label="Summary", lines=8)
117
-
118
- audio_output = gr.Audio(label="πŸ”Š Summary Audio")
119
-
120
- diagram_btn = gr.Button("πŸ“ Generate Diagram")
121
- diagram_output = gr.Image(type="numpy", label="Diagram Preview")
122
-
123
- with gr.Row():
124
- question = gr.Textbox(label="Ask the PDF a question")
125
- answer = gr.Textbox(label="Answer")
126
- ask_btn = gr.Button("πŸ’¬ Ask")
127
-
128
- pdf_text_state = gr.State("")
129
-
130
- def handle_ingest(pdf_file, doc_name):
131
- text = extract_text_from_pdf(pdf_file.name)
132
- status = ingest_text(text, doc_name)
133
- return text, status
134
-
135
- def handle_summary(text):
136
- summary = summarize_text(text)
137
- audio = generate_audio(summary)
138
- return summary, audio
139
-
140
- ingest_btn.click(handle_ingest, [pdf_file, doc_name], [pdf_text_state, ingest_status])
141
- summary_btn.click(handle_summary, pdf_text_state, [summary_output, audio_output])
142
- diagram_btn.click(lambda t: generate_diagram(t), pdf_text_state, diagram_output)
143
- ask_btn.click(lambda t, q: chat_with_pdf(t, q), [pdf_text_state, question], answer)
144
-
145
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_pdf_qa.py
2
  import os
3
+ import streamlit as st
4
+ import openai
5
+ from PyPDF2 import PdfReader
 
 
 
6
  import requests
7
+ import re
8
+ from typing import List, Optional
9
+
10
+ # ============ CONFIG =============
11
+ openai.api_key = os.getenv("OPENAI_API_KEY")
12
+ ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")
13
+ # optional: allow switching model by env or fallback
14
+ OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini") # fallback to what's set
15
+
16
+ # ============ HELPERS ============
17
def clean_text(text: str) -> str:
    """Collapse all runs of whitespace in *text* to single spaces and trim the ends."""
    # str.split() with no argument splits on any whitespace run and drops
    # leading/trailing whitespace, so joining with a single space is
    # equivalent to re.sub(r"\s+", " ", text).strip().
    return " ".join(text.split())
20
+
21
@st.cache_data(show_spinner=False)
def extract_text_from_pdf(uploaded_file) -> str:
    """
    Extract all text from an uploaded PDF (file-like object).

    Pages with no extractable text are skipped; the remaining page texts
    are joined with spaces and whitespace-normalized via clean_text().
    Cached by Streamlit so re-runs with the same upload are free.
    """
    pdf = PdfReader(uploaded_file)
    # page.extract_text() may return None/"" for image-only pages; drop those.
    page_texts = [page.extract_text() for page in pdf.pages]
    joined = " ".join(t for t in page_texts if t)
    return clean_text(joined)
33
+
34
+ def chunk_text_by_chars(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
35
+ """
36
+ Chunk text by character length. Overlap helps keep context across chunks.
37
+ """
38
+ chunks = []
39
+ start = 0
40
+ text_len = len(text)
41
+ while start < text_len:
42
+ end = start + chunk_size
43
+ chunks.append(text[start:end])
44
+ start = max(end - overlap, end)
45
+ return chunks
46
+
47
def call_openai_chat(messages: list, max_tokens: int = 1000, temperature: float = 0.2) -> str:
    """
    Send *messages* to the OpenAI chat-completions API and return the reply text.

    Args:
        messages: list of {"role": ..., "content": ...} chat messages.
        max_tokens: completion token cap passed to the API.
        temperature: sampling temperature passed to the API.

    Returns:
        The assistant's reply text, or "" if no content could be extracted.

    Raises:
        RuntimeError: if no OpenAI API key is configured.
        Exception: any error raised by the OpenAI client propagates to the
            caller so the UI can display it.
    """
    if not openai.api_key:
        raise RuntimeError("OPENAI_API_KEY not set")
    # The original wrapped this in `try: ... except Exception as e: raise`,
    # which is a no-op (bare re-raise, unused variable) — removed; API errors
    # propagate unchanged.
    response = openai.ChatCompletion.create(
        model=OPENAI_MODEL,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Robust extraction: choices may carry a chat-style "message" dict or a
    # legacy completion-style "text" field.
    content = None
    if response and "choices" in response and len(response["choices"]) > 0:
        choice = response["choices"][0]
        if "message" in choice and "content" in choice["message"]:
            content = choice["message"]["content"]
        elif "text" in choice:
            content = choice["text"]
    return content or ""
71
+
72
def ask_gpt(question: str, context: str) -> str:
    """Answer *question* grounded in *context* (a chunk of document text)."""
    user_prompt = (
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )
    chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    return call_openai_chat(chat, max_tokens=600)
79
+
80
def summarize_text(text: str) -> str:
    """Return a concise model-generated summary of *text*."""
    chat = [
        {"role": "system", "content": "You are a concise summarizer."},
        {
            "role": "user",
            "content": f"Summarize the following text in a clear, concise way:\n\n{text}",
        },
    ]
    return call_openai_chat(chat, max_tokens=400)
87
+
88
def text_to_speech_eleven(text: str, voice_id: str = "pnCWbS8Aqipqqr5wzjuy") -> Optional[bytes]:
    """
    Convert *text* to speech with the ElevenLabs text-to-speech API.

    Returns the raw MP3 bytes on success. Returns None (after showing a
    Streamlit warning for request failures) when the key is missing, the
    HTTP response is not OK, or the request raises.
    """
    # No key configured -> audio feature is silently disabled.
    if not ELEVEN_API_KEY:
        return None

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "xi-api-key": ELEVEN_API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=30)
        if not resp.ok:
            st.warning(f"ElevenLabs TTS failed: {resp.status_code} {resp.text[:300]}")
            return None
        return resp.content
    except Exception as e:
        st.warning(f"ElevenLabs TTS error: {e}")
    return None
116
 
117
# ============ STREAMLIT APP ============
st.set_page_config(page_title="PDF Q&A", layout="wide")
st.title("πŸ“„ PDF Q&A with Summarization + Audio")

# Show API key status up front so the user knows which features will work.
col1, col2 = st.columns(2)
with col1:
    if openai.api_key:
        st.success("OpenAI key detected βœ…")
    else:
        st.error("OPENAI_API_KEY is not set. Chat features will not work.")
with col2:
    if ELEVEN_API_KEY:
        st.success("ElevenLabs key detected βœ…")
    else:
        st.info("ELEVEN_API_KEY not set. Audio playback will be disabled.")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file is not None:
    try:
        with st.spinner("Extracting text from PDF..."):
            raw_text = extract_text_from_pdf(uploaded_file)
    except Exception as e:
        st.error(f"Failed to extract PDF text: {e}")
        raw_text = ""

    if not raw_text:
        st.warning("No text was extracted from this PDF. It may be scanned images (OCR needed).")
    else:
        st.success("PDF loaded successfully βœ…")
        st.markdown(f"**Extracted text length:** {len(raw_text)} characters")

        # --- Summarization ---
        if st.button("Summarize Document"):
            with st.spinner("Summarizing..."):
                try:
                    # Cap input size for summarization to control token cost.
                    to_summarize = raw_text[:15000]
                    summary = summarize_text(to_summarize)
                    st.subheader("πŸ“Œ Summary")
                    st.write(summary)

                    audio_bytes = text_to_speech_eleven(summary)
                    if audio_bytes:
                        st.audio(audio_bytes, format="audio/mp3")
                    # CONSISTENCY FIX: the original tested `is None` here,
                    # which treats an empty-string key differently from the
                    # truthiness checks used everywhere else in this file.
                    elif not ELEVEN_API_KEY:
                        st.info("TTS skipped because ELEVEN_API_KEY is not set.")
                except Exception as e:
                    st.error(f"Summarization failed: {e}")

        # --- Q&A ---
        query = st.text_input("Ask a question about the PDF:")
        if query:
            with st.spinner("Thinking..."):
                try:
                    chunks = chunk_text_by_chars(raw_text, chunk_size=3000, overlap=200)
                    # Query only the first few chunks to limit cost/latency.
                    max_chunks = 3
                    answers = []
                    for chunk in chunks[:max_chunks]:
                        answers.append(ask_gpt(query, chunk))
                    final_answer = "\n\n".join(a for a in answers if a)
                    if not final_answer.strip():
                        st.warning("No answer returned from the model.")
                    else:
                        st.subheader("πŸ’‘ Answer")
                        st.write(final_answer)

                        audio = text_to_speech_eleven(final_answer)
                        if audio:
                            st.audio(audio, format="audio/mp3")
                        elif not ELEVEN_API_KEY:
                            st.info("TTS skipped because ELEVEN_API_KEY is not set.")
                except Exception as e:
                    st.error(f"Q&A failed: {e}")
else:
    st.info("Upload a PDF to begin.")