ChatBotsTA committed on
Commit
cb48616
Β·
verified Β·
1 Parent(s): 871eb3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -57
app.py CHANGED
@@ -1,71 +1,106 @@
1
  import os
2
- import tempfile
3
- import streamlit as st
4
- from dotenv import load_dotenv
5
- from PyPDF2 import PdfReader
6
- from openai import OpenAI
7
-
8
- # Vector DB imports (Qdrant + Pinecone)
9
  import pinecone
10
- from qdrant_client import QdrantClient
 
 
11
 
12
- # Load secrets
13
- load_dotenv()
14
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
16
- QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
17
 
18
  client = OpenAI(api_key=OPENAI_API_KEY)
19
-
20
- # Choose vector DB here
21
  VECTOR_DB = "qdrant" # change to "pinecone" if needed
22
 
23
- # Initialize vector DB
24
- if VECTOR_DB == "pinecone":
25
- pinecone.init(api_key=PINECONE_API_KEY, environment="gcp-starter")
26
- index_name = "pdf-index"
27
- if index_name not in pinecone.list_indexes():
28
- pinecone.create_index(index_name, dimension=1536)
29
- vector_db = pinecone.Index(index_name)
30
- else:
31
- vector_db = QdrantClient(
32
- url="https://your-qdrant-url", api_key=QDRANT_API_KEY
33
- )
34
 
35
- # Streamlit UI
36
- st.title("πŸ“„ PDF AI Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- uploaded_file = st.file_uploader("Upload your PDF", type="pdf")
 
 
 
 
 
 
39
 
40
- if uploaded_file:
41
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
42
- tmp_file.write(uploaded_file.read())
43
- pdf_path = tmp_file.name
 
 
 
44
 
45
- reader = PdfReader(pdf_path)
46
- text = "".join([page.extract_text() for page in reader.pages if page.extract_text()])
 
 
 
 
 
 
 
47
 
48
- if st.button("Summarize"):
49
- response = client.chat.completions.create(
50
- model="gpt-3.5-turbo",
51
- messages=[{"role": "user", "content": f"Summarize this: {text[:4000]}"}],
52
- )
53
- st.subheader("Summary")
54
- st.write(response.choices[0].message.content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- if st.button("Generate Diagram"):
57
- response = client.chat.completions.create(
58
- model="gpt-3.5-turbo",
59
- messages=[{"role": "user", "content": f"Make a mermaid diagram for: {text[:2000]}"}],
60
- )
61
- st.subheader("Diagram")
62
- st.code(response.choices[0].message.content, language="mermaid")
63
 
64
- st.subheader("πŸ’¬ Chat with PDF")
65
- query = st.text_input("Ask a question about your PDF:")
66
- if query:
67
- response = client.chat.completions.create(
68
- model="gpt-3.5-turbo",
69
- messages=[{"role": "user", "content": f"Answer based on PDF: {query}\n\n{text[:4000]}"}],
70
- )
71
- st.write(response.choices[0].message.content)
 
1
  import os
2
+ import gradio as gr
3
+ import fitz # PyMuPDF
 
 
 
 
 
4
  import pinecone
5
+ import qdrant_client
6
+ from openai import OpenAI
7
+ import graphviz
8
 
9
# =================== CONFIG ===================
# Secrets are read from environment variables (e.g. Hugging Face Spaces
# secrets). Each is None when unset; a missing key only surfaces as an
# auth error on the first API call, not at import time.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

# Single module-level OpenAI client shared by all handlers below.
client = OpenAI(api_key=OPENAI_API_KEY)

# Selects which backend ingest_text() writes to.
VECTOR_DB = "qdrant" # change to "pinecone" if needed
16
 
17
# =================== HELPERS ===================
def extract_text_from_pdf(pdf_path):
    """Extract the full plain text of a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        The concatenated text of every page. May be "" for scanned or
        image-only PDFs (page.get_text() yields nothing for those).
    """
    # Open the document as a context manager so the file handle is
    # released even if a page raises — the original never closed it.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
 
 
 
 
24
 
25
def ingest_text(text, doc_name="doc"):
    """Store *text* as a single point in the configured vector DB.

    Args:
        text: Full document text to store as one point's payload.
        doc_name: Collection (Qdrant) / index (Pinecone) name.

    Returns:
        A human-readable status string with the ingested word count.

    NOTE(review): the stored vector is a 1536-dim zero placeholder, not a
    real embedding — similarity search over this data is meaningless until
    an embedding model is wired in. TODO confirm intent.
    """
    if VECTOR_DB == "qdrant":
        # NOTE(review): ":memory:" builds a fresh, process-local client on
        # every call, so anything ingested earlier is discarded — presumably
        # a demo shortcut; verify before relying on retrieval.
        qclient = qdrant_client.QdrantClient(":memory:")
        # recreate_collection drops any existing collection of this name.
        qclient.recreate_collection(
            collection_name=doc_name,
            vectors_config={"size": 1536, "distance": "Cosine"}
        )
        qclient.upload_points(
            collection_name=doc_name,
            points=[
                {"id": 0, "vector": [0.0]*1536, "payload": {"text": text}}
            ]
        )
    elif VECTOR_DB == "pinecone":
        # NOTE(review): pinecone.init/list_indexes is the legacy (pre-v3)
        # client API — confirm the pinned pinecone-client version supports it.
        pinecone.init(api_key=PINECONE_API_KEY, environment="gcp-starter")
        if doc_name not in pinecone.list_indexes():
            pinecone.create_index(doc_name, dimension=1536, metric="cosine")
        index = pinecone.Index(doc_name)
        # Upsert one (id, vector, metadata) tuple; id fixed at "0".
        index.upsert([(str(0), [0.0]*1536, {"text": text})])
    return f"Ingested {len(text.split())} words."
45
 
46
def summarize_text(text):
    """Return a short model-written summary of the first 4000 chars of *text*."""
    # Only a prefix of the document is sent, keeping the prompt within
    # the model's context window.
    conversation = [
        {"role": "system", "content": "Summarize clearly."},
        {"role": "user", "content": text[:4000]},
    ]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return completion.choices[0].message.content
53
 
54
def generate_diagram(text):
    """Render a fixed three-node overview diagram and return its PNG file path.

    Args:
        text: Document text. NOTE(review): currently unused — the diagram is
            static regardless of input. TODO: derive nodes from the content.

    Returns:
        Filesystem path to a rendered PNG image.
    """
    import tempfile

    dot = graphviz.Digraph()
    dot.node("A", "PDF Content")
    dot.node("B", "Summary")
    dot.node("C", "Key Ideas")
    dot.edges([("A", "B"), ("B", "C")])
    # Bug fix: gr.Image cannot display raw PNG bytes (dot.pipe's return
    # value). Persist to a temp file and hand Gradio a path instead —
    # image outputs accept filesystem paths.
    png_bytes = dot.pipe(format="png")
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp.write(png_bytes)
        return tmp.name
61
 
62
def chat_with_pdf(text, question):
    """Answer *question* grounded in (a truncated view of) the document text."""
    # Truncate the document to keep the prompt inside the context window.
    prompt = f"Document:\n{text[:3000]}\n\nQuestion:{question}"
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant with access to the document."},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content
71
 
72
# =================== GRADIO APP ===================
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 📚 PDF Assistant — Summarize, Diagram, Chat")

    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        doc_name = gr.Textbox(label="Doc name", value="mydoc")

    ingest_btn = gr.Button("🚀 Ingest PDF")
    ingest_status = gr.Markdown("")

    summary_btn = gr.Button("📖 Summarize")
    summary_output = gr.Textbox(label="Summary", lines=8)

    diagram_btn = gr.Button("📐 Generate Diagram")
    diagram_output = gr.Image(type="numpy", label="Diagram Preview")

    with gr.Row():
        question = gr.Textbox(label="Ask the PDF a question")
        answer = gr.Textbox(label="Answer")
    ask_btn = gr.Button("💬 Ask")

    # Extracted PDF text, carried between handlers per user session.
    pdf_text_state = gr.State("")

    def handle_ingest(pdf_file, doc_name):
        """Extract text from the uploaded PDF and ingest it into the vector DB."""
        # Bug fix: clicking "Ingest" with no upload previously raised
        # AttributeError on pdf_file.name; fail gracefully instead.
        if pdf_file is None:
            return "", "⚠️ Please upload a PDF first."
        text = extract_text_from_pdf(pdf_file.name)
        status = ingest_text(text, doc_name)
        return text, status

    ingest_btn.click(handle_ingest, [pdf_file, doc_name], [pdf_text_state, ingest_status])
    # Pass the handlers directly — the previous lambda wrappers added nothing.
    summary_btn.click(summarize_text, pdf_text_state, summary_output)
    diagram_btn.click(generate_diagram, pdf_text_state, diagram_output)
    ask_btn.click(chat_with_pdf, [pdf_text_state, question], answer)

demo.launch()