ChatBotsTA commited on
Commit
c9143ee
·
verified ·
1 Parent(s): 7d999e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -74
app.py CHANGED
@@ -1,83 +1,71 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import torch
4
- import tempfile
5
  import os
6
- import graphviz
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# -------------------------
# Load lightweight models
# -------------------------
# Small, CPU-friendly checkpoints so the Space starts without a GPU:
# distilbart for summarization, MMS English for text-to-speech.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
 
 
 
 
 
 
13
 
14
# -------------------------
# Helpers
# -------------------------
def summarize_pdf(pdf_file):
    """Extract text from an uploaded PDF and summarize it.

    Args:
        pdf_file: Gradio file object (exposes a ``.name`` path) or a
            plain filesystem path string.

    Returns:
        str: the summary text, or an error message prefixed with "❌"
        (the UI treats that prefix as a failure marker).
    """
    try:
        import pypdf
        # gr.File yields an object with .name; also accept a bare path.
        path = getattr(pdf_file, "name", pdf_file)
        reader = pypdf.PdfReader(path)
        # join() instead of repeated `text +=` — avoids quadratic string
        # concatenation on large documents; extract_text() may return None.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "❌ No text extracted from PDF."
        # keep only first 2000 chars (model input limit)
        chunk = text[:2000]
        summary = summarizer(chunk, max_length=120, min_length=40, do_sample=False)[0]['summary_text']
        return summary
    except Exception as e:
        # Callers expect a string either way; "❌" prefix marks the error.
        return f"❌ Error in summarization: {e}"
32
 
33
def summary_audio(summary_text):
    """Synthesize the summary to speech and save it as a WAV file.

    Args:
        summary_text: text to convert to speech.

    Returns:
        str: path to the generated .wav file, or an error message
        prefixed with "❌" on failure.
    """
    try:
        import soundfile as sf
        speech = tts(summary_text)
        # delete=False keeps the file for Gradio to serve; close the handle
        # before writing — the original leaked the descriptor and wrote to
        # the path while it was still open (which fails on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        sf.write(wav_path, speech["audio"], speech["sampling_rate"])
        return wav_path
    except Exception as e:
        return f"❌ Error in audio generation: {e}"
42
 
43
def summary_diagram(summary_text):
    """Render a star diagram: one hub node linked to up to 5 sentences.

    Args:
        summary_text: summary whose first sentences become leaf nodes.

    Returns:
        str: path to the rendered PNG, or an error message prefixed
        with "❌" on failure.
    """
    try:
        dot = graphviz.Digraph()
        dot.node("Summary", "πŸ“„ Summary")
        for i, sentence in enumerate(summary_text.split(".")[:5]):
            s = sentence.strip()
            if not s:
                continue
            # truncate long sentences so node labels stay readable
            dot.node(f"S{i}", s[:40] + ("..." if len(s) > 40 else ""))
            dot.edge("Summary", f"S{i}")
        # mkstemp + close fixes the file-handle leak of the original
        # NamedTemporaryFile; graphviz writes the DOT source to out_path
        # and the rendered image to out_path + ".png".
        fd, out_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)
        dot.render(out_path, format="png", cleanup=True)
        return out_path + ".png"
    except Exception as e:
        return f"❌ Error in diagram generation: {e}"
58
 
59
# -------------------------
# Gradio UI
# -------------------------
with gr.Blocks(css=".gradio-container {background-color: #f5f5f5}") as demo:
    gr.Markdown("<h1 style='text-align:center;color:#4CAF50;'>πŸ“š PDF Assistant</h1>")
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            summarize_btn = gr.Button("Summarize πŸš€", variant="primary")
            summary_output = gr.Textbox(label="Summary")
            audio_output = gr.Audio(label="Summary Audio")
            diagram_output = gr.Image(label="Summary Diagram")

    def full_pipeline(pdf_file):
        """Run summarize -> TTS -> diagram, tolerating per-step failures.

        Returns:
            tuple: (summary_text, audio_path_or_None, image_path_or_None).
        """
        summary = summarize_pdf(pdf_file)
        if summary.startswith("❌"):
            return summary, None, None
        audio = summary_audio(summary)
        diagram = summary_diagram(summary)
        # BUG FIX: the helpers return "❌ ..." strings on failure; feeding
        # those into gr.Audio / gr.Image (which expect file paths) crashes
        # the components, so map failed steps to None instead.
        if isinstance(audio, str) and audio.startswith("❌"):
            audio = None
        if isinstance(diagram, str) and diagram.startswith("❌"):
            diagram = None
        return summary, audio, diagram

    summarize_btn.click(full_pipeline, inputs=pdf_input, outputs=[summary_output, audio_output, diagram_output])


if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import tempfile
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ from PyPDF2 import PdfReader
6
+ from openai import OpenAI
7
+
8
+ # Vector DB imports (Qdrant + Pinecone)
9
+ import pinecone
10
+ from qdrant_client import QdrantClient
11
+
12
# --- Secrets --------------------------------------------------------------
# Pull API keys from a local .env (or the environment on the host).
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

client = OpenAI(api_key=OPENAI_API_KEY)

# Backend selector; flip to "pinecone" to switch vector-store providers.
VECTOR_DB = "qdrant"

# --- Vector store bootstrap -----------------------------------------------
if VECTOR_DB != "pinecone":
    # Default path: hosted Qdrant instance.
    vector_db = QdrantClient(
        url="https://your-qdrant-url", api_key=QDRANT_API_KEY
    )
else:
    pinecone.init(api_key=PINECONE_API_KEY, environment="gcp-starter")
    index_name = "pdf-index"
    # Create the index only on first run.
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(index_name, dimension=1536)
    vector_db = pinecone.Index(index_name)
34
 
35
# --- Streamlit UI ---------------------------------------------------------
st.title("πŸ“„ PDF AI Assistant")

uploaded_file = st.file_uploader("Upload your PDF", type="pdf")

if uploaded_file:
    # Persist the upload to disk; the PDF reader downstream works off a path.
    # delete=False keeps the file around after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(uploaded_file.read())
        pdf_path = tmp_pdf.name
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ reader = PdfReader(pdf_path)
46
+ text = "".join([page.extract_text() for page in reader.pages if page.extract_text()])
 
 
 
 
 
 
 
 
 
 
47
 
48
+ if st.button("Summarize"):
49
+ response = client.chat.completions.create(
50
+ model="gpt-3.5-turbo",
51
+ messages=[{"role": "user", "content": f"Summarize this: {text[:4000]}"}],
52
+ )
53
+ st.subheader("Summary")
54
+ st.write(response.choices[0].message.content)
55
 
56
+ if st.button("Generate Diagram"):
57
+ response = client.chat.completions.create(
58
+ model="gpt-3.5-turbo",
59
+ messages=[{"role": "user", "content": f"Make a mermaid diagram for: {text[:2000]}"}],
60
+ )
61
+ st.subheader("Diagram")
62
+ st.code(response.choices[0].message.content, language="mermaid")
63
 
64
+ st.subheader("πŸ’¬ Chat with PDF")
65
+ query = st.text_input("Ask a question about your PDF:")
66
+ if query:
67
+ response = client.chat.completions.create(
68
+ model="gpt-3.5-turbo",
69
+ messages=[{"role": "user", "content": f"Answer based on PDF: {query}\n\n{text[:4000]}"}],
70
+ )
71
+ st.write(response.choices[0].message.content)