Muqadas-13 committed on
Commit
2153a88
·
verified ·
1 Parent(s): 0544644

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -100
app.py CHANGED
@@ -1,104 +1,97 @@
1
- import os
2
- import streamlit as st
3
- from PyPDF2 import PdfReader
4
- from docx import Document
 
5
  import faiss
6
  import numpy as np
7
- import torch
8
  from groq import Groq
9
- from sentence_transformers import SentenceTransformer
10
 
11
- # βœ… Force CPU to avoid meta tensor issues
12
- torch.set_default_device("cpu")
13
-
14
- # βœ… Load Groq API key from environment
15
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
16
-
17
- # βœ… Load sentence transformer model safely
18
- embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", trust_remote_code=True)
19
-
20
- # βœ… Initialize FAISS index and chunk storage
21
- INDEX = faiss.IndexFlatL2(384)
22
- stored_chunks = []
23
-
24
- # βœ… Streamlit UI Styling
25
- st.markdown("""
26
- <style>
27
- .main-title {
28
- font-size: 40px;
29
- color: #2E86C1;
30
- font-weight: bold;
31
- text-align: center;
32
- margin-bottom: 30px;
33
- }
34
- .card {
35
- background-color: #ffffff;
36
- padding: 20px;
37
- border-radius: 15px;
38
- box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
39
- margin-top: 20px;
40
- }
41
- body {
42
- background-color: #f8fbfd;
43
- }
44
- </style>
45
- """, unsafe_allow_html=True)
46
-
47
- st.markdown('<div class="main-title">πŸ“„ Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
48
-
49
- # βœ… Extract text from uploaded files
50
- def extract_text(file):
51
- if file.type == "application/pdf":
52
- reader = PdfReader(file)
53
- return " ".join([page.extract_text() or "" for page in reader.pages])
54
- elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
55
- doc = Document(file)
56
- return "\n".join([p.text for p in doc.paragraphs])
57
- elif file.type.startswith("text"):
58
- return file.read().decode("utf-8")
59
- return ""
60
-
61
- # βœ… Chunk long text into smaller pieces
62
- def chunk_text(text, chunk_size=200):
63
- words = text.split()
64
- return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
65
-
66
- # βœ… Store embeddings in FAISS
67
- def store_embeddings(chunks):
68
- vectors = embed_model.encode(chunks)
69
- INDEX.add(np.array(vectors, dtype=np.float32))
70
- stored_chunks.extend(chunks)
71
-
72
- # βœ… Retrieve most relevant chunks for a query
73
- def retrieve_similar_chunks(query, top_k=3):
74
- query_vector = embed_model.encode([query])
75
- distances, indices = INDEX.search(np.array(query_vector, dtype=np.float32), top_k)
76
- return [stored_chunks[i] for i in indices[0]]
77
-
78
- # βœ… Use Groq to answer based on context
79
- def get_llm_answer(query, context):
80
- prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
81
- chat_completion = client.chat.completions.create(
82
- messages=[{"role": "user", "content": prompt}],
83
- model="llama3-70b-8192"
84
- )
85
- return chat_completion.choices[0].message.content
86
-
87
- # βœ… Streamlit App Interface
88
- uploaded_file = st.file_uploader("πŸ“ Upload your document", type=["pdf", "docx", "txt"])
89
- query = st.text_input("πŸ’¬ Ask a question about your document")
90
-
91
- if uploaded_file:
92
- with st.spinner("Processing file..."):
93
- text = extract_text(uploaded_file)
94
- chunks = chunk_text(text)
95
- store_embeddings(chunks)
96
- st.success("βœ… Document uploaded and indexed!")
97
-
98
- if st.button("🧠 Get Answer") and query:
99
- with st.spinner("Thinking..."):
100
- context = "\n\n".join(retrieve_similar_chunks(query))
101
- answer = get_llm_answer(query, context)
102
- st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
103
-
104
- st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❀️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)
 
1
# Dependencies must be installed outside this module (e.g. via requirements.txt):
#   gradio sentence-transformers faiss-cpu pdfplumber groq
# A "!pip install ..." line is notebook shell magic and is a SyntaxError in a
# plain .py file, so it is kept here only as a comment.

import gradio as gr
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq

# Global state shared by the UI callbacks.
# Sentence-embedding model used to encode both PDF chunks and questions.
model = SentenceTransformer("all-MiniLM-L6-v2")
documents = []      # not referenced by the callbacks in this file; kept for compatibility
embeddings = None   # float32 matrix of chunk embeddings, set by pdf_to_chunks
index = None        # FAISS index over `embeddings`, set by pdf_to_chunks
text_chunks = []    # text of each indexed chunk, set by pdf_to_chunks
client = None       # Groq API client, created by pdf_to_chunks from the user's key
13
+
14
def ask_llama3(system_prompt, user_prompt):
    """Send a system/user prompt pair to the Groq chat API and return the reply.

    Args:
        system_prompt: Text sent with the "system" role.
        user_prompt: Text sent with the "user" role.

    Returns:
        The message content of the first completion choice, or a
        human-readable status string if no client has been configured or
        the request fails. This function never raises, so the UI always
        has text to display.
    """
    # The module-level client only exists after a PDF has been processed with
    # an API key; look it up defensively so a missing client yields a clear
    # instruction instead of an AttributeError message.
    groq_client = globals().get("client")
    if groq_client is None:
        return "❗ Please provide a Groq API key and process a PDF first."

    try:
        chat_completion = groq_client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"❌ LLaMA3 error: {e}"
27
+
28
def pdf_to_chunks(pdf_file, user_key):
    """Extract text from a PDF, embed it, and build the FAISS index.

    Args:
        pdf_file: The uploaded file; either an object exposing the path as
            ``.name`` or a plain path string.
        user_key: Groq API key used to create the module-level client.

    Returns:
        A status string for the UI. This function never raises.

    Side effects:
        On success, replaces the module-level ``client``, ``text_chunks``,
        ``embeddings`` and ``index``. If extraction or indexing fails, the
        document state is left empty so chunks and index can never be out
        of sync.
    """
    global text_chunks, embeddings, index, client

    # Validate inputs up front so the user gets an actionable message instead
    # of an opaque exception string.
    if pdf_file is None:
        return "❗ Please upload a PDF file first."
    if not user_key or not user_key.strip():
        return "❗ Please enter your Groq API key."

    try:
        client = Groq(api_key=user_key.strip())
    except Exception as e:
        return f"❌ API key error: {e}"

    # Drop any previously indexed document before processing the new one; the
    # new state is only committed once every step below has succeeded.
    text_chunks, embeddings, index = [], None, None

    # Accept both a file-like wrapper (with .name) and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        new_chunks = []
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    # Naive sentence split; skip empty/whitespace-only pieces
                    # so they are not embedded and later retrieved as context.
                    new_chunks.extend(
                        piece.strip()
                        for piece in text.split(". ")
                        if piece.strip()
                    )

        if not new_chunks:
            return "❗ No text found in PDF."

        new_embeddings = np.asarray(
            model.encode(new_chunks, convert_to_tensor=False),
            dtype="float32",
        )
        new_index = faiss.IndexFlatL2(new_embeddings.shape[1])
        new_index.add(new_embeddings)
    except Exception as e:
        return f"❌ PDF processing error: {e}"

    # Commit everything together so chunk i always corresponds to vector i.
    text_chunks, embeddings, index = new_chunks, new_embeddings, new_index
    return "✅ PDF processed and indexed successfully."
56
+
57
def query_document(question, top_k=5):
    """Answer a question using the most similar chunks of the indexed PDF.

    Args:
        question: The user's question.
        top_k: Maximum number of chunks to retrieve as context (default 5).

    Returns:
        The model's answer, or a human-readable status string if the
        question is blank, no PDF has been indexed, or the query fails.
        This function never raises past the initial state checks.
    """
    if not question or not question.strip():
        return "❗ Please enter a question."
    if index is None or not text_chunks:
        return "❗ Please upload and process a PDF first."

    try:
        # encode() on a one-element list yields one row per input.
        q_embedding = np.asarray(model.encode([question]), dtype="float32")

        # Never request more neighbours than there are chunks: FAISS pads
        # missing results with -1, which would silently index the last chunk.
        k = max(1, min(top_k, len(text_chunks)))
        _, neighbour_ids = index.search(q_embedding, k)
        retrieved_chunks = [
            text_chunks[i] for i in neighbour_ids[0] if 0 <= i < len(text_chunks)
        ]
        context = "\n".join(retrieved_chunks)

        system_prompt = "You are a helpful study supervisor. Use the provided context to answer clearly."
        user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}"

        return ask_llama3(system_prompt, user_prompt)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"❌ Query error: {e}"
74
+
75
# UI: a single-column layout with an indexing step (API key + PDF upload)
# followed by a question/answer step.
with gr.Blocks() as demo:
    gr.Markdown("""
    <div style="text-align:center; background:#f97316; color:white; padding: 12px; border-radius: 10px;">
    <h2>📘 PDF Study Assistant</h2>
    <p>Ask questions from your uploaded PDF using Groq + LLaMA3</p>
    </div>
    """)

    with gr.Column():
        api_input = gr.Textbox(label="🔑 Groq API Key", type="password")
        pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("📥 Extract & Index PDF", variant="primary")
        status_output = gr.Textbox(label="🛠️ Status", interactive=False)

        question = gr.Textbox(label="❓ Ask a Question", lines=2)
        get_answer_btn = gr.Button("💬 Get Answer")
        answer_output = gr.Textbox(label="📢 Answer", lines=10, interactive=False)

    # Wire the buttons to the callbacks: indexing reports into the status box,
    # answering reports into the answer box.
    upload_btn.click(fn=pdf_to_chunks, inputs=[pdf_input, api_input], outputs=[status_output])
    get_answer_btn.click(fn=query_document, inputs=[question], outputs=[answer_output])

demo.launch()