SohaAyub commited on
Commit
2deda75
·
verified ·
1 Parent(s): 6323ac6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# =====================================================
# Configuration
# =====================================================
# Mean L2 distance above this value is treated as "question not really
# covered by the document" (lower = stricter relevance).
RELEVANCE_THRESHOLD = 1.2

# =====================================================
# Initialize Groq Client
# =====================================================
# BUG FIX: the original read the key via `userdata.get('RAG_GROQ')` —
# a Google-Colab-only helper that is never imported here, so the app
# crashed with NameError at startup everywhere outside Colab.
# Read the secret from the environment instead (standard on HF Spaces).
client = Groq(api_key=os.environ.get("RAG_GROQ"))

# =====================================================
# Load Embedding Model
# =====================================================
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# =====================================================
# Global Vector Store
# =====================================================
vector_store = None  # faiss.IndexFlatL2, created when a PDF is processed
stored_chunks = []   # text chunks, row-aligned with the FAISS index
30
+
31
# =====================================================
# PDF Processing Function
# =====================================================
def process_pdf(pdf_file):
    """Extract text from the uploaded PDF, split it into overlapping
    chunks, embed them, and (re)build the global FAISS index.

    Args:
        pdf_file: path/file object handed over by the Gradio File widget.

    Returns:
        A human-readable status string for the UI.
    """
    global vector_store, stored_chunks

    reader = PdfReader(pdf_file)
    full_text = ""
    for page in reader.pages:
        # extract_text() is expensive — the original called it twice per
        # page (once in the condition, once in the body); call it once.
        page_text = page.extract_text()
        if page_text:
            full_text += page_text + "\n"

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    chunks = splitter.split_text(full_text)

    # Robustness: a scanned/image-only PDF yields no chunks; encoding []
    # would crash on `.shape` below, so fail with a clear message instead.
    if not chunks:
        return "⚠️ No extractable text found in this PDF."

    embeddings = embedding_model.encode(chunks)

    # Fresh flat L2 index sized to the embedding dimensionality.
    vector_store = faiss.IndexFlatL2(embeddings.shape[1])
    vector_store.add(np.array(embeddings))
    stored_chunks = chunks

    return "✅ PDF processed successfully. You can now ask questions."
59
+
60
# =====================================================
# Question Answering Function
# =====================================================
def answer_question(question):
    """Retrieve the top-3 chunks for *question*, flag weak relevance,
    and ask the Groq LLM to answer strictly from that context.

    Args:
        question: free-text user question from the UI.

    Returns:
        Answer string, optionally prefixed with a low-relevance warning.
    """
    if vector_store is None:
        return "⚠️ Please upload and process a PDF first."

    question_embedding = embedding_model.encode([question])
    distances, indices = vector_store.search(np.array(question_embedding), k=3)

    # Mean L2 distance of the retrieved neighbours — retrieval quality proxy.
    avg_distance = distances[0].mean()

    # BUG FIX: FAISS pads `indices` with -1 when the index holds fewer than
    # k vectors; the original then read stored_chunks[-1], silently
    # duplicating the last chunk.  Filter invalid ids, and build the
    # context with join instead of quadratic string +=.
    valid_ids = [idx for idx in indices[0] if 0 <= idx < len(stored_chunks)]
    context = "".join(stored_chunks[idx] + "\n" for idx in valid_ids)

    # Relevance feedback shown above the generated answer.
    if avg_distance > RELEVANCE_THRESHOLD:
        relevance_note = (
            "⚠️ **Note:** This question is not directly answered in the document.\n"
            "The response below is based on loosely related context.\n\n"
        )
    else:
        relevance_note = ""

    prompt = f"""
You are an honest and careful AI assistant.

Instructions:
- Answer ONLY using the provided context.
- If the answer is not explicitly stated, say:
  "This is not directly mentioned in the document, but based on related context..."

Context:
{context}

Question:
{question}
"""

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )

    return relevance_note + response.choices[0].message.content
110
+
111
# =====================================================
# Gradio UI
# =====================================================
with gr.Blocks() as app:
    gr.Markdown("## 📄 RAG-based PDF Question Answering (Groq + FAISS)")
    gr.Markdown(
        "Upload a PDF and ask questions. "
        "The system will clearly tell you if an answer is not directly mentioned."
    )

    # Ingestion widgets: file picker, trigger button, read-only status line.
    pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
    process_btn = gr.Button("Process PDF")
    status_box = gr.Textbox(label="Status", interactive=False)

    # Q&A widgets: question input (submit on Enter) and the answer area.
    question_box = gr.Textbox(label="Ask a Question")
    answer_box = gr.Textbox(label="Answer", lines=8)

    # Event wiring: clicking the button ingests the PDF; pressing Enter in
    # the question box runs retrieval + generation.
    process_btn.click(process_pdf, inputs=pdf_file, outputs=status_box)
    question_box.submit(answer_question, inputs=question_box, outputs=answer_box)

app.launch()