muhammadrazapathan committed on
Commit
da7c2bc
·
verified ·
1 Parent(s): 2c9888e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -0
app.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# ================== IMPORTS ==================
import os
import tempfile

import gradio as gr
from dotenv import load_dotenv

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

from groq import Groq

# ================== ENVIRONMENT ==================
# NOTE(review): the original file contained an accidental second copy of the
# entire module pasted inside the os.getenv() argument, which made the file a
# SyntaxError. The duplicate has been removed.
load_dotenv()

# SECURITY FIX: the original passed a hard-coded Groq API key ("gsk_...") as
# the *variable name* argument to os.getenv(). That leaked the secret in
# source control (revoke that key!) and always returned None, so the client
# was never created. Read the key from the environment by name instead.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

client = None
if GROQ_API_KEY:
    client = Groq(api_key=GROQ_API_KEY)

# ================== GLOBAL DATABASE ==================
# FAISS vector store shared between process_pdf() and ask_question();
# stays None until a PDF has been processed.
vector_db = None
138
+
139
# ================== LLM FUNCTION ==================
def groq_llm(prompt):
    """Send *prompt* as a single user message to the Groq chat API and
    return the model's reply text (or an error string when no client exists)."""
    if client is None:
        return "❌ GROQ API key not set. Please set it in environment variables or Hugging Face Secrets."

    chat_completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return chat_completion.choices[0].message.content
148
+
149
# ================== PDF PROCESSING ==================
def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and (re)build the global
    FAISS vector store.

    Returns a user-facing status string for the Gradio "Status" textbox.
    """
    global vector_db

    if file is None:
        return "❌ Please upload a PDF file."

    # Gradio's gr.File may deliver raw bytes, a file-like object with a
    # .name path, or a plain filesystem path depending on its `type`
    # setting — support all three. (The original assumed bytes only and
    # would crash on the default filepath mode.)
    tmp_path = None
    if isinstance(file, (bytes, bytearray)):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file)
            tmp_path = tmp.name
        pdf_path = tmp_path
    else:
        pdf_path = getattr(file, "name", file)

    try:
        # Load PDF pages as LangChain documents.
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
    finally:
        # FIX: the original created the delete=False temp file and never
        # removed it, leaking one file per upload.
        if tmp_path is not None:
            os.unlink(tmp_path)

    # Chunking: 500-char chunks with 100-char overlap.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    docs = splitter.split_documents(documents)

    # Embeddings (open-source sentence-transformers model).
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Build (and replace) the global vector store.
    vector_db = FAISS.from_documents(docs, embeddings)

    return f"✅ Document processed successfully! {len(docs)} chunks created."
181
+
182
# ================== QUESTION ANSWERING ==================
def ask_question(question):
    """Retrieve the 3 most relevant indexed chunks for *question*, build a
    context-grounded prompt, and return the LLM's answer string."""
    if vector_db is None:
        return "❌ Please upload and process a document first."

    # Similarity search over the FAISS index, top-3 chunks.
    retriever = vector_db.as_retriever(search_kwargs={"k": 3})
    relevant_docs = retriever.get_relevant_documents(question)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    prompt = f"""
You are an intelligent assistant. Use the following context to answer the user's question.

Context:
{context}

Question:
{question}

Answer:
"""
    return groq_llm(prompt)
208
+
209
# ================== GRADIO UI ==================
with gr.Blocks(title="📄 RAG PDF Question Answering App") as demo:
    gr.Markdown("## 📄 RAG (Retrieval-Augmented Generation) Application")
    gr.Markdown("Upload a PDF document and ask questions about its content.")

    # Surface the missing-key condition up front instead of only at answer time.
    if client is None:
        gr.Markdown("⚠️ **GROQ API key is not set.** Please set it to enable answering.")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("📥 Process Document")

    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
        answer = gr.Textbox(label="Answer", interactive=False)

    # Wire events: the button builds the index; Enter in the question box answers.
    process_btn.click(fn=process_pdf, inputs=pdf_upload, outputs=status)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

# FIX: guard the launch so importing this module (e.g. by the Spaces runtime
# or by tests) does not start a server as an import side effect.
if __name__ == "__main__":
    demo.launch()