Zohaib366 committed on
Commit
215d2e6
·
verified ·
1 Parent(s): 8f2a20e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +114 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import os
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ import faiss
7
+ from groq import Groq
8
+
9
# Initialize Groq client.
# SECURITY: an API key must never be committed to source control. The key is
# read from the GROQ_API_KEY environment variable instead (for example a
# deployment secret). The key that used to be hard-coded on this line has been
# published with the commit and should be revoked and rotated.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Chat model used by generate_answer; GROQ_MODEL overrides the default.
# NOTE(review): confirm the default model id is still served by Groq.
model = os.environ.get("GROQ_MODEL", "llama3-8b-8192")

# Sentence-embedding model shared by document indexing and question lookup.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global state
state = {
    "document_chunks": [],  # text of every indexed chunk
    "metadata": [],         # {"file", "page"} dict for the chunk at the same position
    "index": None,          # FAISS index over the chunk embeddings; None until PDFs are processed
    "embeddings": None      # embedding matrix produced by embedder.encode
}
22
+
23
# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    """Return the non-empty page texts of the PDF at *file_path*.

    Args:
        file_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        A list of ``{"text": str, "page": int}`` dicts, one per page whose
        text is non-empty after stripping whitespace. ``page`` is 1-based.
    """
    texts = []
    # The context manager closes the document even when extraction raises;
    # without it every processed PDF leaked an open file handle.
    with fitz.open(file_path) as doc:
        for page_number, page in enumerate(doc, start=1):
            text = page.get_text().strip()
            if text:
                texts.append({"text": text, "page": page_number})
    return texts
32
+
33
# Process PDFs
def process_pdfs(files):
    """Extract, embed and index the uploaded PDFs, replacing any earlier index.

    Args:
        files: Value of the ``gr.File`` upload input. Each item may be a path
            string or an object whose ``name`` attribute holds the path.
            ``None`` or an empty list means nothing was uploaded.

    Returns:
        A status message for the UI status box.
    """
    # Reset all four entries up front so an empty or failed upload can never
    # leave an old index paired with a different chunk list.
    state["document_chunks"] = []
    state["metadata"] = []
    state["index"] = None
    state["embeddings"] = None

    if not files:
        return "⚠️ Please upload PDF files first."

    chunks = []
    metadata = []
    for file in files:
        # Accept both plain path strings and file objects exposing ``.name``.
        path = getattr(file, "name", file)
        file_name = os.path.basename(path)
        for page in extract_text_from_pdf(path):
            chunks.append(page["text"])
            metadata.append({"file": file_name, "page": page["page"]})

    # Scanned/image-only PDFs yield no text; encoding an empty list would make
    # ``embeddings.shape[1]`` below raise.
    if not chunks:
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    embeddings = np.asarray(
        embedder.encode(chunks, show_progress_bar=True), dtype=np.float32
    )
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish the new corpus only after every step has succeeded.
    state["document_chunks"] = chunks
    state["metadata"] = metadata
    state["index"] = index
    state["embeddings"] = embeddings

    return "✅ Book(s) loaded successfully!"
53
+
54
# Retrieve top chunks
def retrieve_chunks(question, top_k=3):
    """Return the indexed chunks closest to *question*.

    Args:
        question: Query text; it is embedded with the shared ``embedder``.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of ``(chunk_text, metadata)`` tuples in the order FAISS
        returns them. Empty when nothing is indexed or ``top_k`` is not
        positive.
    """
    index = state["index"]
    if index is None or top_k <= 0:
        return []

    # Never ask for more neighbours than the index holds: FAISS pads the
    # missing slots with -1, which as a Python list index would silently
    # return the last chunk.
    k = min(top_k, index.ntotal)
    if k == 0:
        return []

    q_embedding = np.asarray(embedder.encode([question]), dtype=np.float32)
    _, neighbours = index.search(q_embedding, k)
    return [
        (state["document_chunks"][i], state["metadata"][i])
        for i in neighbours[0]
        if i >= 0  # defensive: skip any padding id that still slips through
    ]
61
+
62
# Generate answer with source references
def generate_answer(context, question):
    """Ask the Groq chat model to answer *question* using the given context.

    Args:
        context: Iterable of ``(chunk_text, metadata)`` pairs; each metadata
            dict is read for its ``"file"`` and ``"page"`` keys.
        question: The user's question, inserted verbatim into the prompt.

    Returns:
        The message content of the first choice in the model response.
    """
    # Tag every chunk with its origin so the model can cite it.
    cited_chunks = []
    for chunk, meta in context:
        cited_chunks.append(
            f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
        )
    context_text = "\n\n".join(cited_chunks)

    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.

Context:
{context_text}

Question:
{question}

Answer (with sources):"""

    user_message = {"role": "user", "content": prompt}
    completion = groq_client.chat.completions.create(
        model=model,
        messages=[user_message],
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
85
+
86
# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    """Answer one chat message from the indexed PDF content.

    Args:
        message: The user's latest message.
        history: Conversation history supplied by ``gr.ChatInterface``; unused.

    Returns:
        The generated answer, or a warning string when there is nothing to
        search or nothing was retrieved.
    """
    if not state["document_chunks"]:
        return "⚠️ Please upload PDF files first."

    context = retrieve_chunks(message)
    # Without retrieved context the model would answer ungrounded while being
    # asked to cite sources, so stop here instead.
    if not context:
        return "⚠️ No relevant content found in the uploaded PDFs."
    return generate_answer(context, message)
92
+
93
# Gradio UI
# NOTE(review): the source lost its indentation; the nesting below (all three
# upload widgets inside the Row, everything else directly under Blocks) is a
# reconstruction - confirm against the intended layout.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# 📚 Enhanced RAG Chatbot\nUpload books and chat naturally!")

    # Upload controls: file picker, trigger button and read-only status box.
    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="📂 Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")
        status = gr.Textbox(label="Status", interactive=False)

    # Indexing runs only when the button is clicked; its message goes to the
    # status box.
    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])

    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="📖 PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ PyMuPDF
3
+ sentence-transformers
4
+ faiss-cpu
5
+ numpy
6
+ groq