Zohaib366 committed on
Commit
ac02361
Β·
verified Β·
1 Parent(s): 215d2e6

UPDATED APP.PY

Browse files
Files changed (1) hide show
  1. app.py +119 -114
app.py CHANGED
@@ -1,114 +1,119 @@
1
- import gradio as gr
2
- import fitz # PyMuPDF
3
- import os
4
- from sentence_transformers import SentenceTransformer
5
- import numpy as np
6
- import faiss
7
- from groq import Groq
8
-
9
- # Initialize Groq client
10
- groq_client = Groq(api_key="<REDACTED_GROQ_API_KEY>")
11
- model = "llama3-8b-8192"
12
-
13
- embedder = SentenceTransformer('all-MiniLM-L6-v2')
14
-
15
- # Global state
16
- state = {
17
- "document_chunks": [],
18
- "metadata": [],
19
- "index": None,
20
- "embeddings": None
21
- }
22
-
23
- # Extract text from PDF using file path
24
- def extract_text_from_pdf(file_path):
25
- doc = fitz.open(file_path)
26
- texts = []
27
- for i, page in enumerate(doc):
28
- text = page.get_text().strip()
29
- if text:
30
- texts.append({"text": text, "page": i + 1})
31
- return texts
32
-
33
- # Process PDFs
34
- def process_pdfs(files):
35
- state["document_chunks"] = []
36
- state["metadata"] = []
37
-
38
- for file in files:
39
- file_name = os.path.basename(file.name)
40
- chunks = extract_text_from_pdf(file.name)
41
- for chunk in chunks:
42
- state["document_chunks"].append(chunk['text'])
43
- state["metadata"].append({"file": file_name, "page": chunk['page']})
44
-
45
- embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
46
- dim = embeddings.shape[1]
47
- index = faiss.IndexFlatL2(dim)
48
- index.add(np.array(embeddings))
49
- state["index"] = index
50
- state["embeddings"] = embeddings
51
-
52
- return "βœ… Book(s) loaded successfully!"
53
-
54
- # Retrieve top chunks
55
- def retrieve_chunks(question, top_k=3):
56
- if not state["index"]:
57
- return []
58
- q_embedding = embedder.encode([question])
59
- D, I = state["index"].search(q_embedding, top_k)
60
- return [(state["document_chunks"][i], state["metadata"][i]) for i in I[0]]
61
-
62
- # Generate answer with source references
63
- def generate_answer(context, question):
64
- context_text = "\n\n".join(
65
- f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
66
- for chunk, meta in context
67
- )
68
- prompt = f"""You are a helpful assistant. Use the context below to answer the question.
69
- Include the source references (file name and page number) in your answer.
70
-
71
- Context:
72
- {context_text}
73
-
74
- Question:
75
- {question}
76
-
77
- Answer (with sources):"""
78
-
79
- response = groq_client.chat.completions.create(
80
- model=model,
81
- messages=[{"role": "user", "content": prompt}],
82
- temperature=0.2
83
- )
84
- return response.choices[0].message.content
85
-
86
- # Chat function for ChatInterface
87
- def chatbot_interface_fn(message, history):
88
- if not state["document_chunks"]:
89
- return "⚠️ Please upload PDF files first."
90
- context = retrieve_chunks(message)
91
- return generate_answer(context, message)
92
-
93
- # Gradio UI
94
- with gr.Blocks(title="RAG Chatbot") as demo:
95
- gr.Markdown("# πŸ“š Enhanced RAG Chatbot\nUpload books and chat naturally!")
96
-
97
- with gr.Row():
98
- pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="πŸ“‚ Upload PDFs")
99
- upload_btn = gr.Button("Upload & Process PDFs")
100
- status = gr.Textbox(label="Status", interactive=False)
101
-
102
- upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])
103
-
104
- gr.ChatInterface(
105
- fn=chatbot_interface_fn,
106
- chatbot=gr.Chatbot(height=400, type="messages"),
107
- textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
108
- title="πŸ“– PDF Chat",
109
- description="Ask questions based on uploaded PDF content.",
110
- submit_btn="Send"
111
- )
112
-
113
- if __name__ == "__main__":
114
- demo.launch()
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import fitz # PyMuPDF
3
+ import os
4
+ from sentence_transformers import SentenceTransformer
5
+ import numpy as np
6
+ import faiss
7
+ from groq import Groq
8
+
9
# Load API key from environment variable.
# Fail fast at import time so the app never starts without credentials.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise ValueError("❌ GROQ_API_KEY environment variable not set.")

# Initialize Groq client
groq_client = Groq(api_key=api_key)
model = "llama3-8b-8192"  # Groq-hosted Llama 3 8B chat model

# Sentence-embedding model used for both document chunks and user queries.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Global state
# Mutable module-level store shared by the upload and chat handlers:
#   document_chunks - list of extracted page texts (str)
#   metadata        - parallel list of {"file": name, "page": number} dicts
#   index           - FAISS IndexFlatL2 over chunk embeddings (None until built)
#   embeddings      - raw embedding matrix, kept after indexing
state = {
    "document_chunks": [],
    "metadata": [],
    "index": None,
    "embeddings": None
}
27
+
28
# Extract text from PDF using file path
def extract_text_from_pdf(file_path):
    """Extract non-empty page texts from the PDF at *file_path*.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        A list of ``{"text": str, "page": int}`` dicts, one per page that
        contains any text; pages are numbered from 1.
    """
    texts = []
    # Use the document as a context manager so the file handle is closed
    # even if a page raises mid-iteration (original leaked the handle).
    with fitz.open(file_path) as doc:
        for i, page in enumerate(doc):
            text = page.get_text().strip()
            if text:
                texts.append({"text": text, "page": i + 1})
    return texts
37
+
38
# Process PDFs
def process_pdfs(files):
    """Extract text from the uploaded PDFs and (re)build the FAISS index.

    Args:
        files: List of Gradio file objects (each exposes a ``.name`` path),
            or None when nothing was uploaded.

    Returns:
        A human-readable status string for display in the UI.
    """
    state["document_chunks"] = []
    state["metadata"] = []

    # Guard: clicking the button with no files selected passes None/[].
    if not files:
        state["index"] = None
        state["embeddings"] = None
        return "⚠️ Please select at least one PDF file first."

    for file in files:
        file_name = os.path.basename(file.name)
        for chunk in extract_text_from_pdf(file.name):
            state["document_chunks"].append(chunk['text'])
            state["metadata"].append({"file": file_name, "page": chunk['page']})

    # Guard: scanned/image-only PDFs can yield zero text chunks, and
    # embeddings.shape[1] would fail on an empty batch.
    if not state["document_chunks"]:
        state["index"] = None
        state["embeddings"] = None
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    embeddings = embedder.encode(state["document_chunks"], show_progress_bar=True)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    state["index"] = index
    state["embeddings"] = embeddings

    return "βœ… Book(s) loaded successfully!"
58
+
59
# Retrieve top chunks
def retrieve_chunks(question, top_k=3):
    """Return up to *top_k* ``(chunk_text, metadata)`` pairs most similar
    to *question*, or an empty list when no index has been built yet.
    """
    index = state["index"]
    if index is None:
        return []
    # Never ask FAISS for more neighbours than there are chunks; otherwise
    # it pads results with -1, and chunks[-1] would silently return the
    # wrong chunk below.
    top_k = min(top_k, len(state["document_chunks"]))
    if top_k <= 0:
        return []
    q_embedding = embedder.encode([question])
    D, I = index.search(np.array(q_embedding), top_k)
    # Skip any -1 padding defensively.
    return [
        (state["document_chunks"][i], state["metadata"][i])
        for i in I[0]
        if i >= 0
    ]
66
+
67
# Generate answer with source references
def generate_answer(context, question):
    """Ask the LLM to answer *question* using the retrieved *context*.

    Args:
        context: Iterable of ``(chunk_text, metadata)`` pairs, as produced
            by ``retrieve_chunks``.
        question: The user's question.

    Returns:
        The model's answer text, which is prompted to cite file/page sources.
    """
    cited_chunks = []
    for chunk, meta in context:
        cited_chunks.append(
            f"{chunk}\n\n[Source: {meta['file']}, Page: {meta['page']}]"
        )
    context_text = "\n\n".join(cited_chunks)

    prompt = f"""You are a helpful assistant. Use the context below to answer the question.
Include the source references (file name and page number) in your answer.

Context:
{context_text}

Question:
{question}

Answer (with sources):"""

    # Low temperature keeps answers grounded in the supplied context.
    response = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2
    )
    return response.choices[0].message.content
90
+
91
# Chat function for ChatInterface
def chatbot_interface_fn(message, history):
    """Answer *message* from the indexed PDFs (*history* is unused)."""
    # Nothing indexed yet -> prompt the user to upload instead of answering.
    if not state["document_chunks"]:
        return "⚠️ Please upload PDF files first."
    relevant = retrieve_chunks(message)
    return generate_answer(relevant, message)
97
+
98
# Gradio UI
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# πŸ“š Enhanced RAG Chatbot\nUpload books and chat naturally!")

    # Upload row: file picker, processing trigger, and read-only status box.
    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple", label="πŸ“‚ Upload PDFs")
        upload_btn = gr.Button("Upload & Process PDFs")
        status = gr.Textbox(label="Status", interactive=False)

    # Clicking the button extracts text and builds the vector index;
    # process_pdfs returns the status string shown in `status`.
    upload_btn.click(process_pdfs, inputs=[pdf_input], outputs=[status])

    # Chat widget; each message is answered by chatbot_interface_fn via RAG.
    gr.ChatInterface(
        fn=chatbot_interface_fn,
        chatbot=gr.Chatbot(height=400, type="messages"),
        textbox=gr.Textbox(placeholder="Ask about the PDFs...", scale=7),
        title="πŸ“– PDF Chat",
        description="Ask questions based on uploaded PDF content.",
        submit_btn="Send"
    )
117
+
118
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()