Talha812 committed on
Commit
22d793c
·
verified ·
1 Parent(s): faf4092

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import requests
4
+ import numpy as np
5
+ import faiss
6
+ import gradio as gr
7
+ from groq import Groq
8
+ from sentence_transformers import SentenceTransformer
9
+ from PyPDF2 import PdfReader
10
+
11
+ # -----------------------
12
+ # CONFIG
13
+ # -----------------------
14
+ # Hugging Face Space will inject GROQ_API_KEY as a secret
15
# Read the Groq credential from the process environment.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast at start-up: nothing below can work without the key.
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("❌ Missing GROQ_API_KEY. Please set it in Hugging Face Space → Settings → Repository secrets.")

# Shared, module-wide clients: the chat-completion client and the sentence
# embedding model used for both indexing and querying.
client = Groq(api_key=GROQ_API_KEY)
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Retrieval state; both are populated by build_faiss_index_from_drive_links().
faiss_index = None
chunks = []

# Minimum best-match similarity score required before the LLM is consulted.
RELEVANCE_THRESHOLD = 0.28
26
+
27
+
28
+ # -----------------------
29
+ # HELPER FUNCTIONS
30
+ # -----------------------
31
def download_drive_file_bytes(drive_url: str) -> bytes:
    """Download bytes from a Google Drive share link.

    Accepts both ``.../d/<id>/...`` share links and links that carry the
    file id in an ``id=`` query parameter (``open?id=...`` / ``uc?id=...``).

    Args:
        drive_url: Google Drive share URL.

    Returns:
        The raw body of the direct-download response.

    Raises:
        ValueError: If no file id can be found in ``drive_url``.
        requests.HTTPError: If the download request returns an error status.
    """
    # Local import: the file has no top-level urllib import.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(drive_url.strip())

    # Look for the id in the URL *path* only, so a trailing "?usp=sharing"
    # can never be absorbed into the id.
    _, marker, tail = parsed.path.partition("/d/")
    if marker:
        file_id = tail.split("/", 1)[0]
    else:
        # "open?id=<id>" style links keep the id in the query string.
        file_id = parse_qs(parsed.query).get("id", [""])[0]

    if not file_id:
        raise ValueError(f"Could not find a Google Drive file id in: {drive_url!r}")

    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    r = requests.get(download_url, timeout=30)
    r.raise_for_status()
    return r.content
38
+
39
+
40
def pdf_bytes_to_text(pdf_bytes: bytes) -> str:
    """Return the text of every page of a PDF, joined with newlines.

    Pages for which no text is extracted contribute an empty string. Any
    failure while opening or reading the document results in ``""``, so
    callers can treat an empty result as "not a readable PDF".
    """
    try:
        document = PdfReader(io.BytesIO(pdf_bytes))
        page_texts = []
        for page in document.pages:
            extracted = page.extract_text()
            page_texts.append(extracted if extracted else "")
        return "\n".join(page_texts)
    except Exception:
        # Best effort by design: the caller has its own fallback for
        # content that cannot be parsed as a PDF.
        return ""
47
+
48
+
49
def chunk_text(text, chunk_size=250, overlap=50):
    """Split ``text`` into overlapping, whitespace-delimited word windows.

    Args:
        text: Text to split; words are separated on any whitespace.
        chunk_size: Maximum number of words per chunk (must be positive).
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size`` so the window always advances.

    Returns:
        A list of chunk strings (empty when ``text`` contains no words). The
        final chunk always ends at the last word; no chunk is entirely
        contained in the one before it.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        # overlap == chunk_size makes the step zero (range() error) and a
        # larger overlap makes it negative, which would silently yield [].
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap}"
        )

    words = text.split()
    step = chunk_size - overlap
    pieces = []
    for start in range(0, len(words), step):
        pieces.append(" ".join(words[start : start + chunk_size]))
        # Once a window reaches the end, any further window would only
        # repeat words already covered by this one.
        if start + chunk_size >= len(words):
            break
    return pieces
52
+
53
+
54
def build_faiss_index_from_drive_links(drive_links):
    """Download each Drive file, chunk its text, and (re)build the index.

    Each link is downloaded and parsed as a PDF; if that yields no text the
    raw bytes are decoded as UTF-8 instead. A failure on one link is printed
    and does not stop the remaining links from being processed.

    On success the module-level ``faiss_index`` and ``chunks`` are replaced.

    Args:
        drive_links: Iterable of Google Drive share URLs.

    Returns:
        A human-readable status message (success or failure).
    """
    global faiss_index, chunks

    collected = []
    for link in drive_links:
        try:
            raw = download_drive_file_bytes(link)
            text = pdf_bytes_to_text(raw)
            if text.strip() == "":
                # Not a readable PDF: try treating the payload as plain text.
                try:
                    text = raw.decode("utf-8")
                except Exception:
                    text = ""
            collected += chunk_text(text)
        except Exception as e:
            print(f"[Error] {link}: {e}")

    if len(collected) == 0:
        return "❌ No valid text found. Please check your Drive file links."

    vectors = embedder.encode(collected, convert_to_numpy=True).astype("float32")
    # Scale every row to unit length so inner product equals cosine similarity.
    row_norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    vectors /= row_norms

    dimension = vectors.shape[1]
    faiss_index = faiss.IndexFlatIP(dimension)
    faiss_index.add(vectors)
    chunks = collected

    return f"✅ Knowledge base ready! {len(chunks)} chunks indexed."
81
+
82
+
83
def retrieve_top_k(query, k=4):
    """Return the ``k`` indexed chunks most similar to ``query``.

    Args:
        query: Question text to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        A ``(retrieved, scores)`` pair: the matching chunk strings and their
        inner-product scores, best match first. Both are empty when the
        index has not been built or ``k`` is not positive.
    """
    if faiss_index is None:
        return [], []

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with id -1, and chunks[-1] would silently return the last
    # chunk as if it were a real hit.
    k = min(k, faiss_index.ntotal)
    if k <= 0:
        return [], []

    q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
    # Unit-normalise so the inner-product search behaves as cosine similarity.
    q_emb /= np.linalg.norm(q_emb, axis=1, keepdims=True)

    scores, ids = faiss_index.search(q_emb, k)
    valid = ids[0] >= 0  # defensive: drop any padded (-1) result slots
    retrieved = [chunks[i] for i in ids[0][valid]]
    return retrieved, scores[0][valid]
91
+
92
+
93
def ask_groq_with_rag(query):
    """Answer ``query`` from the knowledge base and return an HTML snippet.

    The question is matched against the indexed chunks. If the best match
    scores below ``RELEVANCE_THRESHOLD`` a fixed "I don't know" card is
    returned without calling the model; otherwise the retrieved chunks are
    sent to the Groq chat model as the only allowed context.

    Args:
        query: The user's question; ``None`` or blank input is rejected.

    Returns:
        An HTML string: the answer card, or a styled error/notice message.
    """
    # Local import: the file has no top-level import of the html module.
    import html

    # Guard against None as well as whitespace-only input.
    if not query or not query.strip():
        return "<div style='color:#b91c1c;'>Please type a question.</div>"

    retrieved, scores = retrieve_top_k(query)
    if not retrieved:
        return "<div style='color:#b91c1c;'>Knowledge base not initialized.</div>"

    if max(scores) < RELEVANCE_THRESHOLD:
        return "<div style='border:2px solid #ef4444; border-radius:10px; padding:15px; background:#fff7f7;'><h3 style='color:#b91c1c;'>❌ Sorry, I don’t know — that’s not covered in my knowledge base.</h3></div>"

    context = "\n\n---\n\n".join(retrieved)
    prompt = f"""
You are a helpful assistant. Use ONLY the following context to answer.
If answer not found in context, say exactly:
"❌ Sorry, I don’t know — that’s not covered in my knowledge base."

Context:
{context}

Question:
{query}
"""

    try:
        resp = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        ans = resp.choices[0].message.content.strip()
    except Exception as e:
        # Escape: exception text may echo request data or contain markup.
        return f"<div style='color:#b91c1c;'>Error: {html.escape(str(e))}</div>"

    # The answer is derived from document text and user input; escape it so
    # it is rendered as text rather than interpreted as HTML.
    safe_ans = html.escape(ans)

    return f"""
<div style="border-radius:12px; padding:16px; background:#faf5ff; border:1px solid #d8b4fe;">
<h3 style="color:#6b21a8;">💡 Answer from Knowledge Base</h3>
<div style="font-size:15px; color:#1f2937; line-height:1.45;">{safe_ans}</div>
</div>
"""
132
+
133
+
134
+ # -----------------------
135
+ # INITIALIZE KNOWLEDGE BASE
136
+ # -----------------------
137
# Google Drive share links whose contents make up the knowledge base.
DRIVE_LINKS = [
    "https://drive.google.com/file/d/1gl_6EAvN5uzTUbir_ytOBUaSmr9pWKNF/view?usp=sharing",
]

# Build the index once at start-up; the returned message is shown in the UI.
status_msg = build_faiss_index_from_drive_links(drive_links=DRIVE_LINKS)
142
+
143
+ # -----------------------
144
+ # GRADIO UI (HCI-driven)
145
+ # -----------------------
146
# Page-level styling passed to gr.Blocks; ".status-box" styles the index
# status banner rendered at the top of the page.
css = """
body {
    font-family: 'Inter', system-ui, sans-serif;
    background: linear-gradient(135deg, #ede9fe, #fce7f3);
}
.gradio-container {max-width: 900px; margin: auto;}
h1 {text-align:center; color:#6d28d9;}
.status-box {
    text-align:center;
    font-size:16px;
    color:#047857;
    background:#ecfdf5;
    border:1px solid #6ee7b7;
    border-radius:8px;
    padding:8px;
}
"""

with gr.Blocks(css=css, title="RAG Knowledge Chatbot") as demo:
    # Header: title, tagline, and the result of the start-up index build.
    gr.Markdown(value="<h1>📚 RAG Knowledge Chatbot</h1>")
    gr.Markdown(
        value="<p style='text-align:center;color:#7c3aed;'>Ask questions only from my internal knowledge base</p>"
    )
    gr.Markdown(value=f"<div class='status-box'>{status_msg}</div>")

    with gr.Row():
        # Left (wider) column: question input and rendered answer.
        with gr.Column(scale=2):
            gr.Markdown(value="### 💭 Ask your question")
            query = gr.Textbox(
                placeholder="Type your question...",
                lines=2,
                show_label=False,
            )
            ask_btn = gr.Button("🚀 Ask", variant="primary")
            answer_html = gr.HTML(
                value="<div style='font-size:15px;color:#374151;'>Answer will appear here...</div>"
            )
            # Clicking the button sends the question through the RAG pipeline
            # and writes the returned HTML into the answer area.
            ask_btn.click(fn=ask_groq_with_rag, inputs=query, outputs=answer_html)

        # Right column: the source documents behind the knowledge base.
        with gr.Column(scale=1):
            gr.Markdown(value="### 📄 Knowledge Base Files")
            for link in DRIVE_LINKS:
                gr.Markdown(value=f"- <a href='{link}' target='_blank'>{link}</a>")

demo.launch()