Prakyath01 committed on
Commit
3683320
·
verified ·
1 Parent(s): bcff74f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -58
app.py CHANGED
@@ -33,18 +33,16 @@ URLS = {
33
  }
34
 
35
 
36
- # ================= Knowledge Base ================= #
37
 
38
  def scrape_page(name, url):
39
  try:
40
- response = requests.get(url, timeout=20)
41
- response.raise_for_status()
42
- soup = BeautifulSoup(response.text, "html.parser")
43
  content = soup.find("div", class_="td-content")
44
-
45
  if not content:
46
  return None
47
-
48
  text = content.get_text(separator="\n").strip()
49
  return Document(page_content=text, metadata={"doc_id": name, "url": url})
50
  except Exception as e:
@@ -53,13 +51,12 @@ def scrape_page(name, url):
53
 
54
 
55
  def build_or_load_kb():
56
- print("[INFO] Loading embedding model...")
57
  embedding_model = HuggingFaceEmbeddings(
58
  model_name="sentence-transformers/all-MiniLM-L6-v2"
59
  )
60
 
61
  if os.path.isdir(PERSIST_DIR):
62
- print("[INFO] Loading existing vector DB...")
63
  vectordb = Chroma(
64
  embedding_function=embedding_model,
65
  persist_directory=PERSIST_DIR,
@@ -74,9 +71,9 @@ def build_or_load_kb():
74
  print("[INFO] No DB found — scraping docs...")
75
  docs = []
76
  for name, url in URLS.items():
77
- doc = scrape_page(name, url)
78
- if doc:
79
- docs.append(doc)
80
  print(f"[INFO] Scraped {len(docs)} docs")
81
 
82
  splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
@@ -84,16 +81,12 @@ def build_or_load_kb():
84
 
85
  vectordb = Chroma.from_documents(chunks, embedding_model, persist_directory=PERSIST_DIR)
86
  vectordb.persist()
87
-
88
- print("[INFO] Vector DB built & saved.")
89
  return vectordb, chunks
90
 
91
 
92
  vectordb, chunks = build_or_load_kb()
93
 
94
-
95
- # ================= Search & Reranker ================= #
96
-
97
  bm25 = BM25Okapi([c.page_content.split() for c in chunks])
98
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
99
 
@@ -113,7 +106,7 @@ def hybrid_search(query, top_k=5):
113
  unique_docs = []
114
  seen = set()
115
  for doc in vector_results + bm_results:
116
- key = (doc.metadata.get("doc_id"), doc.page_content[:50])
117
  if key not in seen:
118
  seen.add(key)
119
  unique_docs.append(doc)
@@ -121,22 +114,22 @@ def hybrid_search(query, top_k=5):
121
  if not unique_docs:
122
  return []
123
 
124
- rerank_pairs = [(query, doc.page_content) for doc in unique_docs]
125
- scores = reranker.predict(rerank_pairs)
126
  ranked = sorted(zip(scores, unique_docs), reverse=True)[:top_k]
127
 
128
- for s, doc in ranked:
129
- doc.metadata["rerank_score"] = float(s)
130
 
131
- return [doc for _, doc in ranked]
132
 
133
 
134
- # ================= LLM ================= #
135
 
136
  def call_llm(prompt):
137
  api_key = os.getenv("OPENROUTER_API_KEY")
138
  if not api_key:
139
- return "⚠️ Missing API key.\nGroundedness: 0%"
140
 
141
  try:
142
  res = requests.post(
@@ -149,33 +142,29 @@ def call_llm(prompt):
149
  json={
150
  "model": "meta-llama/llama-3.1-8b-instruct",
151
  "messages": [{"role": "user", "content": prompt}],
152
- "temperature": 0.0,
153
  "max_tokens": 400,
 
154
  },
 
155
  )
156
  res.raise_for_status()
157
- data = res.json()
158
- return data["choices"][0]["message"]["content"]
159
  except Exception as e:
160
- return f"⚠️ LLM Error: {e}\nGroundedness: 0%"
161
 
162
 
163
- # ================= Analytics ================= #
 
 
 
164
 
165
  def classify_query(q):
166
  q = q.lower()
167
- if "how" in q:
168
- return "how-to"
169
- if "error" in q or "fail" in q:
170
- return "debug"
171
  return "general"
172
 
173
 
174
- METRICS = {"q": [], "lat": [], "tok": [], "g": [], "r": [], "c": [], "t": []}
175
-
176
-
177
- # ================= Chat Handler ================= #
178
-
179
  def answer_question(query, history):
180
  start = time.time()
181
  docs = hybrid_search(query)
@@ -184,21 +173,20 @@ def answer_question(query, history):
184
  reply = "Not found in docs.\nGroundedness: 0%"
185
  return history + [
186
  {"role": "user", "content": query},
187
- {"role": "assistant", "content": reply}
188
  ], ""
189
 
 
190
  ctx = ""
191
  sources = []
192
- scores = []
193
  for i, d in enumerate(docs, 1):
194
- label = f"[{i}]"
195
- ctx += f"{label} {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n"
196
- sources.append(f"{label} → {d.metadata['url']}")
197
  scores.append(d.metadata["rerank_score"])
198
 
199
  prompt = f"""
200
- Answer the question ONLY using the context below.
201
- Each sentence MUST end with a citation like [1].
202
 
203
  Question: {query}
204
 
@@ -214,13 +202,14 @@ End with: Groundedness: XX%
214
  grounded = 0
215
  m = re.search(r"Groundedness:\s*(\d+)%", answer)
216
  if m:
217
- grounded = int(m.group(1"))
218
 
219
  cites = len(set(re.findall(r"\[(\d+)\]", answer)))
220
- avg_score = sum(scores) / len(scores) if scores else 0
221
 
222
  final = answer + "\n\n---\nSources:\n" + "\n".join(sources)
223
 
 
224
  METRICS["q"].append(query)
225
  METRICS["lat"].append(latency)
226
  METRICS["tok"].append(len(answer.split()))
@@ -236,7 +225,7 @@ End with: Groundedness: XX%
236
 
237
  def update_dashboard():
238
  rows = list(zip(
239
- range(1, len(METRICS["q"])+1),
240
  METRICS["q"],
241
  METRICS["lat"],
242
  METRICS["tok"],
@@ -253,28 +242,32 @@ def update_dashboard():
253
  return rows, avgG, avgL, avgT
254
 
255
 
256
- # ================= UI ================= #
257
 
258
  with gr.Blocks(title="Kubernetes RAG Assistant") as app:
259
  gr.Markdown("# ☸ Kubernetes RAG Assistant")
260
 
261
  with gr.Tab("Chat"):
262
  chat = gr.Chatbot(height=450)
263
- user_in = gr.Textbox(label="Ask anything about Kubernetes")
264
- reset = gr.Button("Reset")
265
 
266
  user_in.submit(answer_question, [user_in, chat], [chat, user_in])
267
- reset.click(lambda: ([], ""), None, [chat, user_in])
268
 
269
  with gr.Tab("Analytics"):
270
- gr.Markdown("### 📊 Analytics This Session")
271
- table = gr.DataFrame(headers=[
272
- "ID","Query","Latency","Tokens","Grounded","Rerank","Citations","Type"
273
- ], interactive=False)
 
 
 
 
274
  avgG = gr.Number(label="Avg Groundedness")
275
  avgL = gr.Number(label="Avg Latency")
276
  avgT = gr.Number(label="Avg Tokens")
277
- refresh = gr.Button("Refresh")
278
- refresh.click(update_dashboard, None, [table, avgG, avgL, avgT])
279
 
280
  app.launch()
 
33
  }
34
 
35
 
36
+ # ------------------ Knowledge Base ------------------ #
37
 
38
  def scrape_page(name, url):
39
  try:
40
+ r = requests.get(url, timeout=20)
41
+ r.raise_for_status()
42
+ soup = BeautifulSoup(r.text, "html.parser")
43
  content = soup.find("div", class_="td-content")
 
44
  if not content:
45
  return None
 
46
  text = content.get_text(separator="\n").strip()
47
  return Document(page_content=text, metadata={"doc_id": name, "url": url})
48
  except Exception as e:
 
51
 
52
 
53
  def build_or_load_kb():
 
54
  embedding_model = HuggingFaceEmbeddings(
55
  model_name="sentence-transformers/all-MiniLM-L6-v2"
56
  )
57
 
58
  if os.path.isdir(PERSIST_DIR):
59
+ print("[INFO] Loading existing DB...")
60
  vectordb = Chroma(
61
  embedding_function=embedding_model,
62
  persist_directory=PERSIST_DIR,
 
71
  print("[INFO] No DB found — scraping docs...")
72
  docs = []
73
  for name, url in URLS.items():
74
+ d = scrape_page(name, url)
75
+ if d:
76
+ docs.append(d)
77
  print(f"[INFO] Scraped {len(docs)} docs")
78
 
79
  splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
 
81
 
82
  vectordb = Chroma.from_documents(chunks, embedding_model, persist_directory=PERSIST_DIR)
83
  vectordb.persist()
84
+ print("[INFO] DB created.")
 
85
  return vectordb, chunks
86
 
87
 
88
  vectordb, chunks = build_or_load_kb()
89
 
 
 
 
90
  bm25 = BM25Okapi([c.page_content.split() for c in chunks])
91
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
92
 
 
106
  unique_docs = []
107
  seen = set()
108
  for doc in vector_results + bm_results:
109
+ key = (doc.metadata.get("doc_id"), doc.page_content[:60])
110
  if key not in seen:
111
  seen.add(key)
112
  unique_docs.append(doc)
 
114
  if not unique_docs:
115
  return []
116
 
117
+ pairs = [(query, doc.page_content) for doc in unique_docs]
118
+ scores = reranker.predict(pairs)
119
  ranked = sorted(zip(scores, unique_docs), reverse=True)[:top_k]
120
 
121
+ for score, doc in ranked:
122
+ doc.metadata["rerank_score"] = float(score)
123
 
124
+ return [doc for score, doc in ranked]
125
 
126
 
127
+ # ------------------ LLM ------------------ #
128
 
129
  def call_llm(prompt):
130
  api_key = os.getenv("OPENROUTER_API_KEY")
131
  if not api_key:
132
+ return "⚠ Missing OPENROUTER_API_KEY\nGroundedness: 0%"
133
 
134
  try:
135
  res = requests.post(
 
142
  json={
143
  "model": "meta-llama/llama-3.1-8b-instruct",
144
  "messages": [{"role": "user", "content": prompt}],
 
145
  "max_tokens": 400,
146
+ "temperature": 0.0,
147
  },
148
+ timeout=60
149
  )
150
  res.raise_for_status()
151
+ return res.json()["choices"][0]["message"]["content"]
 
152
  except Exception as e:
153
+ return f"⚠ LLM error: {e}\nGroundedness: 0%"
154
 
155
 
156
+ # ------------------ Chat + Metrics ------------------ #
157
+
158
+ METRICS = {"q": [], "lat": [], "tok": [], "g": [], "r": [], "c": [], "t": []}
159
+
160
 
161
  def classify_query(q):
162
  q = q.lower()
163
+ if "how" in q: return "how-to"
164
+ if "error" in q or "fail" in q: return "debug"
 
 
165
  return "general"
166
 
167
 
 
 
 
 
 
168
  def answer_question(query, history):
169
  start = time.time()
170
  docs = hybrid_search(query)
 
173
  reply = "Not found in docs.\nGroundedness: 0%"
174
  return history + [
175
  {"role": "user", "content": query},
176
+ {"role": "assistant", "content": reply},
177
  ], ""
178
 
179
+ scores = []
180
  ctx = ""
181
  sources = []
 
182
  for i, d in enumerate(docs, 1):
183
+ ctx += f"[{i}] {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n"
184
+ sources.append(f"[{i}] → {d.metadata['url']}")
 
185
  scores.append(d.metadata["rerank_score"])
186
 
187
  prompt = f"""
188
+ Answer using ONLY the context below.
189
+ Each sentence MUST include citation like [1].
190
 
191
  Question: {query}
192
 
 
202
  grounded = 0
203
  m = re.search(r"Groundedness:\s*(\d+)%", answer)
204
  if m:
205
+ grounded = int(m.group(1))
206
 
207
  cites = len(set(re.findall(r"\[(\d+)\]", answer)))
208
+ avg_score = sum(scores) / len(scores)
209
 
210
  final = answer + "\n\n---\nSources:\n" + "\n".join(sources)
211
 
212
+ # Log metrics correctly
213
  METRICS["q"].append(query)
214
  METRICS["lat"].append(latency)
215
  METRICS["tok"].append(len(answer.split()))
 
225
 
226
  def update_dashboard():
227
  rows = list(zip(
228
+ range(1, len(METRICS["q"]) + 1),
229
  METRICS["q"],
230
  METRICS["lat"],
231
  METRICS["tok"],
 
242
  return rows, avgG, avgL, avgT
243
 
244
 
245
+ # ------------------ UI ------------------ #
246
 
247
  with gr.Blocks(title="Kubernetes RAG Assistant") as app:
248
  gr.Markdown("# ☸ Kubernetes RAG Assistant")
249
 
250
  with gr.Tab("Chat"):
251
  chat = gr.Chatbot(height=450)
252
+ user_in = gr.Textbox(label="Ask about Kubernetes")
253
+ clear = gr.Button("Clear")
254
 
255
  user_in.submit(answer_question, [user_in, chat], [chat, user_in])
256
+ clear.click(lambda: ([], ""), None, [chat, user_in])
257
 
258
  with gr.Tab("Analytics"):
259
+ gr.Markdown("### 📊 Query Analytics")
260
+ table = gr.DataFrame(
261
+ headers=[
262
+ "ID", "Query", "Latency", "Tokens",
263
+ "Groundedness", "Rerank Score", "Citations", "Type",
264
+ ],
265
+ interactive=False
266
+ )
267
  avgG = gr.Number(label="Avg Groundedness")
268
  avgL = gr.Number(label="Avg Latency")
269
  avgT = gr.Number(label="Avg Tokens")
270
+ update = gr.Button("Refresh Dashboard")
271
+ update.click(update_dashboard, None, [table, avgG, avgL, avgT])
272
 
273
  app.launch()