Prakyath01 committed (verified)
Commit fd8c579 · Parent: 1267521

Update app.py

Files changed (1):
  1. app.py +183 -95
app.py CHANGED
The diff is rendered split: the old side of both hunks first (removals marked '-'), then the new side (additions marked '+').

@@ -31,75 +31,88 @@ URLS = {
     "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
 }

 def scrape_page(name, url):
     try:
         r = requests.get(url, timeout=20)
         soup = BeautifulSoup(r.text, "html.parser")
         content = soup.find("div", class_="td-content")
         if not content:
             return None
         text = content.get_text(separator="\n").strip()
         return Document(page_content=text, metadata={"doc_id": name, "url": url})
-    except:
         return None

 def build_or_load_kb():
     embedding_model = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
-
     if os.path.isdir(PERSIST_DIR):
         vectordb = Chroma(
             embedding_function=embedding_model,
-            persist_directory=PERSIST_DIR
         )
-        data = vectordb._collection.get(include=["documents", "metadatas"])
         chunks = [
             Document(page_content=doc, metadata=meta)
-            for doc, meta in zip(data["documents"], data["metadatas"])
         ]
         return vectordb, chunks

     docs = []
     for name, url in URLS.items():
         d = scrape_page(name, url)
         if d:
             docs.append(d)

     splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
     chunks = splitter.split_documents(docs)

     vectordb = Chroma.from_documents(
-        chunks,
-        embedding_model,
-        persist_directory=PERSIST_DIR
     )
     return vectordb, chunks

 vectordb, chunks = build_or_load_kb()

 bm25_corpus = [doc.page_content.split() for doc in chunks]
 bm25 = BM25Okapi(bm25_corpus)
-
 reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")

 retriever = vectordb.as_retriever(
     search_type="similarity_score_threshold",
-    search_kwargs={"k": 8, "score_threshold": 0.4},
 )

-def hybrid_search(query):
-    vresults = retriever.invoke(query)
-    tokens = query.lower().split()
-    bm_scores = bm25.get_scores(tokens)
-    bm_ranked = sorted(zip(bm_scores, chunks), key=lambda x: x[0], reverse=True)
-    bmresults = [d for _, d in bm_ranked[:5]]

-    combined = vresults + bmresults
     seen = set()
     unique = []
     for d in combined:
-        key = (d.metadata.get("doc_id"), d.page_content[:50])
         if key not in seen:
             seen.add(key)
             unique.append(d)
@@ -107,124 +120,199 @@ def hybrid_search(query):
     if not unique:
         return []

-    pairs = [(query, d.page_content) for d in unique]
     scores = reranker.predict(pairs)
-    ranked = sorted(zip(scores, unique), key=lambda x: x[0], reverse=True)[:5]

-    for s, d in ranked:
-        d.metadata["rerank_score"] = float(s)

-    return [d for _, d in ranked]

-def call_llm(prompt):
     api_key = os.getenv("OPENROUTER_API_KEY")
     if not api_key:
-        return "⚠️ Missing OPENROUTER_API_KEY environment variable.\nGroundedness: 0%"

     try:
-        res = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",
             headers={
                 "Authorization": f"Bearer {api_key}",
                 "HTTP-Referer": "https://huggingface.co/",
-                "X-Title": "Kubernetes RAG Assistant"
             },
             json={
                 "model": "meta-llama/llama-3.1-8b-instruct",
                 "messages": [{"role": "user", "content": prompt}],
-                "max_tokens": 300,
-                "temperature": 0.2
-            }
         )
-        res.raise_for_status()
-        return res.json()["choices"][0]["message"]["content"]
     except Exception as e:
-        return f"⚠️ LLM Error: {e}\nGroundedness: 0%"

-def build_context(query):
     docs = hybrid_search(query)
     if not docs:
         return "", [], []
-    ctx, srcs, scores = "", [], []
     for i, d in enumerate(docs, start=1):
-        ctx += f"[{i}] {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n"
-        srcs.append(f"[{i}] {d.metadata['url']}")
         scores.append(d.metadata["rerank_score"])
-    return ctx, srcs, scores

 def init_metrics():
-    return {"q":[], "lat":[], "tok":[], "g":[], "cit":[], "r":[], "type":[]}

-def classify(q):
-    q=q.lower()
-    return "how-to" if "how" in q else ("debug" if "error" in q else "general")

-def answer(q, history, metrics):
-    if metrics is None: metrics = init_metrics()
-    start = time.time()

-    ctx, srcs, scores = build_context(q)
     if not ctx:
-        txt = "Not in docs.\nGroundedness: 0%"
-        history.append((q, txt))
-        return history,"",metrics
-
-    prompt = f"""Use context ONLY. Cite every sentence as [n].
-User question: {q}

 Context:
 {ctx}
-Groundedness MUST appear as: Groundedness: XX%"""
-    txt = call_llm(prompt)

     latency = time.time() - start
-    grounded = int(re.search(r"Groundedness:\s*(\d+)%", txt).group(1)) if "Groundedness" in txt else 0
-    tokens = len(txt.split())
-    cites = len(set(re.findall(r"\[(\d+)\]", txt)))
-    avg = sum(scores)/len(scores)
-
-    final = txt+"\n\nSources:\n"+"\n".join(srcs)
-    history.append((q, final))
-
-    metrics["q"].append(q)
-    metrics["lat"].append(latency)
-    metrics["tok"].append(tokens)
-    metrics["g"].append(grounded)
-    metrics["cit"].append(cites)
-    metrics["r"].append(avg)
-    metrics["type"].append(classify(q))
-
-    return history,"",metrics
-
-def render(metrics):
-    if len(metrics["q"])==0: return [],0,0,0
-    rows=[[
-        i+1, metrics["q"][i], round(metrics["lat"][i],3),
-        metrics["tok"][i], metrics["g"][i],
-        round(metrics["r"][i],2), metrics["cit"][i], metrics["type"][i]
-    ] for i in range(len(metrics["q"]))]
-    avgL=sum(metrics["g"])/len(metrics["g"])
-    avgG=sum(metrics["lat"])/len(metrics["lat"])
-    avgT=sum(metrics["tok"])/len(metrics["tok"])
-    return rows,avgL,avgG,avgT
-
-metrics_state = gr.State(init_metrics())

 with gr.Blocks(title="Kubernetes RAG Assistant") as app:
     gr.Markdown("# ☸ Kubernetes RAG Assistant")

     with gr.Tab("Chat"):
-        chat = gr.Chatbot()
-        inp = gr.Textbox(label="Ask anything about Kubernetes")
-        clear= gr.Button("Reset")
-        inp.submit(answer,[inp,chat,metrics_state],[chat,inp,metrics_state])
-        clear.click(lambda: ([], "", init_metrics()), None, [chat,inp,metrics_state])

     with gr.Tab("Analytics"):
-        table = gr.DataFrame(headers=["ID","Query","Latency","Tokens","Grounded","Relevance","Citations","Type"])
-        avgL = gr.Number(label="Avg Groundedness")
-        avgG = gr.Number(label="Avg Latency")
-        avgT = gr.Number(label="Avg Tokens")

         refresh = gr.Button("Update Dashboard")
-        refresh.click(render,[metrics_state],[table,avgL,avgG,avgT])

 app.launch()
 
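Before the new side of the diff, it may help to see the retrieval pattern this commit refactors in isolation. The following is a minimal, self-contained sketch of BM25-plus-cross-encoder hybrid ranking, assuming only the rank_bm25 and sentence-transformers packages that app.py already imports; the toy corpus is invented for illustration, and the vector-retriever leg is stubbed out because it would need a live Chroma index:

# Hedged sketch: lexical scoring + cross-encoder rerank, mirroring hybrid_search.
from rank_bm25 import BM25Okapi
from sentence_transformers import CrossEncoder

corpus = [
    "A Pod is the smallest deployable unit in Kubernetes.",
    "Horizontal Pod Autoscaling resizes a workload from observed metrics.",
    "A Service exposes a set of Pods behind a stable virtual IP.",
]
bm25 = BM25Okapi([doc.split() for doc in corpus])

query = "how does autoscaling work"
# Lexical leg: score every chunk, keep the top hits. In the real app the
# vector hits from retriever.invoke(query) would be appended here too.
lexical = sorted(zip(bm25.get_scores(query.lower().split()), corpus),
                 key=lambda x: x[0], reverse=True)[:2]
candidates = list(dict.fromkeys(doc for _, doc in lexical))  # dedup, keep order

# Rerank the merged pool with the same cross-encoder the app loads.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
scores = reranker.predict([(query, doc) for doc in candidates])
for s, doc in sorted(zip(scores, candidates), key=lambda x: x[0], reverse=True):
    print(f"{s:+.2f}  {doc}")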
app.py after the commit (the new side of the same hunks, additions marked '+'):

     "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
 }

+
+# ----------------- SCRAPING + KB ----------------- #
+
 def scrape_page(name, url):
     try:
         r = requests.get(url, timeout=20)
+        r.raise_for_status()
         soup = BeautifulSoup(r.text, "html.parser")
         content = soup.find("div", class_="td-content")
         if not content:
             return None
         text = content.get_text(separator="\n").strip()
         return Document(page_content=text, metadata={"doc_id": name, "url": url})
+    except Exception as e:
+        print(f"[ERROR] scraping {url}: {e}")
         return None

+
 def build_or_load_kb():
     embedding_model = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
+
+    # If DB exists, load it
     if os.path.isdir(PERSIST_DIR):
+        print("[INFO] Loading existing Chroma DB")
         vectordb = Chroma(
             embedding_function=embedding_model,
+            persist_directory=PERSIST_DIR,
         )
+        raw = vectordb._collection.get(include=["documents", "metadatas"])
         chunks = [
             Document(page_content=doc, metadata=meta)
+            for doc, meta in zip(raw["documents"], raw["metadatas"])
         ]
         return vectordb, chunks

+    # Else: scrape + build
+    print("[INFO] No DB found, scraping docs...")
     docs = []
     for name, url in URLS.items():
         d = scrape_page(name, url)
         if d:
             docs.append(d)
+    print(f"[INFO] Scraped {len(docs)} docs")

     splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
     chunks = splitter.split_documents(docs)

     vectordb = Chroma.from_documents(
+        chunks, embedding_model, persist_directory=PERSIST_DIR
     )
     return vectordb, chunks

+
 vectordb, chunks = build_or_load_kb()

+# ----------------- HYBRID SEARCH ----------------- #
+
 bm25_corpus = [doc.page_content.split() for doc in chunks]
 bm25 = BM25Okapi(bm25_corpus)
 reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")

 retriever = vectordb.as_retriever(
     search_type="similarity_score_threshold",
+    search_kwargs={"k": 8, "score_threshold": 0.35},
 )


+def hybrid_search(query, top_k=5):
+    vector_results = retriever.invoke(query)
+
+    tokenized_query = query.lower().split()
+    bm25_scores = bm25.get_scores(tokenized_query)
+    bm25_ranked = sorted(zip(bm25_scores, chunks), key=lambda x: x[0], reverse=True)
+    bm25_results = [d for _, d in bm25_ranked[:top_k]]
+
+    combined = vector_results + bm25_results
     seen = set()
     unique = []
     for d in combined:
+        key = (d.metadata.get("doc_id"), d.page_content[:80])
         if key not in seen:
             seen.add(key)
             unique.append(d)

     if not unique:
         return []

+    pairs = [(query, doc.page_content) for doc in unique]
     scores = reranker.predict(pairs)
+    ranked = sorted(zip(scores, unique), key=lambda x: x[0], reverse=True)[:top_k]
+
+    for s, doc in ranked:
+        doc.metadata["rerank_score"] = float(s)
+    return [doc for _, doc in ranked]


+# ----------------- LLM CALL ----------------- #

+def call_llm(prompt: str) -> str:
     api_key = os.getenv("OPENROUTER_API_KEY")
     if not api_key:
+        return "⚠️ Missing OPENROUTER_API_KEY in Space secrets.\nGroundedness: 0%"

     try:
+        r = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",
             headers={
                 "Authorization": f"Bearer {api_key}",
                 "HTTP-Referer": "https://huggingface.co/",
+                "X-Title": "Kubernetes RAG Assistant",
             },
             json={
                 "model": "meta-llama/llama-3.1-8b-instruct",
                 "messages": [{"role": "user", "content": prompt}],
+                "max_tokens": 400,
+                "temperature": 0.0,
+            },
+            timeout=60,
         )
+        r.raise_for_status()
+        data = r.json()
+        return data["choices"][0]["message"]["content"]
     except Exception as e:
+        print("[ERROR] LLM:", e)
+        return f"⚠️ LLM error: {e}\nGroundedness: 0%"
+
+
+# ----------------- CONTEXT + METRICS ----------------- #

+def build_context(query: str):
     docs = hybrid_search(query)
     if not docs:
         return "", [], []
+
+    context, sources, scores = "", [], []
     for i, d in enumerate(docs, start=1):
+        label = f"[{i}]"
+        context += f"{label} {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n"
+        sources.append(f"{label} → {d.metadata['url']}")
         scores.append(d.metadata["rerank_score"])
+    return context, sources, scores
+
+
+def classify_query(q: str) -> str:
+    q = q.lower()
+    if "how" in q:
+        return "how-to"
+    if "error" in q or "fail" in q:
+        return "debug"
+    return "general"
+

 def init_metrics():
+    return {"q": [], "lat": [], "tok": [], "g": [], "r": [], "c": [], "t": []}
+
+
+# global analytics, no gr.State
+METRICS = init_metrics()
+
+
+# ----------------- CHAT HANDLER ----------------- #

+def answer_question(query, history):
+    global METRICS
+    if METRICS is None:
+        METRICS = init_metrics()

+    start = time.time()
+    ctx, sources, scores = build_context(query)

     if not ctx:
+        reply = "Not in docs or insufficient context.\nGroundedness: 0%"
+        history.append((query, reply))
+        return history, ""
+
+    prompt = f"""
+Use ONLY the context below to answer.
+Every sentence MUST end with a citation like [1].
+
+Question: {query}

 Context:
 {ctx}

+At the end add a line: Groundedness: XX%
+"""
+    answer = call_llm(prompt)
     latency = time.time() - start
+
+    # robust groundedness parsing
+    grounded = 0
+    m = re.search(r"Groundedness:\s*(\d+)%", answer)
+    if m:
+        try:
+            grounded = int(m.group(1))
+        except ValueError:
+            grounded = 0
+
+    cites = len(set(re.findall(r"\[(\d+)\]", answer)))
+    avg_score = sum(scores) / len(scores) if scores else 0.0
+    tokens = len(answer.split()) + len(prompt.split())
+
+    alert = ""
+    if grounded < 70 or cites == 0:
+        alert = "⚠️ Low support from docs; please verify in official Kubernetes docs.\n\n"
+
+    final = alert + answer + "\n\n---\nSources:\n" + "\n".join(sources)
+
+    history.append((query, final))
+
+    METRICS["q"].append(query)
+    METRICS["lat"].append(latency)
+    METRICS["tok"].append(tokens)
+    METRICS["g"].append(grounded)
+    METRICS["r"].append(avg_score)
+    METRICS["c"].append(cites)
+    METRICS["t"].append(classify_query(query))
+
+    return history, ""
+
+
+# ----------------- ANALYTICS HELPERS ----------------- #
+
+def render_metrics():
+    if len(METRICS["q"]) == 0:
+        return [], 0.0, 0.0, 0.0
+
+    rows = []
+    for i, q in enumerate(METRICS["q"]):
+        rows.append([
+            i + 1,
+            q,
+            round(METRICS["lat"][i], 3),
+            METRICS["tok"][i],
+            METRICS["g"][i],
+            round(METRICS["r"][i], 3),
+            METRICS["c"][i],
+            METRICS["t"][i],
+        ])
+
+    avg_ground = sum(METRICS["g"]) / len(METRICS["g"])
+    avg_lat = sum(METRICS["lat"]) / len(METRICS["lat"])
+    avg_tok = sum(METRICS["tok"]) / len(METRICS["tok"])
+
+    return rows, avg_ground, avg_lat, avg_tok
+
+
+# ----------------- GRADIO UI ----------------- #

 with gr.Blocks(title="Kubernetes RAG Assistant") as app:
     gr.Markdown("# ☸ Kubernetes RAG Assistant")
+
     with gr.Tab("Chat"):
+        chat = gr.Chatbot(height=450)
+        inp = gr.Textbox(label="Ask anything about Kubernetes")
+        clear_btn = gr.Button("Reset Conversation")
+
+        inp.submit(answer_question, [inp, chat], [chat, inp])
+        clear_btn.click(lambda: ([], ""), None, [chat, inp])

     with gr.Tab("Analytics"):
+        gr.Markdown("### 📊 Query Analytics (this session)")
+        table = gr.DataFrame(
+            headers=[
+                "ID",
+                "Query",
+                "Latency (s)",
+                "Tokens",
+                "Groundedness (%)",
+                "Avg Rerank Score",
+                "Citations",
+                "Type",
+            ],
+            interactive=False,
+        )
+        avgG = gr.Number(label="Avg Groundedness (%)")
+        avgL = gr.Number(label="Avg Latency (s)")
+        avgT = gr.Number(label="Avg Tokens")
+
         refresh = gr.Button("Update Dashboard")
+        refresh.click(render_metrics, None, [table, avgG, avgL, avgT])

 app.launch()
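Since answer_question's metrics depend entirely on two regexes over the model's reply, a quick smoke test of just that parsing logic is worth keeping nearby. This is a hedged, standalone check using only the standard library; the sample reply text is invented:

import re

reply = "Pods are the smallest deployable unit [1]. HPA resizes them [2].\nGroundedness: 85%"

m = re.search(r"Groundedness:\s*(\d+)%", reply)
grounded = int(m.group(1)) if m else 0             # -> 85
cites = len(set(re.findall(r"\[(\d+)\]", reply)))  # -> 2 distinct citations

assert (grounded, cites) == (85, 2)
print(grounded, cites)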