Prakyath01 committed on
Commit
176a09c
·
verified ·
1 Parent(s): f7f504f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -89
app.py CHANGED
@@ -1,10 +1,18 @@
1
  import os
 
2
  import requests
3
  import gradio as gr
 
 
4
 
5
- # ---------------- RAG DOCUMENT SETUP ---------------- #
 
 
 
6
 
7
- K8S_DOC_URLS = {
 
 
8
  "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
9
  "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
10
  "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
@@ -14,116 +22,162 @@ K8S_DOC_URLS = {
14
  "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
15
  "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
16
  "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
17
- "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/"
18
  }
19
 
20
def fetch_doc(url):
    """Download *url* and return the response body as text.

    Returns an empty string when the request fails or the server answers
    with anything other than HTTP 200, so callers can always treat the
    result as a (possibly empty) document.
    """
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as err:
        # Only network/HTTP-layer failures are swallowed; programming errors
        # and KeyboardInterrupt are no longer hidden by a bare ``except``.
        print(f"Error fetching {url}: {err}")
        return ""
    if response.status_code != 200:
        return ""
    return response.text
28
-
29
# One record per configured page: its short name, its URL and the text
# returned by fetch_doc for that URL. Built once, when the module is loaded.
DOCUMENTS = []
for name, url in K8S_DOC_URLS.items():
    DOCUMENTS.append({"doc": name, "url": url, "text": fetch_doc(url)})
33
-
34
def search_docs(query, top_k=3):
    """Return up to *top_k* snippets from DOCUMENTS that contain *query*.

    Matching is a case-insensitive substring search for the whole query.
    Each result is a ``(snippet, url, doc_name)`` tuple where the snippet is
    at most 350 characters starting at the first match, with newlines
    replaced by spaces.

    A blank query or a non-positive *top_k* yields an empty list instead of
    "matching" every document at offset 0.
    """
    needle = query.strip().lower() if query else ""
    if not needle or top_k <= 0:
        return []

    matches = []
    for doc in DOCUMENTS:
        start = doc["text"].lower().find(needle)
        if start == -1:
            continue
        snippet = doc["text"][start:start + 350].replace("\n", " ")
        matches.append((snippet, doc["url"], doc["doc"]))
        if len(matches) >= top_k:
            # Enough hits collected; no need to scan the remaining pages.
            break
    return matches
45
-
46
-
47
- # --------------- LLM CALL (OpenRouter) ---------------- #
48
-
49
def call_llm(prompt):
    """Send *prompt* to the OpenRouter chat-completions endpoint.

    Returns the text of the first choice on success. On any failure
    (missing API key, network error, non-JSON body, response without
    choices) the details are printed and the fixed string
    "⚠ Model error. Try again." is returned.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Without a key the header would read "Bearer None"; skip the
        # pointless round trip and report the same error string.
        print("DEBUG LLM Error: OPENROUTER_API_KEY is not set")
        return "⚠ Model error. Try again."

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant"
    }

    data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 350
    }

    try:
        # A timeout keeps a stalled connection from hanging the UI handler.
        res = requests.post(url, json=data, headers=headers, timeout=60)
        out = res.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        print("DEBUG LLM Error:", err)
        return "⚠ Model error. Try again."

    choices = out.get("choices") if isinstance(out, dict) else None
    if choices:
        return choices[0]["message"]["content"]
    print("DEBUG LLM Error:", out)
    return "⚠ Model error. Try again."
70
-
71
-
72
- # ----------- RAG + Prompt Construction ---------------- #
73
-
74
def build_answer(query):
    """Answer *query* from the matching doc snippets and list their sources.

    Returns ``(answer, citations_text)``: the LLM reply and one
    "[n] → url" line per snippet, or "No sources found." when the search
    returned nothing.
    """
    context_parts = []
    citations = []
    for number, (snippet, url, _doc) in enumerate(search_docs(query), start=1):
        label = f"[{number}]"
        context_parts.append(f"{label}: {snippet}\n\n")
        citations.append(f"{label} → {url}")
    context = "".join(context_parts)

    prompt = f"""
Use the context below to answer the question clearly.
Add citations like [1], [2] at the end of sentences.

Context:
{context}

Question: {query}
"""

    answer = call_llm(prompt)
    citations_text = "\n".join(citations) or "No sources found."
    return answer, citations_text
98
 
99
-
100
- # ---------------------- UI --------------------------- #
101
 
102
# Styling applied to components carrying the "source-box" element class.
custom_css = """
.source-box {
font-size: 14px;
background: #1b2733;
padding: 10px;
border-radius: 8px;
color: #c9e2ff;
border: 1px solid #4a90e2;
}
"""

with gr.Blocks(css=custom_css, theme="soft") as app:
    # Page header.
    header_html = """
<h1 style='color:#326ce5; text-align:center;'>☸️ Kubernetes RAG Assistant</h1>
<p style='text-align:center; font-size:17px; color:#ddd;'>Ask any Kubernetes question and get answers with docs citations 📌</p>
"""
    gr.HTML(header_html)

    # Input box, then the two output panes, then the trigger button.
    question = gr.Textbox(
        label="Ask a Kubernetes Question:",
        placeholder="e.g., What is RBAC in Kubernetes?",
    )
    answer = gr.Markdown(label="Answer")
    sources = gr.Markdown(label="Sources", elem_classes=["source-box"])
    submit = gr.Button("Ask ☸️")

    # Clicking the button runs build_answer and fills both output panes.
    submit.click(build_answer, inputs=question, outputs=[answer, sources])

app.launch()
 
1
  import os
2
+ import json
3
  import requests
4
  import gradio as gr
5
+ from bs4 import BeautifulSoup
6
+ from textwrap import shorten
7
 
8
+ from langchain_core.documents import Document
9
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
10
+ from langchain_community.embeddings import HuggingFaceEmbeddings
11
+ from langchain_community.vectorstores import Chroma
12
 
13
+ # ------------------ 1. SCRAPE K8S DOCS ------------------ #
14
+
15
+ URLS = {
16
  "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
17
  "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
18
  "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
 
22
  "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
23
  "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
24
  "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
25
+ "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
26
  }
27
 
28
def scrape_page(name, url):
    """Fetch *url* and wrap the text of its ``td-content`` div in a Document.

    Args:
        name: Short identifier stored as ``doc_id`` in the metadata.
        url: Page to download; also stored in the metadata.

    Returns:
        A ``Document`` with the extracted text, or ``None`` when the request
        fails, the server returns an HTTP error status, the content div is
        missing, or the div contains no text.
    """
    try:
        r = requests.get(url, timeout=20)
        # Treat 4xx/5xx as failures instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        content = soup.find("div", class_="td-content")
        if not content:
            return None
        text = content.get_text(separator="\n").strip()
        if not text:
            # An empty document would only add noise to the index.
            return None
        return Document(
            page_content=text,
            metadata={"doc_id": name, "url": url},
        )
    except Exception as e:
        # Best-effort scraping: one bad page must not abort start-up.
        print(f"Error scraping {name}: {e}")
        return None
43
+
44
# Scrape every configured page; pages for which scrape_page returned None
# are left out.
docs = [
    page
    for page in (scrape_page(name, url) for name, url in URLS.items())
    if page
]

# ------------------ 2. CHUNK + EMBED + CHROMA ------------------ #

# Split the pages into 800-character chunks that overlap by 120 characters.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
chunks = splitter.split_documents(docs)

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Index the chunks and expose a retriever configured for at most 5 results
# with a 0.4 score threshold.
vectordb = Chroma.from_documents(chunks, embedding_model)
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4},
)
67
+
68
+ # ------------------ 3. RAG HELPERS ------------------ #
69
+
70
def build_context_with_citations(query: str):
    """Retrieve chunks for *query* and format them as a numbered context.

    Returns ``(context, mapping)``. ``context`` is the concatenation of one
    "[n] <text>\\nSource: <url>" section per retrieved chunk (text capped at
    900 characters). ``mapping`` holds one dict per chunk with its label,
    URL, doc id and a 200-character preview.
    """
    sections = []
    mapping = []
    for idx, doc in enumerate(retriever.invoke(query), start=1):
        label = f"[{idx}]"
        source_url = doc.metadata["url"]
        sections.append(
            f"{label} {doc.page_content[:900]}\n"
            f"Source: {source_url}\n\n"
        )
        mapping.append({
            "label": label,
            "url": source_url,
            "doc": doc.metadata["doc_id"],
            "preview": shorten(doc.page_content, width=200),
        })
    return "".join(sections), mapping
87
+
88
def build_prompt(query, context, history_str: str):
    """Assemble the full LLM prompt.

    The prompt consists of the assistant role line, the three answering
    rules, the prior conversation (*history_str*), the user's *query* and
    the retrieved *context*, in that order. Leading and trailing whitespace
    of the assembled text is stripped.
    """
    lines = [
        "You are a Kubernetes expert assistant.",
        "",
        "Follow these rules:",
        "1. Use ONLY the context below.",
        "2. Every factual statement MUST have citations like [1], [2].",
        '3. If the answer is not in the context, say: "Not in docs."',
        "",
        "Conversation so far:",
        history_str,
        "",
        f"User question: {query}",
        "",
        "Context:",
        context,
    ]
    return "\n".join(lines).strip()
105
+
106
+ # ------------------ 4. OPENROUTER LLM ------------------ #
107
+
108
def call_llm(prompt: str) -> str:
    """Send *prompt* to the OpenRouter chat-completions endpoint.

    Returns the text of the first choice. If ``OPENROUTER_API_KEY`` is
    unset or empty, a message saying so is returned without making a
    request. Any other failure (network error, non-JSON body, response
    without a usable choice) is printed and reported as
    "⚠ LLM error. Please try again." so the caller always gets a string.
    """
    api_key = os.getenv("OPENROUTER_API_KEY", "")
    if not api_key:
        return "⚠ OPENROUTER_API_KEY is not set in this Space."

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant"
    }
    payload = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [
            {"role": "system", "content": "You answer only from provided context."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 500
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
        data = resp.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON (e.g. an HTML
        # error page); without this the exception escapes into the UI.
        print("LLM error:", err)
        return "⚠ LLM error. Please try again."

    choices = data.get("choices") if isinstance(data, dict) else None
    if choices:
        first = choices[0] if isinstance(choices[0], dict) else {}
        content = (first.get("message") or {}).get("content")
        if isinstance(content, str):
            return content

    print("LLM error:", json.dumps(data, indent=2))
    return "⚠ LLM error. Please try again."
134
+
135
def answer_question(query: str, history):
    """Answer *query* using retrieved context and the recent conversation.

    *history* is a list of ``[user, bot]`` pairs; only the last four are
    rendered into the prompt. Returns ``(answer, sources)`` where *sources*
    is the citation mapping from build_context_with_citations.
    """
    recent_turns = history[-4:]
    history_str = "".join(
        f"User: {user_msg}\nAssistant: {bot_msg}\n"
        for user_msg, bot_msg in recent_turns
    )
    ctx, sources = build_context_with_citations(query)
    prompt = build_prompt(query, ctx, history_str)
    return call_llm(prompt), sources
 
 
144
 
145
+ # ------------------ 5. GRADIO CHAT UI ------------------ #
 
146
 
147
# Stylesheet handed to gr.Blocks; defines the look of the "source-box" class.
custom_css = (
    "\n"
    ".source-box {\n"
    "font-size: 14px;\n"
    "background: #111827;\n"
    "padding: 10px;\n"
    "border-radius: 8px;\n"
    "color: #d1e4ff;\n"
    "border: 1px solid #2563eb;\n"
    "}\n"
)
157
 
158
def chat_fn(message, history):
    """Handle one chat turn and return ``(updated_history, "")``.

    The reply stored in the history is the LLM answer followed by a
    "Sources" section listing each citation label with its URL. The empty
    string in the result clears the input textbox.

    Blank messages are ignored: the history is returned unchanged and no
    retrieval or LLM call is made.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""

    answer, refs = answer_question(message, history)
    sources_text = (
        "\n".join(f"{s['label']} – {s['url']}" for s in refs)
        or "No sources found."
    )
    full_answer = f"{answer}\n\n---\n**Sources**:\n{sources_text}"
    # Store the answer WITH its sources; appending the bare answer left the
    # citation list computed but never shown to the user.
    history.append((message, full_answer))
    return history, ""
165
 
166
with gr.Blocks(css=custom_css, theme="soft") as demo:
    # Page header.
    header_html = (
        "<h1 style='text-align:center;color:#3b82f6;'>☸ Kubernetes RAG Assistant</h1>"
        "<p style='text-align:center;color:#e5e7eb;'>Ask Kubernetes questions. "
        "Answers are grounded in official docs and include citations.</p>"
    )
    gr.HTML(header_html)

    chat = gr.Chatbot(label="Conversation", height=450)
    msg = gr.Textbox(label="Your question", placeholder="e.g. What is a StatefulSet?")
    clear = gr.Button("Clear Chat")

    def respond(message, history):
        """Forward a submitted message to chat_fn."""
        return chat_fn(message, history)

    def _reset_chat():
        """Empty the conversation and the input box."""
        return [], ""

    # Submitting the textbox updates the conversation and clears the input.
    msg.submit(fn=respond, inputs=[msg, chat], outputs=[chat, msg])
    clear.click(fn=_reset_chat, inputs=None, outputs=[chat, msg])

demo.launch()