Spaces:

Prakyath01
/

kubernetes-rag-assistant

Sleeping

App Files Community

Prakyath01 committed 8 days ago

Commit

176a09c

verified ·

1 Parent(s): f7f504f

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -89

app.py CHANGED Viewed

@@ -1,10 +1,18 @@
 import os
 import requests
 import gradio as gr
-# ---------------- RAG DOCUMENT SETUP ---------------- #
-K8S_DOC_URLS = {
     "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
     "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
     "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
@@ -14,116 +22,162 @@ K8S_DOC_URLS = {
     "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
     "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
     "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
-    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/"
 }
-def fetch_doc(url):
     try:
-        response = requests.get(url, timeout=10)
-        if response.status_code == 200:
-            return response.text
-    except:
-        return ""
-    return ""
-DOCUMENTS = [
-    {"doc": name, "url": url, "text": fetch_doc(url)}
-    for name, url in K8S_DOC_URLS.items()
-]
-def search_docs(query, top_k=3):
-    query = query.lower()
-    matches = []
-    for doc in DOCUMENTS:
-        text = doc["text"].lower()
-        if query in text:
-            snippet_start = text.index(query)
-            snippet_end = snippet_start + 350
-            snippet = doc["text"][snippet_start:snippet_end].replace("\n", " ")
-            matches.append((snippet, doc["url"], doc["doc"]))
-    return matches[:top_k]
-# --------------- LLM CALL (OpenRouter) ---------------- #
-def call_llm(prompt):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
-        "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY')}",
         "HTTP-Referer": "https://huggingface.co/",
         "X-Title": "Kubernetes RAG Assistant"
     }
-    data = {
         "model": "meta-llama/llama-3.1-8b-instruct",
-        "messages": [{"role": "user", "content": prompt}],
-        "max_tokens": 350
     }
-    res = requests.post(url, json=data, headers=headers)
-    out = res.json()
-    if "choices" in out:
-        return out["choices"][0]["message"]["content"]
-    print("DEBUG LLM Error:", out)
-    return "⚠ Model error. Try again."
-# ----------- RAG + Prompt Construction ---------------- #
-def build_answer(query):
-    results = search_docs(query)
-    context = ""
-    citations = []
-    for i, (snippet, url, doc) in enumerate(results, start=1):
-        label = f"[{i}]"
-        context += f"{label}: {snippet}\n\n"
-        citations.append(f"{label} → {url}")
-    prompt = f"""
-    Use the context below to answer the question clearly.
-    Add citations like [1], [2] at the end of sentences.
-    Context:
-    {context}
-    Question: {query}
-    """
     answer = call_llm(prompt)
-    citations_text = "\n".join(citations) or "No sources found."
-    return answer, citations_text
-# ---------------------- UI --------------------------- #
 custom_css = """
 .source-box {
     font-size: 14px;
-    background: #1b2733;
     padding: 10px;
     border-radius: 8px;
-    color: #c9e2ff;
-    border: 1px solid #4a90e2;
 }
 """
-with gr.Blocks(css=custom_css, theme="soft") as app:
-    gr.HTML("""
-    <h1 style='color:#326ce5; text-align:center;'>☸️ Kubernetes RAG Assistant</h1>
-    <p style='text-align:center; font-size:17px; color:#ddd;'>Ask any Kubernetes question and get answers with docs citations 📌</p>
-    """)
-    question = gr.Textbox(label="Ask a Kubernetes Question:", placeholder="e.g., What is RBAC in Kubernetes?")
-    answer = gr.Markdown(label="Answer")
-    sources = gr.Markdown(label="Sources", elem_classes=["source-box"])
-    submit = gr.Button("Ask ☸️")
-    submit.click(build_answer, inputs=question, outputs=[answer, sources])
-app.launch()

 import os
+import json
 import requests
 import gradio as gr
+from bs4 import BeautifulSoup
+from textwrap import shorten
+from langchain_core.documents import Document
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import Chroma
+# ------------------ 1. SCRAPE K8S DOCS ------------------ #
+URLS = {
     "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
     "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
     "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
     "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
     "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
     "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
+    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
 }
+def scrape_page(name, url):
     try:
+        r = requests.get(url, timeout=20)
+        soup = BeautifulSoup(r.text, "html.parser")
+        content = soup.find("div", class_="td-content")
+        if not content:
+            return None
+        text = content.get_text(separator="\n").strip()
+        return Document(
+            page_content=text,
+            metadata={"doc_id": name, "url": url}
+        )
+    except Exception as e:
+        print(f"Error scraping {name}: {e}")
+        return None
+docs = []
+for name, url in URLS.items():
+    d = scrape_page(name, url)
+    if d:
+        docs.append(d)
+# ------------------ 2. CHUNK + EMBED + CHROMA ------------------ #
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=800,
+    chunk_overlap=120
+)
+chunks = splitter.split_documents(docs)
+embedding_model = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2"
+)
+vectordb = Chroma.from_documents(chunks, embedding_model)
+retriever = vectordb.as_retriever(
+    search_type="similarity_score_threshold",
+    search_kwargs={"k": 5, "score_threshold": 0.4}
+)
+# ------------------ 3. RAG HELPERS ------------------ #
+def build_context_with_citations(query: str):
+    retrieved = retriever.invoke(query)
+    context = ""
+    mapping = []
+    for i, d in enumerate(retrieved, start=1):
+        label = f"[{i}]"
+        context += (
+            f"{label} {d.page_content[:900]}\n"
+            f"Source: {d.metadata['url']}\n\n"
+        )
+        mapping.append({
+            "label": label,
+            "url": d.metadata["url"],
+            "doc": d.metadata["doc_id"],
+            "preview": shorten(d.page_content, width=200)
+        })
+    return context, mapping
+def build_prompt(query, context, history_str: str):
+    return f"""
+You are a Kubernetes expert assistant.
+Follow these rules:
+1. Use ONLY the context below.
+2. Every factual statement MUST have citations like [1], [2].
+3. If the answer is not in the context, say: "Not in docs."
+Conversation so far:
+{history_str}
+User question: {query}
+Context:
+{context}
+""".strip()
+# ------------------ 4. OPENROUTER LLM ------------------ #
+def call_llm(prompt: str) -> str:
+    api_key = os.getenv("OPENROUTER_API_KEY", "")
+    if not api_key:
+        return "⚠ OPENROUTER_API_KEY is not set in this Space."
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
+        "Authorization": f"Bearer {api_key}",
         "HTTP-Referer": "https://huggingface.co/",
         "X-Title": "Kubernetes RAG Assistant"
     }
+    payload = {
         "model": "meta-llama/llama-3.1-8b-instruct",
+        "messages": [
+            {"role": "system", "content": "You answer only from provided context."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.0,
+        "max_tokens": 500
     }
+    resp = requests.post(url, headers=headers, json=payload, timeout=60)
+    data = resp.json()
+    if "choices" in data:
+        return data["choices"][0]["message"]["content"]
+    print("LLM error:", json.dumps(data, indent=2))
+    return "⚠ LLM error. Please try again."
+def answer_question(query: str, history):
+    # history is list of [user, bot]
+    history_str = ""
+    for u, b in history[-4:]:  # last 4 turns
+        history_str += f"User: {u}\nAssistant: {b}\n"
+    ctx, sources = build_context_with_citations(query)
+    prompt = build_prompt(query, ctx, history_str)
     answer = call_llm(prompt)
+    return answer, sources
+# ------------------ 5. GRADIO CHAT UI ------------------ #
 custom_css = """
 .source-box {
     font-size: 14px;
+    background: #111827;
     padding: 10px;
     border-radius: 8px;
+    color: #d1e4ff;
+    border: 1px solid #2563eb;
 }
 """
+def chat_fn(message, history):
+    answer, refs = answer_question(message, history)
+    src_lines = [f"{s['label']} – {s['url']}" for s in refs]
+    sources_text = "\n".join(src_lines) if src_lines else "No sources found."
+    full_answer = f"{answer}\n\n---\n**Sources**:\n{sources_text}"
+    history.append((message, answer))
+    return history, ""
+with gr.Blocks(css=custom_css, theme="soft") as demo:
+    gr.HTML(
+        "<h1 style='text-align:center;color:#3b82f6;'>☸ Kubernetes RAG Assistant</h1>"
+        "<p style='text-align:center;color:#e5e7eb;'>Ask Kubernetes questions. "
+        "Answers are grounded in official docs and include citations.</p>"
+    )
+    chat = gr.Chatbot(label="Conversation", height=450)
+    msg = gr.Textbox(label="Your question", placeholder="e.g. What is a StatefulSet?")
+    clear = gr.Button("Clear Chat")
+    def respond(message, history):
+        return chat_fn(message, history)
+    msg.submit(respond, [msg, chat], [chat, msg])
+    clear.click(lambda: ([], ""), None, [chat, msg])
+demo.launch()