"""Kubernetes documentation RAG assistant with a Gradio chat UI.

At import time this script scrapes a fixed set of kubernetes.io pages,
chunks and embeds them into an in-memory Chroma store, and then serves a
chat interface whose answers are produced by an OpenRouter-hosted LLM that
is prompted with the retrieved chunks and asked to cite them.
"""

import json
import os
from textwrap import shorten
from typing import List, Optional, Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# ------------------ 1. SCRAPE K8S DOCS ------------------ #

URLS = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
    "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
    "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
    "namespaces": "https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/",
    "nodes": "https://kubernetes.io/docs/concepts/architecture/nodes/",
    "statefulsets": "https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/",
    "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
    "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
    "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
}


def scrape_page(name, url) -> Optional[Document]:
    """Fetch one documentation page and wrap its main text in a Document.

    Args:
        name: Short identifier stored as ``doc_id`` in the metadata.
        url: Page URL to download; also stored in the metadata.

    Returns:
        A ``Document`` holding the text of the ``div.td-content`` element,
        or ``None`` when the request fails, the server answers with an
        error status, or the page has no such element.
    """
    # Deliberately broad: scraping is best-effort, one bad page must not
    # prevent the rest of the index from being built.
    try:
        r = requests.get(url, timeout=20)
        # Without this an HTTP error page would be parsed like real content.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        content = soup.find("div", class_="td-content")
        if not content:
            return None
        text = content.get_text(separator="\n").strip()
        return Document(
            page_content=text,
            metadata={"doc_id": name, "url": url},
        )
    except Exception as e:
        print(f"Error scraping {name}: {e}")
        return None


docs = [
    doc
    for doc in (scrape_page(name, url) for name, url in URLS.items())
    if doc is not None
]
if not docs:
    print("Warning: no documentation pages could be scraped.")

# ------------------ 2. CHUNK + EMBED + CHROMA ------------------ #

splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=120,
)
chunks = splitter.split_documents(docs)

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

vectordb = Chroma.from_documents(chunks, embedding_model)

retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4},
)

# ------------------ 3. RAG HELPERS ------------------ #

# Separator placed between the model's answer and the appended source list
# in the chat transcript; also used to strip that list again before earlier
# turns are replayed to the model.
SOURCES_SEPARATOR = "\n\n---\n**Sources**:\n"


def build_context_with_citations(query: str) -> Tuple[str, List[dict]]:
    """Retrieve chunks for *query* and format them as a numbered context.

    Args:
        query: The user's question, passed to the retriever as-is.

    Returns:
        A ``(context, mapping)`` pair. ``context`` is the text block handed
        to the LLM, one ``[i] <chunk>`` entry per retrieved chunk followed by
        its source URL. ``mapping`` lists, per chunk, its label, URL, doc id
        and a shortened preview.
    """
    retrieved = retriever.invoke(query)

    parts = []
    mapping = []
    for i, d in enumerate(retrieved, start=1):
        label = f"[{i}]"
        parts.append(
            f"{label} {d.page_content[:900]}\n"
            f"Source: {d.metadata['url']}\n\n"
        )
        mapping.append({
            "label": label,
            "url": d.metadata["url"],
            "doc": d.metadata["doc_id"],
            "preview": shorten(d.page_content, width=200),
        })

    return "".join(parts), mapping


def build_prompt(query, context, history_str: str):
    """Assemble the user-role prompt: rules, prior turns, question, context."""
    return f"""
You are a Kubernetes expert assistant.

Follow these rules:
1. Use ONLY the context below.
2. Every factual statement MUST have citations like [1], [2].
3. If the answer is not in the context, say: "Not in docs."

Conversation so far:
{history_str}

User question:
{query}

Context:
{context}
""".strip()


# ------------------ 4. OPENROUTER LLM ------------------ #

def call_llm(prompt: str) -> str:
    """Send *prompt* to OpenRouter and return the model's reply text.

    The API key is read from the ``OPENROUTER_API_KEY`` environment
    variable. This function does not raise on transport or API failures;
    it logs the problem with ``print`` and returns a short warning string
    that can be shown directly in the chat.

    Args:
        prompt: Full prompt text sent as the single user message.

    Returns:
        The content of the first choice, or a ``⚠``-prefixed message when
        the key is missing or the call fails.
    """
    api_key = os.getenv("OPENROUTER_API_KEY", "")
    if not api_key:
        return "⚠ OPENROUTER_API_KEY is not set in this Space."

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant",
    }
    payload = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [
            {"role": "system", "content": "You answer only from provided context."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.0,
        "max_tokens": 500,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        # Timeouts / connection errors would otherwise surface as an
        # unhandled exception inside the Gradio event handler.
        print(f"LLM request failed: {e}")
        return "⚠ LLM error. Please try again."

    try:
        data = resp.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page from a gateway).
        print(f"LLM error: non-JSON response (HTTP {resp.status_code})")
        return "⚠ LLM error. Please try again."

    choices = data.get("choices") if isinstance(data, dict) else None
    if choices:
        try:
            return choices[0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            pass  # Unexpected payload shape: fall through to the error path.

    print("LLM error:", json.dumps(data, indent=2))
    return "⚠ LLM error. Please try again."


def answer_question(query: str, history):
    """Answer *query* using retrieved context and the recent conversation.

    Args:
        query: The user's current question.
        history: Sequence of ``(user, bot)`` pairs from earlier turns, or
            ``None``. Only the last four pairs are replayed to the model.

    Returns:
        ``(answer, sources)`` where ``sources`` is the mapping list from
        ``build_context_with_citations``.
    """
    turns = []
    for u, b in (history or [])[-4:]:  # last 4 turns
        # Drop any source list appended for display so the model only sees
        # its own earlier answer text.
        bot_text = (b or "").split(SOURCES_SEPARATOR, 1)[0]
        turns.append(f"User: {u}\nAssistant: {bot_text}\n")
    history_str = "".join(turns)

    ctx, sources = build_context_with_citations(query)
    prompt = build_prompt(query, ctx, history_str)
    answer = call_llm(prompt)
    return answer, sources


# ------------------ 5. GRADIO CHAT UI ------------------ #

custom_css = """
.source-box {
    font-size: 14px;
    background: #111827;
    padding: 10px;
    border-radius: 8px;
    color: #d1e4ff;
    border: 1px solid #2563eb;
}
"""


def chat_fn(message, history):
    """Gradio handler: answer *message* and extend the chat transcript.

    Args:
        message: Text submitted by the user.
        history: Current chatbot value as ``(user, bot)`` pairs, or ``None``.

    Returns:
        ``(new_history, "")``: the transcript with the new turn appended
        (the bot text includes the list of cited sources) and an empty
        string used to clear the input box.
    """
    answer, refs = answer_question(message, history)

    src_lines = [f"{s['label']} – {s['url']}" for s in refs]
    sources_text = "\n".join(src_lines) if src_lines else "No sources found."

    # Show the answer together with its sources; previously the combined
    # text was built but the bare answer was stored, hiding the citations.
    full_answer = f"{answer}{SOURCES_SEPARATOR}{sources_text}"

    # Build a new list rather than mutating the value Gradio passed in.
    new_history = list(history or [])
    new_history.append((message, full_answer))
    return new_history, ""


with gr.Blocks(css=custom_css, theme="soft") as demo:
    # Line breaks are written as escapes: a raw newline inside a
    # single-quoted string literal is a syntax error.
    gr.HTML(
        "\nAsk Kubernetes questions. "
        "Answers are grounded in official docs and include citations.\n"
    )

    chat = gr.Chatbot(label="Conversation", height=450)
    msg = gr.Textbox(label="Your question", placeholder="e.g. What is a StatefulSet?")
    clear = gr.Button("Clear Chat")

    def respond(message, history):
        """Thin event-handler wrapper around ``chat_fn``."""
        return chat_fn(message, history)

    msg.submit(respond, [msg, chat], [chat, msg])
    clear.click(lambda: ([], ""), None, [chat, msg])

demo.launch()