"""Kubernetes RAG assistant: scrape official docs, chunk, embed, and answer with citations."""

import json
import os
from textwrap import shorten

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# -----------------------
# 1. SCRAPE K8S DOCS
# -----------------------

# Doc-id -> official Kubernetes documentation URL. The doc-id becomes the
# `doc_id` metadata on each scraped Document and is shown in citations.
urls = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
    "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
    "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
    "namespaces": "https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/",
    "nodes": "https://kubernetes.io/docs/concepts/architecture/nodes/",
    "statefulsets": "https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/",
    "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
    "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
    "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
}


def scrape_docs():
    """Download each page in ``urls`` and return it as a LangChain Document.

    Extracts the main article text from the ``div.td-content`` container used
    by the Kubernetes docs theme. Scraping is best-effort: pages that fail to
    download, return an HTTP error, or lack the expected container are
    skipped, so the result may contain fewer entries than ``urls``.

    Returns:
        list[Document]: one Document per successfully scraped page, with
        ``doc_id`` and ``url`` metadata.
    """
    docs = []
    for name, url in urls.items():
        try:
            r = requests.get(url, timeout=20)
            # BUG FIX: without this, a 404/5xx error page was parsed as if it
            # were real documentation content.
            r.raise_for_status()
        except requests.RequestException:
            # Best-effort: one unreachable page should not abort the corpus build.
            continue
        soup = BeautifulSoup(r.text, "html.parser")
        content = soup.find("div", class_="td-content")
        if not content:
            continue
        text = content.get_text(separator="\n").strip()
        docs.append(Document(page_content=text, metadata={"doc_id": name, "url": url}))
    return docs


# Scraped once at import time; the indexing section below embeds this corpus.
docs = scrape_docs()
# -----------------------
# 2. CHUNK + EMBED + VECTOR DB
# -----------------------
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(chunks, embedding)

# Only return chunks scoring above the threshold, at most 5 per query.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4},
)


# -----------------------
# 3. RAG HELPERS
# -----------------------
def build_context_with_citations(query: str):
    """Retrieve chunks relevant to *query* and label them for citation.

    Returns:
        tuple[str, list[dict]]: the concatenated context text, where each
        chunk is prefixed with a ``[n]`` label and followed by its source
        URL, and a parallel list of mappings with ``label``, ``url``,
        ``doc`` (doc_id), and a 200-char ``preview`` of the chunk.
    """
    retrieved_docs = retriever.invoke(query)
    context = ""
    mapping = []
    for i, d in enumerate(retrieved_docs, start=1):
        label = f"[{i}]"
        # Cap each chunk at 1000 chars to keep the prompt within budget.
        context += f"{label} {d.page_content[:1000]}\n\nSource: {d.metadata['url']}\n\n"
        mapping.append({
            "label": label,
            "url": d.metadata["url"],
            "doc": d.metadata["doc_id"],
            "preview": shorten(d.page_content, width=200),
        })
    return context, mapping


def build_prompt(query, context):
    """Assemble the grounded-QA prompt: instructions, question, then context."""
    return f"""
You are a Kubernetes expert. Use ONLY the context below. Add citations like [1][2] after each fact. If not found, say: 'Not in docs'.

QUESTION: {query}

CONTEXT:
{context}
""".strip()


# -----------------------
# 4. OPENROUTER LLM
# -----------------------
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")


def call_llm(prompt: str) -> str:
    """Send *prompt* to the OpenRouter chat-completions API and return the reply.

    Failures (missing key, network error, HTTP error, bad JSON) are returned
    as human-readable strings rather than raised, so the Gradio chat handler
    never crashes mid-conversation.
    """
    if not OPENROUTER_API_KEY:
        return ("OpenRouter API key is not set. Please configure "
                "OPENROUTER_API_KEY in the Space settings.")

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [
            {"role": "system",
             "content": "You are a Kubernetes expert. Only use provided context."},
            {"role": "user", "content": prompt},
        ],
        # Deterministic output for grounded answers.
        "temperature": 0.0,
    }
    try:
        # BUG FIX: the original request had no timeout (a stalled API call hung
        # the chat handler indefinitely) and never checked the HTTP status, so
        # an API error crashed the callback. `json=` also replaces the manual
        # `data=json.dumps(...)` and sets the content type automatically.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        out = response.json()
    except (requests.RequestException, ValueError) as exc:
        return f"LLM request failed: {exc}"
    return out.get("choices", [{"message": {"content": "No response"}}])[0]["message"]["content"]


def answer_question(query: str):
    """Run the full RAG loop for *query*: retrieve, prompt, generate.

    Returns:
        tuple[str, list[dict]]: the model's answer and the citation mapping
        produced by ``build_context_with_citations``.
    """
    context, sources = build_context_with_citations(query)
    prompt = build_prompt(query, context)
    answer = call_llm(prompt)
    return answer, sources


# -----------------------
# 5. GRADIO CHAT APP
# -----------------------
def chat_fn(message, history):
    """Gradio ChatInterface callback: answer *message* and append a source list.

    *history* is required by the ChatInterface signature but unused — every
    turn is answered independently from the retrieved context.
    """
    answer, sources = answer_question(message)
    src_lines = [f"{s['label']} – {s['url']}" for s in sources]
    sources_text = "\n".join(src_lines) if src_lines else "No sources found."
    full_answer = f"{answer}\n\n---\nSources:\n{sources_text}"
    return full_answer


demo = gr.ChatInterface(
    fn=chat_fn,
    title="Kubernetes RAG Assistant",
    description="Ask Kubernetes questions. Answers are grounded in official docs and include citations.",
)


def main():
    """Return the Gradio app object (entry point for hosting platforms)."""
    return demo


if __name__ == "__main__":
    demo.launch()