import json
import os
from itertools import zip_longest

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from rank_bm25 import BM25Okapi  # keyword half of the hybrid search

# ------------------ SCRAPE KUBERNETES DOCS ------------------ #
# Short topic name -> official documentation page that gets indexed.
URLS = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
    "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
    "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
    "namespaces": "https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/",
    "nodes": "https://kubernetes.io/docs/concepts/architecture/nodes/",
    "statefulsets": "https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/",
    "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
    "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
    "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
}

# Upper bound (seconds) on a single OpenRouter request so the UI cannot hang.
LLM_TIMEOUT_SECONDS = 60


def scrape_page(name, url):
    """Fetch one documentation page and wrap its main content in a Document.

    Args:
        name: Short identifier stored as ``doc_id`` in the metadata.
        url: Page to download; also stored in the metadata for citations.

    Returns:
        A ``Document`` with the text of the page's ``div.td-content``
        element, or ``None`` when the request fails, the server answers
        with an error status, or that element is missing.
    """
    try:
        r = requests.get(url, timeout=20)
        # Without this an error page (404/500) would be parsed like content.
        r.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort scraping: report the failure and skip this page.
        print(f"SCRAPE ERROR: {name} ({url}): {exc}")
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    content = soup.find("div", class_="td-content")
    if content is None:
        return None
    text = content.get_text(separator="\n").strip()
    return Document(page_content=text, metadata={"doc_id": name, "url": url})


docs = []
for name, url in URLS.items():
    d = scrape_page(name, url)
    if d:
        docs.append(d)

if not docs:
    # Fail with a clear message here instead of an obscure error while
    # building the indexes below (BM25Okapi divides by the corpus size).
    raise RuntimeError("No Kubernetes documentation pages could be scraped.")

# ------------------ CHUNK + EMBEDDINGS + VECTOR DB ------------------ #
splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
chunks = splitter.split_documents(docs)

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectordb = Chroma.from_documents(chunks, embedding_model)

retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4},
)


# ------------------ HYBRID SEARCH ------------------ #
def _tokenize(text):
    """Lowercase and whitespace-split *text* for BM25 indexing and querying."""
    return text.lower().split()


# Corpus and query must share one tokenizer: the query is lowercased, so an
# index built from case-preserved tokens would never match capitalized words.
bm25_corpus = [_tokenize(doc.page_content) for doc in chunks]
bm25 = BM25Okapi(bm25_corpus)


def hybrid_search(query, top_k=5):
    """Return up to *top_k* chunks combining vector and BM25 keyword search.

    The two ranked lists are interleaved (vector hit first at each rank) so
    that keyword matches are not discarded whenever the vector retriever
    already fills ``top_k`` slots on its own.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of unique ``Document`` chunks, at most ``top_k`` long.
    """
    # Vector search
    vector_results = retriever.invoke(query)

    # BM25 keyword search; chunks scoring 0 share no term with the query and
    # are therefore not treated as hits.
    bm25_scores = bm25.get_scores(_tokenize(query))
    bm25_ranked = sorted(
        zip(bm25_scores, chunks), key=lambda pair: pair[0], reverse=True
    )
    bm25_results = [doc for score, doc in bm25_ranked[:top_k] if score > 0]

    # Interleave by rank and drop duplicates found by both searches.
    merged = []
    seen = set()
    for same_rank in zip_longest(vector_results, bm25_results):
        for doc in same_rank:
            if doc is None:  # padding from the shorter list
                continue
            key = (doc.metadata["doc_id"], doc.page_content)
            if key in seen:
                continue
            seen.add(key)
            merged.append(doc)
    return merged[:top_k]


# ------------------ LLM CALL (OpenRouter) ------------------ #
def call_llm(prompt):
    """Send *prompt* to the OpenRouter chat-completions API.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable.

    Args:
        prompt: Full prompt text, sent as a single user message.

    Returns:
        The content of the first completion choice, or a fixed warning
        message when the request fails or the response has no choices.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant",
    }
    data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 400,
        "temperature": 0.0,
    }

    try:
        r = requests.post(
            url, headers=headers, json=data, timeout=LLM_TIMEOUT_SECONDS
        )
        res = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a body that is not valid JSON.
        print("LLM ERROR:", exc)
        return "⚠️ Model failed. Please retry."

    choices = res.get("choices") if isinstance(res, dict) else None
    if choices:
        return choices[0]["message"]["content"]

    print("LLM ERROR:", res)
    return "⚠️ Model failed. Please retry."
# ------------------ RAG + CITATIONS ------------------ #
def build_context_with_citations(query):
    """Retrieve chunks for *query* and format them as numbered context.

    Args:
        query: The user's question, passed to ``hybrid_search``.

    Returns:
        A ``(context, sources)`` tuple: ``context`` is the prompt text with
        one ``[n]``-labelled entry per chunk (chunk text capped at 900
        characters, followed by its URL) and ``sources`` is the list of
        matching ``"[n] → url"`` lines.
    """
    hits = hybrid_search(query)
    entries = []
    sources = []
    for i, d in enumerate(hits, start=1):
        label = f"[{i}]"
        link = d.metadata["url"]
        entries.append(f"{label} {d.page_content[:900]}\nSource: {link}\n\n")
        sources.append(f"{label} → {link}")
    return "".join(entries), sources


def answer_question(query, history):
    """Answer *query* from the indexed docs and append the turn to *history*.

    Args:
        query: Text submitted from the textbox.
        history: Chat history as a list of ``(question, answer)`` tuples;
            ``None`` is treated as an empty history.

    Returns:
        ``(history, "")`` — the updated history for the chatbot and an empty
        string that clears the textbox.
    """
    history = history or []

    # Nothing to answer: leave the conversation untouched, clear the box.
    if not query or not query.strip():
        return history, ""

    context, sources = build_context_with_citations(query)

    if not sources:
        # No context was retrieved, so the prompt's own rule would require
        # this exact reply; skip the API call.
        history.append((query, "Not in docs."))
        return history, ""

    prompt = f"""
Answer using ONLY the context below.
Every sentence MUST include citations like [1], [2].
If the answer is not in docs → respond "Not in docs."

Question: {query}

Context:
{context}
"""

    answer = call_llm(prompt)
    final = answer + "\n\n---\nSources:\n" + "\n".join(sources)
    history.append((query, final))
    return history, ""


# ------------------ GRADIO UI ------------------ #
custom_css = """
.source-box {
    background: #1e293b;
    color: #dbeafe;
    padding: 10px;
    border-radius: 7px;
    border: 1px solid #3b82f6;
}
"""

with gr.Blocks(theme="soft") as app:
    # NOTE(review): this call was an empty f-string and custom_css was never
    # referenced, so the markup looks stripped; restored as a <style> tag —
    # confirm against the original file.
    gr.HTML(f"<style>{custom_css}</style>")
    # NOTE(review): any HTML tags around this banner text appear to have
    # been lost as well; only the surviving text is kept.
    gr.HTML("\nSemantic + Hybrid Search • Official K8s Docs Cited 📌\n")

    chat = gr.Chatbot(label="Conversation", height=450)
    msg = gr.Textbox(
        label="Ask anything about Kubernetes…",
        placeholder="e.g., What is RBAC?",
    )
    clear = gr.Button("Clear Conversation")

    msg.submit(answer_question, [msg, chat], [chat, msg])
    clear.click(lambda: ([], ""), None, [chat, msg])

app.launch()