import os
import requests
import json
from bs4 import BeautifulSoup
from textwrap import shorten
import gradio as gr
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
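# NOTE: these vectorstore/embeddings classes also ship in the newer
# langchain-chroma and langchain-huggingface packages; the community import
# paths used here still work but may emit deprecation warnings on recent versions.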
# -----------------------
# 1. SCRAPE K8S DOCS
# -----------------------
urls = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
    "deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
    "services": "https://kubernetes.io/docs/concepts/services-networking/service/",
    "namespaces": "https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/",
    "nodes": "https://kubernetes.io/docs/concepts/architecture/nodes/",
    "statefulsets": "https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/",
    "rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
    "persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
    "ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
    "autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/"
}
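# The dict keys double as doc_id metadata on each scraped Document below, so
# citations can name the page they came from.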
def scrape_docs():
    """Fetch each docs page and keep the main article body as a Document."""
    docs = []
    for name, url in urls.items():
        try:
            r = requests.get(url, timeout=20)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, "html.parser")
            # The Kubernetes docs theme wraps the article body in div.td-content.
            content = soup.find("div", class_="td-content")
            if not content:
                continue
            text = content.get_text(separator="\n").strip()
            docs.append(Document(page_content=text, metadata={"doc_id": name, "url": url}))
        except Exception:
            # Skip pages that fail to download or parse; the rest still index.
            continue
    return docs
docs = scrape_docs()
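# Optional guard (an added assumption: better to fail loudly than index an
# empty corpus, which would make the Chroma build below fail less legibly):
if not docs:
    raise RuntimeError("No Kubernetes docs could be scraped; check network access.")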
# -----------------------
# 2. CHUNK + EMBED + VECTOR DB
# -----------------------
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(chunks, embedding)
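# Note: Chroma.from_documents without a persist_directory builds an in-memory
# index that is rebuilt on every restart; fine for a corpus this small, but a
# persist_directory="..." argument could cache it across restarts.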
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4}
)
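# similarity_score_threshold returns at most k=5 chunks and drops any whose
# relevance score falls below 0.4, so weak matches are filtered out rather
# than padded in. Quick smoke test (illustrative query, run manually):
#   for h in retriever.invoke("How does a Deployment roll out updates?"):
#       print(h.metadata["doc_id"], h.page_content[:80])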
# -----------------------
# 3. RAG HELPERS
# -----------------------
def build_context_with_citations(query: str):
    retrieved_docs = retriever.invoke(query)
    context = ""
    mapping = []
    for i, d in enumerate(retrieved_docs, start=1):
        label = f"[{i}]"
        context += f"{label} {d.page_content[:1000]}\n\nSource: {d.metadata['url']}\n\n"
        mapping.append({
            "label": label,
            "url": d.metadata["url"],
            "doc": d.metadata["doc_id"],
            "preview": shorten(d.page_content, width=200)
        })
    return context, mapping
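# Each mapping entry ties a citation label to its source, e.g. (illustrative
# values, not captured output):
#   {"label": "[1]",
#    "url": "https://kubernetes.io/docs/concepts/workloads/pods/",
#    "doc": "pods",
#    "preview": "Pods are the smallest deployable units of computing [...]"}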
def build_prompt(query, context):
    # Prompt body stays flush-left so no indentation leaks into the LLM input.
    return f"""
You are a Kubernetes expert.
Use ONLY the context below.
Add citations like [1][2] after each fact.
If not found, say: 'Not in docs'.

QUESTION:
{query}

CONTEXT:
{context}
""".strip()
# -----------------------
# 4. OPENROUTER LLM
# -----------------------
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

def call_llm(prompt: str) -> str:
    if not OPENROUTER_API_KEY:
        return "OpenRouter API key is not set. Please configure OPENROUTER_API_KEY in the Space settings."
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [
            {"role": "system", "content": "You are a Kubernetes expert. Only use provided context."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0
    }
    # requests is already imported at the top; json= handles serialization.
    response = requests.post(url, headers=headers, json=data, timeout=60)
    out = response.json()
    return out.get("choices", [{"message": {"content": "No response"}}])[0]["message"]["content"]
def answer_question(query: str):
    context, sources = build_context_with_citations(query)
    prompt = build_prompt(query, context)
    answer = call_llm(prompt)
    return answer, sources
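# Illustrative end-to-end call (assumes the index built and the API key is
# set; the question is an arbitrary example):
#   answer, sources = answer_question("What is a StatefulSet used for?")
#   print(answer)
#   print([s["url"] for s in sources])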
# -----------------------
# 5. GRADIO CHAT APP
# -----------------------
def chat_fn(message, history):
    answer, sources = answer_question(message)
    src_lines = [f"{s['label']} – {s['url']}" for s in sources]
    sources_text = "\n".join(src_lines) if src_lines else "No sources found."
    full_answer = f"{answer}\n\n---\nSources:\n{sources_text}"
    return full_answer
demo = gr.ChatInterface(
    fn=chat_fn,
    title="Kubernetes RAG Assistant",
    description="Ask Kubernetes questions. Answers are grounded in official docs and include citations."
)
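# On Hugging Face Spaces the Gradio SDK typically serves `demo` automatically;
# the __main__ guard below covers local runs. For local debugging a public
# tunnel can be requested with demo.launch(share=True).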
def main():
    return demo

if __name__ == "__main__":
    demo.launch()