# Kubernetes RAG Assistant — Gradio app.
# (HuggingFace Space page chrome / commit-hash residue removed; it was not part of the program.)
import os
import json
import requests
import gradio as gr
from bs4 import BeautifulSoup
from textwrap import shorten
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# ------------------ SCRAPE KUBERNETES DOCS ------------------ #
# Topic label -> official Kubernetes documentation page, scraped once at startup.
# The label becomes the chunk's `doc_id`; the URL is surfaced as a citation source.
URLS = {
"pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
"deployments": "https://kubernetes.io/docs/concepts/workloads/controllers/deployment/",
"services": "https://kubernetes.io/docs/concepts/services-networking/service/",
"namespaces": "https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/",
"nodes": "https://kubernetes.io/docs/concepts/architecture/nodes/",
"statefulsets": "https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/",
"rbac": "https://kubernetes.io/docs/reference/access-authn-authz/rbac/",
"persistent-volumes": "https://kubernetes.io/docs/concepts/storage/persistent-volumes/",
"ingress": "https://kubernetes.io/docs/concepts/services-networking/ingress/",
"autoscaling": "https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/",
}
def scrape_page(name, url):
    """Fetch one Kubernetes docs page and wrap its main content in a Document.

    Args:
        name: short topic label, stored as ``doc_id`` metadata.
        url: page to fetch.

    Returns:
        A ``Document`` with the page text and ``{"doc_id", "url"}`` metadata,
        or ``None`` when the request fails, returns an HTTP error, or the
        expected content container is missing.
    """
    try:
        r = requests.get(url, timeout=20)
        # A 404/5xx response would otherwise be parsed as if it were content.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        # The kubernetes.io docs theme places the article body in div.td-content.
        content = soup.find("div", class_="td-content")
        if not content:
            return None
        text = content.get_text(separator="\n").strip()
        return Document(
            page_content=text,
            metadata={"doc_id": name, "url": url},
        )
    except Exception:
        # Best-effort scrape: skip this page on any failure, but do not use a
        # bare `except:` — that would also trap KeyboardInterrupt/SystemExit.
        return None
# Scrape every configured page; scrape_page yields None on failure, and
# filter(None, ...) drops those falsy results just like the original `if d:` check.
docs = list(
    filter(None, (scrape_page(name, url) for name, url in URLS.items()))
)
# ------------------ CHUNK + EMBEDDINGS + VECTOR DB ------------------ #
# 900-char chunks with 200-char overlap so retrieved passages keep local context.
splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
chunks = splitter.split_documents(docs)
# Small, fast general-purpose sentence-embedding model (384-dim MiniLM).
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Ephemeral in-memory Chroma index rebuilt from the scraped docs on every startup.
vectordb = Chroma.from_documents(chunks, embedding_model)
# Return at most 5 chunks, discarding weak matches below the 0.4 similarity score.
retriever = vectordb.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={"k": 5, "score_threshold": 0.4}
)
# ------------------ LLM CALL (OPENROUTER) ------------------ #
def call_llm(prompt):
    """Send *prompt* to OpenRouter's chat-completions API and return the reply.

    Reads the API key from the ``OPENROUTER_API_KEY`` environment variable.
    Returns the model's message content, or a placeholder error string when
    the response carries no ``choices`` (e.g. auth/quota errors).
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant",
    }
    data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 400,
        # Deterministic output so answers stick closely to the cited context.
        "temperature": 0.0,
    }
    # Without a timeout a stalled request would hang the Gradio worker forever.
    r = requests.post(url, headers=headers, json=data, timeout=60)
    res = r.json()
    if "choices" in res:
        return res["choices"][0]["message"]["content"]
    # Log the raw payload so auth/rate-limit errors are diagnosable from stdout.
    print("🚨 LLM ERROR:", res)
    return "⚠️ Error: No response from model"
# ------------------ BUILD ANSWER WITH CITATIONS ------------------ #
def build_context_with_citations(query):
    """Retrieve chunks relevant to *query* and format them for the prompt.

    Returns:
        (context, sources): ``context`` is the retrieved chunks, each prefixed
        with a numeric ``[i]`` label and followed by its source URL;
        ``sources`` is a list of ``"[i] → url"`` strings for display.
    """
    hits = retriever.invoke(query)
    parts = []
    sources = []
    for i, d in enumerate(hits, start=1):
        label = f"[{i}]"
        # Cap each chunk at 900 chars to keep the prompt within the token budget.
        parts.append(f"{label} {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n")
        sources.append(f"{label} → {d.metadata['url']}")
    # join() instead of repeated `context +=` (quadratic string concatenation).
    return "".join(parts), sources
def answer_question(query, history):
    """Gradio chat handler: answer *query* from retrieved docs with citations.

    Appends the (question, answer-with-sources) pair to *history* in place.

    Returns:
        (history, ""): the updated chat history, plus an empty string to
        clear the input textbox.
    """
    context, sources = build_context_with_citations(query)
    prompt = f"""
Answer the question strictly using the context below.
Every sentence must include citation like [1], [2].
If missing info → reply: "Not in docs."
Question: {query}
Context:
{context}
"""
    answer = call_llm(prompt)
    src = "\n".join(sources) if sources else "No sources available."
    history.append((query, answer + "\n\n---\nSources:\n" + src))
    return history, ""
# ------------------ GRADIO UI ------------------ #
# CSS injected into the page via a <style> tag; .source-box styles the
# citation list as a dark card with a blue border.
custom_css = """
.source-box {
background: #1e293b;
padding: 10px;
border-radius: 8px;
color: #dbeafe;
border: 1px solid #3b82f6;
}
"""
# Build and launch the chat UI. The header HTML had mis-encoded characters
# (mojibake); they are restored to the intended symbols here.
with gr.Blocks(theme="soft") as app:
    gr.HTML(f"<style>{custom_css}</style>")
    gr.HTML(
        "<h1 style='text-align:center;color:#3b82f6'>☸ Kubernetes RAG Assistant</h1>"
        "<p style='text-align:center;color:#cbd5e1'>Ask Kubernetes questions — answers include official docs citations 📘</p>"
    )
    chat = gr.Chatbot(label="Conversation", height=450)
    msg = gr.Textbox(label="Ask a question...", placeholder="What is a pod?")
    clear = gr.Button("Clear Chat")
    # Enter in the textbox sends (message, history) and receives (history, cleared box).
    msg.submit(answer_question, [msg, chat], [chat, msg])
    # Clear resets the chat history and empties the textbox.
    clear.click(lambda: ([], ""), None, [chat, msg])
app.launch()