File size: 4,794 Bytes
4d068e8
176a09c
4d068e8
 
176a09c
 
4d068e8
176a09c
 
4419533
176a09c
4d068e8
4419533
176a09c
 
4d068e8
 
 
 
 
 
 
 
 
176a09c
4d068e8
 
176a09c
f7f504f
176a09c
 
 
 
 
 
 
 
 
 
4419533
176a09c
 
 
 
 
 
 
 
4419533
176a09c
4419533
176a09c
 
4419533
176a09c
 
 
 
 
 
 
4419533
176a09c
4419533
4d068e8
 
4419533
f7f504f
 
4d068e8
4419533
4d068e8
4419533
 
176a09c
4d068e8
4419533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d068e8
4419533
 
 
f7f504f
4419533
f7f504f
 
 
4419533
f7f504f
 
4419533
 
f7f504f
 
 
4419533
 
f7f504f
4419533
 
f7f504f
176a09c
4419533
176a09c
f7f504f
4419533
176a09c
f7f504f
4419533
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import json
import requests
import gradio as gr
from bs4 import BeautifulSoup
from textwrap import shorten

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# ------------------ SCRAPE KUBERNETES DOCS ------------------ #

# Kubernetes documentation pages to index. Every page lives under the same
# docs root, so only the path below that root is spelled out per topic.
_K8S_DOCS_ROOT = "https://kubernetes.io/docs"

_TOPIC_PATHS = (
    ("pods", "concepts/workloads/pods"),
    ("deployments", "concepts/workloads/controllers/deployment"),
    ("services", "concepts/services-networking/service"),
    ("namespaces", "concepts/overview/working-with-objects/namespaces"),
    ("nodes", "concepts/architecture/nodes"),
    ("statefulsets", "concepts/workloads/controllers/statefulset"),
    ("rbac", "reference/access-authn-authz/rbac"),
    ("persistent-volumes", "concepts/storage/persistent-volumes"),
    ("ingress", "concepts/services-networking/ingress"),
    ("autoscaling", "tasks/run-application/horizontal-pod-autoscale"),
)

# Short topic id -> absolute page URL (trailing slash included), in the
# order the topics are listed above.
URLS = {topic: f"{_K8S_DOCS_ROOT}/{path}/" for topic, path in _TOPIC_PATHS}

def scrape_page(name, url):
    """Download one documentation page and wrap its main text in a ``Document``.

    Args:
        name: Short topic id, stored as ``doc_id`` in the document metadata.
        url: Address of the page to fetch, stored as ``url`` in the metadata.

    Returns:
        A ``Document`` holding the text of the page's ``div.td-content``
        element, or ``None`` when the request fails, the server answers with
        an error status, the element is absent, or it contains no text.
    """
    try:
        response = requests.get(url, timeout=20)
        # Without this check an HTTP error page (404/500) would be parsed
        # and indexed as if it were documentation.
        response.raise_for_status()
    except requests.RequestException as err:
        # Still best effort: an unreachable page is skipped. Only request
        # errors are caught, so Ctrl-C and programming errors are no longer
        # swallowed, and the reason for the skip is reported.
        print(f"⚠️ Could not fetch {name} ({url}): {err}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    content = soup.find("div", class_="td-content")
    if content is None:
        return None

    text = content.get_text(separator="\n").strip()
    if not text:
        # An empty page would only add an empty document to the index.
        return None

    return Document(
        page_content=text,
        metadata={"doc_id": name, "url": url},
    )

# Fetch every configured page up front; pages for which scrape_page returned
# a falsy value (i.e. could not be scraped) are left out.
docs = [
    page
    for page in (scrape_page(topic, link) for topic, link in URLS.items())
    if page
]

# ------------------ CHUNK + EMBEDDINGS + VECTOR DB ------------------ #

# Split each scraped page into overlapping pieces (chunk_size=900,
# chunk_overlap=200) so every piece can be embedded and retrieved on its own.
splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
chunks = splitter.split_documents(docs)

# Embedding model used to vectorise the chunks for the vector store.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build the Chroma index from the chunks; this runs at module import time.
# NOTE(review): no persist directory is passed, so the index is presumably
# rebuilt from freshly scraped pages on every start — confirm.
# NOTE(review): if every scrape failed, `chunks` is empty here; check how
# from_documents behaves on an empty list.
vectordb = Chroma.from_documents(chunks, embedding_model)
# Retriever set up for thresholded similarity search: k=5 and
# score_threshold=0.4 are handed to the vector store as search kwargs.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4}
)

# ------------------ LLM CALL (OPENROUTER) ------------------ #

def call_llm(prompt):
    """Send *prompt* to OpenRouter's chat-completions endpoint and return the reply.

    The request carries a single user message for the model
    ``meta-llama/llama-3.1-8b-instruct`` with ``max_tokens=400`` and
    ``temperature=0.0``. The API key is read from the ``OPENROUTER_API_KEY``
    environment variable.

    Args:
        prompt: Full prompt text, sent as the only message.

    Returns:
        The text of the first choice in the response. When the key is
        missing, the request fails, the body is not JSON, or no usable choice
        is present, a fixed error string is returned instead and the details
        are printed; such failures never raise.
    """
    failure = "⚠️ Error: No response from model"

    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Without a key the header would literally read "Bearer None" and the
        # request would only come back as an authentication error.
        print("🚨 LLM ERROR:", "OPENROUTER_API_KEY is not set")
        return failure

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant"
    }
    data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 400,
        "temperature": 0.0,
    }

    try:
        # The timeout keeps a stalled connection from hanging the chat handler.
        r = requests.post(url, headers=headers, json=data, timeout=60)
        res = r.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON (e.g. an HTML
        # error page from a proxy).
        print("🚨 LLM ERROR:", err)
        return failure

    try:
        content = res["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        content = None

    if not isinstance(content, str):
        # Callers concatenate the reply with other text, so only a string
        # counts as a usable answer.
        print("🚨 LLM ERROR:", res)
        return failure
    return content

# ------------------ BUILD ANSWER WITH CITATIONS ------------------ #

def build_context_with_citations(query):
    """Retrieve passages for *query* and format them as a numbered context.

    Args:
        query: The user's question, handed to the module-level ``retriever``.

    Returns:
        A ``(context, sources)`` tuple. ``context`` is a single string in
        which every retrieved passage (cut to 900 characters) is prefixed with
        its label ``[1]``, ``[2]``, ... and followed by a ``Source:`` line.
        ``sources`` lists ``"[n] → url"`` strings in the same order. Both are
        empty when nothing is retrieved.
    """
    passages = retriever.invoke(query)
    blocks = []
    sources = []
    for i, passage in enumerate(passages, start=1):
        label = f"[{i}]"
        # Tolerate a passage without a "url" entry instead of failing the
        # whole answer with a KeyError.
        link = passage.metadata.get("url", "unknown source")
        blocks.append(f"{label} {passage.page_content[:900]}\nSource: {link}\n\n")
        # The arrow was stored as mis-decoded bytes; it is restored here.
        sources.append(f"{label} → {link}")
    return "".join(blocks), sources

def answer_question(query, history):
    """Answer *query* from the indexed docs and append the turn to the chat.

    Args:
        query: Text submitted from the input box.
        history: Chat history as a list of ``(user, assistant)`` pairs. A
            list is extended in place; ``None`` is treated as an empty
            history.

    Returns:
        ``(history, "")``: the updated history for the chat component and an
        empty string that clears the input box. A blank query leaves the
        history unchanged and triggers neither retrieval nor an LLM call.
    """
    # Guard against a missing history (e.g. a chat component with no value
    # yet) so the append below cannot fail on None.
    if history is None:
        history = []

    if not query or not query.strip():
        # Nothing was asked: skip retrieval and the model call.
        return history, ""

    context, sources = build_context_with_citations(query)
    # The arrow in the third instruction line was stored as mis-decoded
    # bytes; it is restored here.
    prompt = f"""
Answer the question strictly using the context below.
Every sentence must include citation like [1], [2].
If missing info → reply: "Not in docs."

Question: {query}

Context:
{context}
"""
    answer = call_llm(prompt)
    src = "\n".join(sources) if sources else "No sources available."
    history.append((query, answer + "\n\n---\nSources:\n" + src))
    return history, ""

# ------------------ GRADIO UI ------------------ #

# Extra styling, injected into the page through a <style> tag below.
custom_css = """
.source-box {
    background: #1e293b;
    padding: 10px;
    border-radius: 8px;
    color: #dbeafe;
    border: 1px solid #3b82f6;
}
"""

# Page layout: title banner, chat transcript, question box and a clear button.
with gr.Blocks(theme="soft") as app:
    gr.HTML(f"<style>{custom_css}</style>")

    # The dash and the pin emoji in the subtitle were stored as mis-decoded
    # bytes and showed up as garbage in the page; they are restored here.
    gr.HTML("<h1 style='text-align:center;color:#3b82f6'>☸ Kubernetes RAG Assistant</h1>"
            "<p style='text-align:center;color:#cbd5e1'>Ask Kubernetes questions — answers include official docs citations 📌</p>")

    chat = gr.Chatbot(label="Conversation", height=450)
    msg = gr.Textbox(label="Ask a question...", placeholder="What is a pod?")
    clear = gr.Button("Clear Chat")

    # Submitting the textbox passes (text, history) to answer_question, which
    # returns (new history, "") so the transcript updates and the box clears.
    msg.submit(answer_question, [msg, chat], [chat, msg])
    # Reset both the transcript and the input box.
    clear.click(lambda: ([], ""), None, [chat, msg])

app.launch()