Prakyath01 committed (verified)
Commit 4419533 · 1 Parent(s): 176a09c

Update app.py

Files changed (1)
  1. app.py +57 -102
app.py CHANGED
@@ -7,10 +7,10 @@ from textwrap import shorten

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
- from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

- # ------------------ 1. SCRAPE K8S DOCS ------------------ #

URLS = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
@@ -37,8 +37,7 @@ def scrape_page(name, url):
            page_content=text,
            metadata={"doc_id": name, "url": url}
        )
-     except Exception as e:
-         print(f"Error scraping {name}: {e}")
        return None

docs = []
@@ -47,17 +46,12 @@ for name, url in URLS.items():
    if d:
        docs.append(d)

- # ------------------ 2. CHUNK + EMBED + CHROMA ------------------ #

- splitter = RecursiveCharacterTextSplitter(
-     chunk_size=800,
-     chunk_overlap=120
- )
chunks = splitter.split_documents(docs)

- embedding_model = HuggingFaceEmbeddings(
-     model_name="sentence-transformers/all-MiniLM-L6-v2"
- )

vectordb = Chroma.from_documents(chunks, embedding_model)
retriever = vectordb.as_retriever(
@@ -65,119 +59,80 @@ retriever = vectordb.as_retriever(
    search_kwargs={"k": 5, "score_threshold": 0.4}
)

- # ------------------ 3. RAG HELPERS ------------------ #
-
- def build_context_with_citations(query: str):
-     retrieved = retriever.invoke(query)
-     context = ""
-     mapping = []
-     for i, d in enumerate(retrieved, start=1):
-         label = f"[{i}]"
-         context += (
-             f"{label} {d.page_content[:900]}\n"
-             f"Source: {d.metadata['url']}\n\n"
-         )
-         mapping.append({
-             "label": label,
-             "url": d.metadata["url"],
-             "doc": d.metadata["doc_id"],
-             "preview": shorten(d.page_content, width=200)
-         })
-     return context, mapping
-
- def build_prompt(query, context, history_str: str):
-     return f"""
- You are a Kubernetes expert assistant.
-
- Follow these rules:
- 1. Use ONLY the context below.
- 2. Every factual statement MUST have citations like [1], [2].
- 3. If the answer is not in the context, say: "Not in docs."
-
- Conversation so far:
- {history_str}
-
- User question: {query}
-
- Context:
- {context}
- """.strip()
-
- # ------------------ 4. OPENROUTER LLM ------------------ #
-
- def call_llm(prompt: str) -> str:
-     api_key = os.getenv("OPENROUTER_API_KEY", "")
-     if not api_key:
-         return "⚠ OPENROUTER_API_KEY is not set in this Space."

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
-         "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant"
    }
-     payload = {
        "model": "meta-llama/llama-3.1-8b-instruct",
-         "messages": [
-             {"role": "system", "content": "You answer only from provided context."},
-             {"role": "user", "content": prompt}
-         ],
        "temperature": 0.0,
-         "max_tokens": 500
    }
-     resp = requests.post(url, headers=headers, json=payload, timeout=60)
-     data = resp.json()
-     if "choices" in data:
-         return data["choices"][0]["message"]["content"]
-     print("LLM error:", json.dumps(data, indent=2))
-     return "⚠ LLM error. Please try again."
-
- def answer_question(query: str, history):
-     # history is list of [user, bot]
-     history_str = ""
-     for u, b in history[-4:]:  # last 4 turns
-         history_str += f"User: {u}\nAssistant: {b}\n"
-     ctx, sources = build_context_with_citations(query)
-     prompt = build_prompt(query, ctx, history_str)

    answer = call_llm(prompt)
-     return answer, sources

- # ------------------ 5. GRADIO CHAT UI ------------------ #

custom_css = """
.source-box {
-     font-size: 14px;
-     background: #111827;
    padding: 10px;
    border-radius: 8px;
-     color: #d1e4ff;
-     border: 1px solid #2563eb;
}
"""

- def chat_fn(message, history):
-     answer, refs = answer_question(message, history)
-     src_lines = [f"{s['label']} – {s['url']}" for s in refs]
-     sources_text = "\n".join(src_lines) if src_lines else "No sources found."
-     full_answer = f"{answer}\n\n---\n**Sources**:\n{sources_text}"
-     history.append((message, answer))
-     return history, ""

- with gr.Blocks(css=custom_css, theme="soft") as demo:
-     gr.HTML(
-         "<h1 style='text-align:center;color:#3b82f6;'>☸ Kubernetes RAG Assistant</h1>"
-         "<p style='text-align:center;color:#e5e7eb;'>Ask Kubernetes questions. "
-         "Answers are grounded in official docs and include citations.</p>"
-     )

    chat = gr.Chatbot(label="Conversation", height=450)
-     msg = gr.Textbox(label="Your question", placeholder="e.g. What is a StatefulSet?")
    clear = gr.Button("Clear Chat")

-     def respond(message, history):
-         return chat_fn(message, history)
-
-     msg.submit(respond, [msg, chat], [chat, msg])
    clear.click(lambda: ([], ""), None, [chat, msg])

- demo.launch()
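A note on the retriever configuration, which this commit leaves unchanged: in LangChain, the score_threshold entry in search_kwargs is only honored when as_retriever is called with search_type="similarity_score_threshold" (that argument, if present, sits on a line outside the hunks shown above). A minimal sketch of that configuration plus a retrieval call, assuming the Chroma store built earlier; the search_type value and the query string are illustrative, not part of this diff:

# Sketch only: search_type here is an assumption, not taken from the commit.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.4}
)
for d in retriever.invoke("How do I restart a Pod?"):
    print(d.metadata["url"])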
 

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

+ # ------------------ SCRAPE KUBERNETES DOCS ------------------ #

URLS = {
    "pods": "https://kubernetes.io/docs/concepts/workloads/pods/",
 
            page_content=text,
            metadata={"doc_id": name, "url": url}
        )
+     except:
        return None

docs = []
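The hunks above only show the tail of scrape_page; the fetching and parsing logic lies outside the changed lines. For orientation, a minimal sketch of what such a helper typically looks like; the requests and BeautifulSoup calls are assumptions and not part of this commit:

import requests
from bs4 import BeautifulSoup
from langchain_core.documents import Document

def scrape_page(name, url):
    # Hypothetical body: fetch the page, strip markup, wrap the text in a Document.
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        text = soup.get_text(separator=" ", strip=True)
        return Document(
            page_content=text,
            metadata={"doc_id": name, "url": url}
        )
    except Exception:
        return None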
 
    if d:
        docs.append(d)

+ # ------------------ CHUNK + EMBEDDINGS + VECTOR DB ------------------ #

+ splitter = RecursiveCharacterTextSplitter(chunk_size=900, chunk_overlap=200)
chunks = splitter.split_documents(docs)

+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectordb = Chroma.from_documents(chunks, embedding_model)
retriever = vectordb.as_retriever(
 
    search_kwargs={"k": 5, "score_threshold": 0.4}
)

+ # ------------------ LLM CALL (OPENROUTER) ------------------ #

+ def call_llm(prompt):
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
+         "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://huggingface.co/",
        "X-Title": "Kubernetes RAG Assistant"
    }
+     data = {
        "model": "meta-llama/llama-3.1-8b-instruct",
+         "messages": [{"role": "user", "content": prompt}],
+         "max_tokens": 400,
        "temperature": 0.0,
    }
+     r = requests.post(url, headers=headers, json=data)
+     res = r.json()
+     if "choices" in res:
+         return res["choices"][0]["message"]["content"]
+     print("🚨 LLM ERROR:", res)
+     return "⚠️ Error: No response from model"
+
+ # ------------------ BUILD ANSWER WITH CITATIONS ------------------ #
+
+ def build_context_with_citations(query):
+     docs = retriever.invoke(query)
+     context = ""
+     sources = []
+     for i, d in enumerate(docs, start=1):
+         label = f"[{i}]"
+         context += f"{label} {d.page_content[:900]}\nSource: {d.metadata['url']}\n\n"
+         sources.append(f"{label} → {d.metadata['url']}")
+     return context, sources
+
+ def answer_question(query, history):
+     context, sources = build_context_with_citations(query)
+     prompt = f"""
+ Answer the question strictly using the context below.
+ Every sentence must include citation like [1], [2].
+ If missing info → reply: "Not in docs."
+
+ Question: {query}
+
+ Context:
+ {context}
+ """
    answer = call_llm(prompt)
+     src = "\n".join(sources) if sources else "No sources available."
+     history.append((query, answer + "\n\n---\nSources:\n" + src))
+     return history, ""

+ # ------------------ GRADIO UI ------------------ #

custom_css = """
.source-box {
+     background: #1e293b;
    padding: 10px;
    border-radius: 8px;
+     color: #dbeafe;
+     border: 1px solid #3b82f6;
}
"""

+ with gr.Blocks(theme="soft") as app:
+     gr.HTML(f"<style>{custom_css}</style>")

+     gr.HTML("<h1 style='text-align:center;color:#3b82f6'>☸ Kubernetes RAG Assistant</h1>"
+             "<p style='text-align:center;color:#cbd5e1'>Ask Kubernetes questions — answers include official docs citations 📌</p>")

    chat = gr.Chatbot(label="Conversation", height=450)
+     msg = gr.Textbox(label="Ask a question...", placeholder="What is a pod?")
    clear = gr.Button("Clear Chat")

+     msg.submit(answer_question, [msg, chat], [chat, msg])
    clear.click(lambda: ([], ""), None, [chat, msg])

+ app.launch()
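As a quick way to exercise the updated pipeline outside the Gradio UI, a minimal sketch that reuses the functions defined above; it assumes OPENROUTER_API_KEY is set and that the scrape step produced at least one document:

# Hypothetical smoke test: run one question through retrieval plus the LLM, no UI.
history = []
history, _ = answer_question("What is a Pod?", history)
print(history[-1][1])  # answer text followed by the appended "Sources:" block

Because answer_question both updates the history and returns it alongside an empty string for the textbox, the same function can be wired straight into msg.submit, which is how the UI code above uses it.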