MukulRay committed on
Commit
3898be2
·
1 Parent(s): e3bae7a

feat: RAG Upgrade++

Browse files
Files changed (1) hide show
  1. rag.py +140 -23
rag.py CHANGED
@@ -1,7 +1,9 @@
1
  import os
2
  import logging
 
3
 
4
  import torch
 
5
  from dotenv import load_dotenv
6
 
7
  load_dotenv()
@@ -20,32 +22,97 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6
20
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
21
  PINECONE_INDEX = os.getenv("PINECONE_INDEX", "llmops-rag")
22
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
23
- GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
24
 
25
- PROMPT_TEMPLATE = """You are Akasha, the Irminsul terminal for Genshin Impact.
26
- Answer using ONLY the context provided. Be thorough and structured.
27
 
28
- For BUILD questions, always cover:
29
- - Artifact sets with set bonus explained
30
- - Stats: Sands/Goblet/Circlet main stats + substat priority with thresholds
31
- - Weapons: BiS and F2P alternatives with reasoning
32
- - Teams: 2-3 strong compositions with role explanation
33
- - Playstyle notes: rotations, synergies, what makes this build work
34
 
35
- For LORE questions: relationships, motivations, key events, story significance.
36
- For MECHANICS: exact multipliers, how it interacts with reactions, practical examples.
37
 
38
- CRITICAL: If the context does not contain enough information to answer confidently,
39
- say exactly: "The Irminsul has limited records on this β€” my knowledge may be incomplete."
40
- Never invent stats, story details, or character abilities not present in the context.
 
 
 
41
 
42
- Context:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  {context}
44
 
45
  Question: {question}
46
 
47
  Akasha:"""
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def _build_groq_llm():
50
  from langchain_groq import ChatGroq
51
 
@@ -56,8 +123,8 @@ def _build_groq_llm():
56
  return ChatGroq(
57
  api_key=GROQ_API_KEY,
58
  model_name=GROQ_MODEL,
59
- temperature=0.2,
60
- max_tokens=1024,
61
  )
62
 
63
 
@@ -95,7 +162,7 @@ def _build_local_llm():
95
  "text-generation",
96
  model=model,
97
  tokenizer=tokenizer,
98
- max_new_tokens=256,
99
  do_sample=False,
100
  temperature=None,
101
  top_p=None,
@@ -113,10 +180,12 @@ class RAGChain:
113
  def __init__(self):
114
  self.ready = False
115
  self.chain = None
 
116
  self.vectorstore = None
 
117
 
118
  def load(self):
119
- llm = _build_groq_llm() if LLM_BACKEND == "groq" else _build_local_llm()
120
 
121
  logger.info("Connecting to Pinecone...")
122
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
@@ -130,20 +199,68 @@ class RAGChain:
130
  input_variables=["context", "question"],
131
  )
132
  self.chain = RetrievalQA.from_chain_type(
133
- llm=llm,
134
  chain_type="stuff",
135
- retriever=self.vectorstore.as_retriever(search_kwargs={"k": 8}),
 
 
136
  return_source_documents=True,
137
  chain_type_kwargs={"prompt": prompt},
138
  )
 
139
  self.ready = True
140
- logger.info(f"RAG chain ready β€” backend: {LLM_BACKEND}")
141
 
142
- def query(self, question: str, top_k: int = 3) -> tuple[str, list[str]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  if not self.ready:
144
  raise RuntimeError("RAG chain is not loaded.")
145
 
146
  self.chain.retriever.search_kwargs["k"] = top_k
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  result = self.chain.invoke({"query": question})
148
  answer = result["result"].strip().replace("</s>", "").strip()
149
  sources = [
 
1
  import os
2
  import logging
3
+ import re
4
 
5
  import torch
6
+ import requests
7
  from dotenv import load_dotenv
8
 
9
  load_dotenv()
 
22
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
23
  PINECONE_INDEX = os.getenv("PINECONE_INDEX", "llmops-rag")
24
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
25
+ GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
26
 
27
+ # Minimum Pinecone score to trust corpus — below this triggers web fallback
28
+ CORPUS_CONFIDENCE_THRESHOLD = 0.35
29
 
30
+ PROMPT_TEMPLATE = """You are Akasha β€” the living memory of Teyvat, an omniscient Genshin Impact assistant with the depth of a master theorycafter and the storytelling of a lore scholar.
 
 
 
 
 
31
 
32
+ You have access to peer-reviewed theorycrafting data, exact game stats, and synthesized knowledge across all of Teyvat's history.
 
33
 
34
+ ANSWER RULES:
35
+ - Be thorough, specific, and structured. Never give one-liners for complex questions.
36
+ - Use exact numbers from the context β€” ER thresholds, EM values, CRIT ratios, multipliers.
37
+ - If context is thin on a topic, say: "The Irminsul's records on this are limited." Then share what you do know.
38
+ - Never invent stats, story details, or abilities not present in the context.
39
+ - Write like an expert who genuinely loves the game β€” not a generic AI assistant.
40
 
41
+ FORMAT GUIDE by question type:
42
+
43
+ For BUILD questions:
44
+ **[Character] β€” [Role] Build**
45
+ **How it works:** [brief kit explanation β€” what makes this character deal damage or provide value]
46
+ **Artifacts:** [set name] β€” [why this set, what the bonus does for this character]
47
+ **Main stats:** Sands: [stat] | Goblet: [stat] | Circlet: [stat]
48
+ **Substat priority:** [ordered list with thresholds e.g. ER β‰₯180% β†’ EM β†’ CRIT 1:2]
49
+ **Weapons:** BiS: [weapon + why] | F2P: [weapon + why]
50
+ **Teams:** [2-3 comps with role explanation]
51
+ **Notes:** [rotation tips, constellation breakpoints, common mistakes]
52
+
53
+ For LORE questions:
54
+ Answer in flowing prose. Cover: who they are, their motivations, key relationships, their role in the story, and what makes them memorable. Include specific quest/event references where available.
55
+
56
+ For MECHANICS questions:
57
+ Explain the concept clearly, give the exact formula or interaction, then a practical example showing when/why it matters in actual gameplay.
58
+
59
+ ---
60
+ Context from Irminsul records:
61
  {context}
62
 
63
  Question: {question}
64
 
65
  Akasha:"""
66
 
67
+ WEB_PROMPT_TEMPLATE = """You are Akasha β€” the Genshin Impact assistant. The Irminsul's local records didn't have strong coverage for this query, so you retrieved live data from trusted sources.
68
+
69
+ Trusted source data:
70
+ {context}
71
+
72
+ Answer the question thoroughly using this data. Follow the same format rules:
73
+ - Builds: cover artifacts, stats, weapons, teams
74
+ - Lore: prose with relationships and story significance
75
+ - Mechanics: formula + practical example
76
+
77
+ Question: {question}
78
+
79
+ Akasha (from live sources):"""
80
+
81
+
82
def _strip_html(markup: str) -> str:
    """Reduce an HTML document to rough plain text."""
    from html import unescape

    # Script/style bodies are code, not content; left in place they would
    # eat most of the character budget once the tags around them are removed.
    text = re.sub(r"(?is)<(script|style|noscript)\b[^>]*>.*?</\1\s*>", " ", markup)
    text = re.sub(r"(?s)<!--.*?-->", " ", text)
    # Strip the remaining HTML tags.
    text = re.sub(r"<[^>]+>", " ", text)
    # Decode entities such as &amp; so the LLM sees the actual characters.
    text = unescape(text)
    # Collapse runs of three or more whitespace characters into a paragraph break.
    return re.sub(r"\s{3,}", "\n\n", text).strip()


def _fetch_wiki_page(character_name: str) -> str:
    """Fetch plain-text reference material about a subject as a web fallback.

    Tries the Genshin Impact Fandom wiki article for the subject first and
    then a game8.co search for "<subject> build". The first URL that answers
    HTTP 200 with non-empty text wins.

    Args:
        character_name: Subject to look up, e.g. "Hu Tao".

    Returns:
        Up to 4000 characters of the page with markup removed, or "" when the
        name is blank or no source could be fetched.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote, quote_plus

    name = character_name.strip()
    if not name:
        # A blank subject would resolve to the wiki landing page, which is
        # useless as context for the question.
        return ""

    # The subject comes from free-form user text, so it is percent-encoded
    # before being placed in a URL path or query string.
    urls = [
        f"https://genshin-impact.fandom.com/wiki/{quote(name.replace(' ', '_'), safe='')}",
        f"https://game8.co/games/Genshin-Impact/archives/search?q={quote_plus(name + ' build')}",
    ]
    headers = {"User-Agent": "Irminsul-RAG/1.0 (Genshin Impact assistant; educational)"}

    for url in urls:
        try:
            response = requests.get(url, headers=headers, timeout=8)
        except Exception as e:
            # Best-effort fallback: any failure just moves on to the next source.
            logger.warning(f"Web fallback failed for {url}: {e}")
            continue
        if response.status_code != 200:
            continue
        text = _strip_html(response.text)[:4000].strip()
        if text:
            return text
    return ""
105
+
106
+
107
# Filler phrases that describe the kind of question rather than its subject.
# Matched as whole words (with an optional plural "s") on lower-cased text so
# that names which merely contain a keyword, e.g. "Steambird", stay intact.
_SUBJECT_FILLER_RE = re.compile(
    r"\b(?:tell\s+me\s+about|who\s+is|what\s+is|how\s+does|explain|"
    r"builds?|lore|skills?|bursts?|talents?|teams?|artifacts?)\b"
)


def _extract_subject(query: str) -> str:
    """Best-effort extraction of the character/topic name from a query.

    Lower-cases the query, drops filler phrases ("who is", "build", ...),
    removes possessive "'s", collapses whitespace, trims surrounding
    punctuation and title-cases what is left.

    Args:
        query: The user's question, e.g. "Who is Zhongli?".

    Returns:
        The remaining text in title case, e.g. "Zhongli"; "" when nothing
        but filler was present.
    """
    text = _SUBJECT_FILLER_RE.sub(" ", query.lower())
    # "hu tao's build" should yield "hu tao", not "hu tao's".
    text = re.sub(r"['’]s\b", "", text)
    # Removing phrases leaves gaps; normalise to single spaces.
    text = " ".join(text.split())
    # Question marks and similar would otherwise end up in the lookup URL.
    text = text.strip(" ?!.,;:\"'")
    return text.title()
114
+
115
+
116
  def _build_groq_llm():
117
  from langchain_groq import ChatGroq
118
 
 
123
  return ChatGroq(
124
  api_key=GROQ_API_KEY,
125
  model_name=GROQ_MODEL,
126
+ temperature=0.3,
127
+ max_tokens=1500,
128
  )
129
 
130
 
 
162
  "text-generation",
163
  model=model,
164
  tokenizer=tokenizer,
165
+ max_new_tokens=512,
166
  do_sample=False,
167
  temperature=None,
168
  top_p=None,
 
180
  def __init__(self):
181
  self.ready = False
182
  self.chain = None
183
+ self.web_chain = None
184
  self.vectorstore = None
185
+ self.llm = None
186
 
187
  def load(self):
188
+ self.llm = _build_groq_llm() if LLM_BACKEND == "groq" else _build_local_llm()
189
 
190
  logger.info("Connecting to Pinecone...")
191
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
 
199
  input_variables=["context", "question"],
200
  )
201
  self.chain = RetrievalQA.from_chain_type(
202
+ llm=self.llm,
203
  chain_type="stuff",
204
+ retriever=self.vectorstore.as_retriever(
205
+ search_kwargs={"k": 8}
206
+ ),
207
  return_source_documents=True,
208
  chain_type_kwargs={"prompt": prompt},
209
  )
210
+
211
  self.ready = True
212
+ logger.info(f"RAG chain ready β€” backend: {LLM_BACKEND}, model: {GROQ_MODEL}")
213
 
214
def _corpus_has_coverage(self, question: str) -> tuple[bool, list]:
    """Decide whether the vector store has a good enough match for *question*.

    Runs a 3-result scored similarity search and compares the score of the
    first hit with ``CORPUS_CONFIDENCE_THRESHOLD``.

    Returns:
        ``(has_coverage, documents)``. No hits gives ``(False, [])``; any
        exception during the check gives ``(True, [])``.
    """
    try:
        hits = self.vectorstore.similarity_search_with_score(question, k=3)
        if not hits:
            return False, []

        # The first hit is treated as the best match.
        top_score = hits[0][1]
        logger.info(f"Top Pinecone score: {top_score:.3f}")

        # Higher score is treated as more similar.
        documents = [document for document, _score in hits]
        return top_score >= CORPUS_CONFIDENCE_THRESHOLD, documents
    except Exception as e:
        logger.warning(f"Coverage check failed: {e}")
        # Fail open: a broken coverage check must not block the corpus path.
        return True, []
230
+
231
+ def query(self, question: str, top_k: int = 8) -> tuple[str, list[str]]:
232
  if not self.ready:
233
  raise RuntimeError("RAG chain is not loaded.")
234
 
235
  self.chain.retriever.search_kwargs["k"] = top_k
236
+
237
+ # Check corpus coverage
238
+ has_coverage, _ = self._corpus_has_coverage(question)
239
+
240
+ if not has_coverage:
241
+ logger.info("Low corpus coverage β€” attempting web fallback")
242
+ subject = _extract_subject(question)
243
+ web_content = _fetch_wiki_page(subject)
244
+
245
+ if web_content:
246
+ # Answer from web data using the LLM directly
247
+ web_prompt = PromptTemplate(
248
+ template=WEB_PROMPT_TEMPLATE,
249
+ input_variables=["context", "question"],
250
+ )
251
+ from langchain_core.output_parsers import StrOutputParser
252
+ web_chain = web_prompt | self.llm | StrOutputParser()
253
+ try:
254
+ answer = web_chain.invoke({
255
+ "context": web_content,
256
+ "question": question,
257
+ })
258
+ answer = answer.strip().replace("</s>", "").strip()
259
+ return answer, ["web: wiki.gg/game8.co (live)"]
260
+ except Exception as e:
261
+ logger.warning(f"Web chain failed: {e}")
262
+
263
+ # Default: corpus RAG
264
  result = self.chain.invoke({"query": question})
265
  answer = result["result"].strip().replace("</s>", "").strip()
266
  sources = [