Update src/qa.py
src/qa.py CHANGED
@@ -268,15 +268,37 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
 
 
 # ==========================================================
-# 8️⃣ Answer Generation
-# ==========================================================
-# ==========================================================
-# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware)
+# 8️⃣ Answer Generation (Lazy GPT-4o Initialization + Language-Aware + Token-Safe)
 # ==========================================================
+def truncate_context(context_text: str, max_tokens: int = 100000, model: str = "gpt-4o") -> str:
+    """
+    Truncate context to stay safely within model limits (~128k tokens).
+    Keeps only the earliest tokens up to max_tokens.
+    """
+    try:
+        import tiktoken
+        enc = tiktoken.encoding_for_model(model)
+    except Exception:
+        try:
+            import tiktoken
+            enc = tiktoken.get_encoding("cl100k_base")
+        except Exception:
+            # crude fallback — approximate truncation
+            return context_text[: max_tokens * 4]
+
+    tokens = enc.encode(context_text)
+    if len(tokens) > max_tokens:
+        truncated = enc.decode(tokens[:max_tokens])
+        print(f"⚠️ Context truncated from {len(tokens):,} → {max_tokens:,} tokens.")
+        return truncated
+    return context_text
+
+
 def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     """
     Generates an answer using GPT-4o (SAP Gen AI Hub proxy).
-    Now supports Hindi or English response formatting automatically
+    Now supports Hindi or English response formatting automatically,
+    with safe context truncation to prevent token overflow.
     """
     if not retrieved_chunks:
         return "Sorry, I couldn’t find relevant information in the document."
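
A note on the new helper: `truncate_context` counts tokens with `tiktoken`, decodes only the first `max_tokens` back to text, and falls back to a rough four-characters-per-token slice when `tiktoken` is unavailable. Below is a minimal standalone sketch of that core encode/slice/decode logic, assuming `tiktoken` is installed; the sample text and the 10-token budget are illustrative only.

```python
import tiktoken

# cl100k_base is the fallback encoding named in the helper above.
enc = tiktoken.get_encoding("cl100k_base")

sample = "SAP BTP exposes services through the cockpit. " * 20
tokens = enc.encode(sample)
print(len(tokens))        # total token count for the sample

# Keep only the earliest 10 tokens, mirroring truncate_context.
truncated = enc.decode(tokens[:10])
print(truncated)          # a short prefix of the original text
```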
@@ -287,10 +309,20 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     except Exception:
         return "⚠️ GPT-4o not initialized. Check credentials or rebuild the Space."
 
-    #
+    # ----------------------------------------------------------
+    # 🧩 Build and clean context (deduplicate + truncate safely)
+    # ----------------------------------------------------------
     context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
 
+    # Remove duplicate lines to save tokens
+    context = "\n".join(dict.fromkeys(context.splitlines()))
+
+    # Truncate to stay within GPT-4o 128k context limit
+    context = truncate_context(context, 100000)
+
+    # ----------------------------------------------------------
     # 🌐 Language-specific prompt logic
+    # ----------------------------------------------------------
     if doc_lang == "hi":
         # Hindi-language response
         prompt = (
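
The deduplication step relies on `dict.fromkeys` preserving insertion order (guaranteed since Python 3.7), so repeated lines collapse to their first occurrence without reordering the context. A self-contained illustration with invented chunk text:

```python
lines = [
    "[Chunk 1] Step 1: open the BTP cockpit.",
    "Shared footer text",
    "[Chunk 2] Step 2: assign the role collection.",
    "Shared footer text",   # exact duplicate of an earlier line, dropped below
]
deduped = "\n".join(dict.fromkeys(lines))
print(deduped)
# [Chunk 1] Step 1: open the BTP cockpit.
# Shared footer text
# [Chunk 2] Step 2: assign the role collection.
```

Note that because each chunk's first line carries a unique `[Chunk N]` prefix, only continuation lines inside multi-line chunks can actually collide.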
@@ -302,21 +334,31 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
         )
     else:
         # Default English prompts
-        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(context=context, query=query)
+        prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
+            context=context, query=query
+        )
 
-    #
+    # ----------------------------------------------------------
+    # 💬 System + user messages
+    # ----------------------------------------------------------
     messages = [
-        {
-            "role": "system",
-            "content": (
-
-
-            ),
-        },
+        {
+            "role": "system",
+            "content": (
+                "You are an expert enterprise documentation assistant. "
+                "When reasoning_mode is off, stay strictly factual and concise. "
+                "When reasoning_mode is on, combine insights across chunks logically "
+                "and explain briefly. "
+                "If the answer is not in the document, reply exactly: "
+                "'I don't know based on the provided document.'"
+            ),
+        },
         {"role": "user", "content": prompt},
     ]
 
-    #
+    # ----------------------------------------------------------
+    # 🧠 Generate answer safely
+    # ----------------------------------------------------------
     try:
         response = chat_llm_local.invoke(messages)
         return response.content.strip()
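
`REASONING_PROMPT` and `STRICT_PROMPT` are defined elsewhere in src/qa.py and do not appear in this diff; the `.format(context=..., query=...)` call only requires that both templates expose `{context}` and `{query}` placeholders. A hypothetical stand-in to make that contract concrete (the template wording is invented, not the repo's actual prompt):

```python
# Invented stand-in for the real STRICT_PROMPT defined elsewhere in src/qa.py.
STRICT_PROMPT = (
    "Answer using ONLY the document context below.\n"
    "Context:\n{context}\n\n"
    "Question: {query}"
)

prompt = STRICT_PROMPT.format(
    context="[Chunk 1] The connector requires a service key.",
    query="What does the connector require?",
)
print(prompt)
```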
@@ -325,7 +367,6 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False, doc_lang: str = "en"):
     return "⚠️ Error: Could not generate an answer."
 
 
-
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
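
Taken together, a hypothetical end-to-end call into the updated function; the import path assumes the repo layout shown above (src/qa.py), the chunk texts are invented, and the call still requires the GPT-4o proxy credentials the module expects:

```python
# Hypothetical usage sketch; assumes src/qa.py's GPT-4o proxy is configured.
from src.qa import generate_answer

chunks = [
    "The BTP destination service stores connection details.",
    "Destinations are configured in the subaccount cockpit.",
]
answer = generate_answer(
    "Where are destinations configured?",
    chunks,
    reasoning_mode=False,
    doc_lang="en",
)
print(answer)
```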