Update src/qa.py
src/qa.py
CHANGED
@@ -1,5 +1,5 @@
 """
-qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
+qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval (Stable Strict)
 --------------------------------------------------
 ✅ Semantic retrieval (FAISS + cosine re-rank + neighbor fill)
 ✅ Bullet-aware similarity boost for procedural chunks
@@ -7,7 +7,7 @@ qa.py – GPT-4o (SAP Gen AI Hub) + ReRank Retrieval + PRF Query Expansion
 ✅ Smart factual mode (fast)
 ✅ Deep reasoning mode (ChatGPT-like)
 ✅ genai_generate() helper for suggestions
-✅ Lightweight PRF query expansion
+✅ Slightly softened Strict Prompt for better partial-context answers
 """
 
 import os
@@ -16,13 +16,12 @@ import json
 import pickle
 import hashlib
 import numpy as np
-from collections import Counter
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
 from gen_ai_hub.proxy.langchain.openai import ChatOpenAI
 
-print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + PRF) loaded from:", __file__)
+print("✅ qa.py (GPT-4o via Gen AI Hub + Bullet-Aware Retrieval + Cache) loaded from:", __file__)
 
 # ==========================================================
 # 🧱 Permanent Embeddings Cache Directory
@@ -58,14 +57,17 @@ os.environ.update({
 # 2️⃣ Embedding Model (E5-small-v2)
 # ==========================================================
 try:
-    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
+    _query_model = SentenceTransformer(
+        "intfloat/e5-small-v2",
+        cache_folder=CACHE_DIR
+    )
     print("✅ Loaded embedding model: intfloat/e5-small-v2 (fast mode)")
 except Exception as e:
     print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
     _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
 
 # ==========================================================
-# 3️⃣ GPT-4o via SAP Gen AI Hub – Lazy
+# 3️⃣ GPT-4o via SAP Gen AI Hub – Lazy Initialization
 # ==========================================================
 CRED_PATH = os.path.join(os.path.dirname(__file__), "GEN AI HUB PROXY.json")
 _chat_llm = None
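Note: e5-small-v2 only retrieves well when inputs carry E5's "query: " / "passage: " prefix convention, which this file applies consistently. A self-contained sanity check (the two sentences are made up):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/e5-small-v2")
q = model.encode(["query: how do I enable commerce automation?"], normalize_embeddings=True)
p = model.encode(["passage: Commerce automation requires an activated storefront."], normalize_embeddings=True)
print((q @ p.T).item())  # cosine similarity, since both vectors are unit-normalized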
@@ -101,8 +103,9 @@ def get_chat_llm(model_name: str = "gpt-4o", temperature: float = 0.3, max_token
         _chat_llm = None
         raise
 
+
 # ==========================================================
-# 4️⃣ Embedding Generator (
+# 4️⃣ Embedding Generator (Batch-Optimized)
 # ==========================================================
 def embed_chunks(chunks, batch_size: int = 32):
     if not chunks:
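The body of embed_chunks() falls outside this hunk; a minimal sketch of what a batch-optimized version typically looks like for an E5-family model (an assumption, not the file's actual code):

def embed_chunks_sketch(model, chunks, batch_size: int = 32):
    # E5-family models expect a "passage: " prefix on document text
    texts = [f"passage: {c.strip()}" for c in chunks]
    return model.encode(
        texts,
        batch_size=batch_size,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )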
@@ -162,8 +165,9 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
     _clean_old_caches(base_name, keep_latest=5)
     return embeddings
 
+
 # ==========================================================
-# 6️⃣ Prompt Templates
+# 6️⃣ Prompt Templates (Improved Strict)
 # ==========================================================
 STRICT_PROMPT = (
     "You are an enterprise documentation assistant.\n"
@@ -172,8 +176,8 @@ STRICT_PROMPT = (
     "When the answer focuses on a single concept, definition, or explanation, write it as a clear and compact paragraph.\n"
     "Keep the tone professional and concise. Do not invent facts outside the provided content.\n"
     "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
-    "If
-    "If nothing in the CONTEXT relates to the question, reply exactly:\n"
+    "If only partial or indirect clues are available (e.g., related words, hints, or contextual evidence), infer cautiously and start the response with 'Based on the available information,'.\n"
+    "If absolutely nothing in the CONTEXT relates to the question, reply exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
 )
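Both templates are plain str.format() strings with {context} and {query} slots; filling one looks like this (sample values invented):

context = "\n\n".join(["Chunk one text...", "Chunk two text..."])  # stand-in chunks
prompt = STRICT_PROMPT.format(context=context, query="What is commerce automation?")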
@@ -183,36 +187,12 @@ REASONING_PROMPT = (
     "Think step by step and synthesize information even if scattered across chunks.\n"
     "Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
     "You may fill reasonable gaps with general knowledge to form a complete answer.\n"
-    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', 'passages', or 'sections of the document'.\n"
+    "Do not mention or refer to internal elements such as 'chunks', 'chunk numbers', or 'sections of the document'.\n"
     "If absolutely nothing in the document relates, say exactly:\n"
     "'I don't know based on the provided document.'\n\n"
     "Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
 )
 
-# ==========================================================
-# 🔹 NEW: Lightweight PRF Query Expansion
-# ==========================================================
-def expand_query_embedding(query, model, index, chunks, topN=40, alpha=0.75):
-    """
-    Expands the query embedding slightly using top candidate chunks (PRF-style).
-    Helps when query wording differs from document phrasing.
-    """
-    try:
-        q_emb = model.encode([f"query: {query}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        D, I = index.search(np.array([q_emb]).astype("float32"), topN)
-        texts = " ".join(chunks[i] for i in I[0] if i >= 0)
-        words = re.findall(r"[A-Za-z]{4,}", texts)
-        common = [w for w, _ in Counter(words).most_common(6) if w.lower() not in query.lower()]
-        if not common:
-            return q_emb
-        e_emb = model.encode([f"passage: {' '.join(common)}"], convert_to_numpy=True, normalize_embeddings=True)[0]
-        combined = alpha * q_emb + (1 - alpha) * e_emb
-        combined /= np.linalg.norm(combined)
-        print(f"🔍 Query expanded with: {common}")
-        return combined
-    except Exception as e:
-        print(f"⚠️ Query expansion skipped due to error: {e}")
-        return q_emb
 
 # ==========================================================
 # 7️⃣ Retrieval – FAISS + Bullet-Aware Re-rank + Neighbor Fill
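For reference, the deleted helper was textbook pseudo-relevance feedback: embed the query, mine frequent terms from the top-N hits, and nudge the query vector toward them. Its core update is the normalized convex blend below, lifted out as a standalone function:

import numpy as np

def prf_blend(q_emb: np.ndarray, e_emb: np.ndarray, alpha: float = 0.75) -> np.ndarray:
    # convex blend of the query vector and the expansion-term vector,
    # renormalized to unit length so cosine / inner-product scoring still holds
    combined = alpha * q_emb + (1 - alpha) * e_emb
    return combined / np.linalg.norm(combined)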
@@ -227,8 +207,11 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         return []
 
     try:
-        q_emb = expand_query_embedding(query, _query_model, index, chunks)
-
+        q_emb = _query_model.encode(
+            [f"query: {query.strip()}"],
+            convert_to_numpy=True,
+            normalize_embeddings=True
+        )[0]
 
         if hasattr(index, "d") and q_emb.shape[0] != index.d:
             print(f"⚠️ FAISS dimension mismatch: index={index.d}, query={q_emb.shape[0]}")
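The dimension guard protects against a FAISS index that was built with a different embedding model than the one currently loaded. The search step that produces candidate_indices sits outside this hunk; a sketch assuming an inner-product FAISS index (faiss_candidates is a hypothetical name, not the file's):

import numpy as np

def faiss_candidates(index, q_emb: np.ndarray, top_n: int = 40) -> list:
    # index.search returns (scores, ids); FAISS pads ids with -1 when it
    # holds fewer than top_n vectors
    _, ids = index.search(np.asarray([q_emb], dtype="float32"), top_n)
    return [int(i) for i in ids[0] if i >= 0]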
@@ -248,6 +231,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
             normalize_embeddings=True,
         )
         sims = cosine_similarity([q_emb], doc_embs)[0]
+
         boosted_sims = []
         for idx, sim in zip(candidate_indices, sims):
             text = chunks[idx].strip()
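The bullet-aware boost applied inside this loop is also outside the hunk; a hedged sketch of the usual pattern (the regex and the 1.05 multiplier are illustrative, not the file's values):

import re

def bullet_boost(text: str, sim: float, boost: float = 1.05) -> float:
    # reward procedural-looking chunks: bulleted or numbered lines
    if re.match(r"^\s*([-*•]|\d+[.)])\s", text):
        return sim * boost
    return sim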
@@ -276,6 +260,7 @@ def retrieve_chunks(query: str, index, chunks: list, top_k: int = 7,
         print(f"⚠️ Retrieval error: {repr(e)}")
         return []
 
+
 # ==========================================================
 # 8️⃣ Answer Generation
 # ==========================================================
@@ -301,6 +286,7 @@ def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = F
         "'I don't know based on the provided document.'"},
         {"role": "user", "content": prompt},
     ]
+
     try:
         response = chat_llm_local.invoke(messages)
         return response.content.strip()
@@ -308,6 +294,7 @@
         print(f"⚠️ GPT-4o generation failed: {e}")
         return "⚠️ Error: Could not generate an answer."
 
+
 # ==========================================================
 # 9️⃣ Generic Text Generation Helper
 # ==========================================================
@@ -329,6 +316,7 @@ def genai_generate(prompt: str) -> str:
         print(f"⚠️ genai_generate() failed: {e}")
         return "⚠️ Unable to generate response."
 
+
 # ==========================================================
 # 🔍 Local Test
 # ==========================================================
@@ -344,6 +332,7 @@ if __name__ == "__main__":
 
     embeddings = embed_chunks(dummy_chunks)
     index = build_faiss_index(embeddings)
+
     query = "What are the prerequisites for commerce automation?"
     retrieved = retrieve_chunks(query, index, dummy_chunks)
     print("🔍 Retrieved:", retrieved)
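build_faiss_index() is exercised here but never shown in the diff; with unit-normalized embeddings the standard construction is an inner-product flat index (a sketch under that assumption):

import faiss
import numpy as np

def build_faiss_index_sketch(embeddings: np.ndarray):
    # inner product over unit-normalized vectors equals cosine similarity
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))
    return index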