Update src/qa.py
Browse files
src/qa.py
CHANGED
|
@@ -182,34 +182,35 @@ def cache_embeddings(file_name: str, chunks, embed_func, chunk_size: int = None,
|
|
| 182 |
return embeddings
|
| 183 |
|
| 184 |
# ==========================================================
|
| 185 |
-
# 6️⃣ Prompt Templates (…)
|
| 186 |
# ==========================================================
|
| 187 |
-
|
| 188 |
STRICT_PROMPT = (
|
| 189 |
"You are an enterprise documentation assistant.\n"
|
| 190 |
"Use all relevant information from the CONTEXT below.\n"
|
| 191 |
-
"…"
|
| 192 |
-
"…"
|
| 193 |
-
"…"
|
|
|
|
|
|
|
| 194 |
"If the answer cannot be found even after considering all chunks, say exactly:\n"
|
| 195 |
"'I don't know based on the provided document.'\n\n"
|
| 196 |
"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
|
| 197 |
-
|
| 198 |
)
|
| 199 |
|
| 200 |
-
|
| 201 |
REASONING_PROMPT = (
|
| 202 |
"You are an expert enterprise assistant capable of reasoning.\n"
|
| 203 |
"Think step by step and synthesize information even if scattered across chunks.\n"
|
| 204 |
"Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
|
| 205 |
"You may fill reasonable gaps with general knowledge to form a complete answer.\n"
|
|
|
|
|
|
|
|
|
|
| 206 |
"If absolutely nothing in the document relates, say exactly:\n"
|
| 207 |
"'I don't know based on the provided document.'\n\n"
|
| 208 |
"Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
|
| 209 |
)
|
| 210 |
|
| 211 |
|
| 212 |
-
|
| 213 |
# ==========================================================
|
| 214 |
# 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
|
| 215 |
# ==========================================================
|
|
|
|
| 182 |
return embeddings
|
| 183 |
|
| 184 |
# ==========================================================
|
| 185 |
+
# 6️⃣ Prompt Templates (update to forbid internal metadata)
|
| 186 |
# ==========================================================
|
|
|
|
| 187 |
STRICT_PROMPT = (
|
| 188 |
"You are an enterprise documentation assistant.\n"
|
| 189 |
"Use all relevant information from the CONTEXT below.\n"
|
| 190 |
+
"If multiple related points appear across chunks, combine them into one clear answer.\n"
|
| 191 |
+
"Keep the answer concise but complete. Do not invent facts outside the provided content.\n"
|
| 192 |
+
"Do NOT mention any internal retrieval metadata, chunk identifiers, vector IDs, or system-level labels "
|
| 193 |
+
"(for example, do NOT say 'Chunk 7' or 'chunk 3:'). If you need to refer to the document, say "
|
| 194 |
+
"'the document' or quote the excerpt directly.\n"
|
| 195 |
"If the answer cannot be found even after considering all chunks, say exactly:\n"
|
| 196 |
"'I don't know based on the provided document.'\n\n"
|
| 197 |
"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
|
|
|
|
| 198 |
)
|
| 199 |
|
|
|
|
| 200 |
REASONING_PROMPT = (
|
| 201 |
"You are an expert enterprise assistant capable of reasoning.\n"
|
| 202 |
"Think step by step and synthesize information even if scattered across chunks.\n"
|
| 203 |
"Base your answer primarily on the CONTEXT, but if multiple partial clues exist, combine them logically.\n"
|
| 204 |
"You may fill reasonable gaps with general knowledge to form a complete answer.\n"
|
| 205 |
+
"Do NOT mention any internal retrieval metadata, chunk identifiers, vector IDs, or system-level labels "
|
| 206 |
+
"(for example, do NOT say 'Chunk 7' or 'chunk 3:'). If you need to reference source text, say 'the document' "
|
| 207 |
+
"or provide a short quoted excerpt.\n"
|
| 208 |
"If absolutely nothing in the document relates, say exactly:\n"
|
| 209 |
"'I don't know based on the provided document.'\n\n"
|
| 210 |
"Context:\n{context}\n\nQuestion: {query}\nLet's reason step-by-step:\nAnswer:"
|
| 211 |
)
|
| 212 |
|
| 213 |
|
|
|
|
| 214 |
# ==========================================================
|
| 215 |
# 7️⃣ Retrieval — FAISS + Bullet-Aware Re-rank + Neighbor Fill
|
| 216 |
# ==========================================================
|