feat: export LLM synthesis context to cache for debugging
Save full LLM context to _cache/llm_context_TIMESTAMP.txt:
- System prompt
- User prompt with evidence
- Raw evidence items
- LLM's final answer
Helps debug why LLM fails despite having correct evidence.
Co-Authored-By: Claude <noreply@anthropic.com>
- src/agent/llm_client.py +47 -0
src/agent/llm_client.py
CHANGED
|
@@ -1107,6 +1107,44 @@ Examples of bad answers (too verbose):
|
|
| 1107 |
|
| 1108 |
Extract the factoid answer from the evidence above. Return only the factoid, nothing else."""
|
| 1109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
logger.info(f"[synthesize_answer_hf] Calling HuggingFace for answer synthesis")
|
| 1111 |
|
| 1112 |
messages = [
|
|
@@ -1137,6 +1175,15 @@ Extract the factoid answer from the evidence above. Return only the factoid, not
|
|
| 1137 |
answer = response.choices[0].message.content.strip()
|
| 1138 |
logger.info(f"[synthesize_answer_hf] Generated answer: {answer}")
|
| 1139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1140 |
return answer
|
| 1141 |
|
| 1142 |
|
|
|
|
| 1107 |
|
| 1108 |
Extract the factoid answer from the evidence above. Return only the factoid, nothing else."""
|
| 1109 |
|
| 1110 |
+
# ============================================================================
|
| 1111 |
+
# SAVE LLM CONTEXT TO CACHE - For debugging and comparison
|
| 1112 |
+
# ============================================================================
|
| 1113 |
+
from pathlib import Path
|
| 1114 |
+
import datetime
|
| 1115 |
+
|
| 1116 |
+
cache_dir = Path("_cache")
|
| 1117 |
+
cache_dir.mkdir(exist_ok=True)
|
| 1118 |
+
|
| 1119 |
+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1120 |
+
context_file = cache_dir / f"llm_context_{timestamp}.txt"
|
| 1121 |
+
|
| 1122 |
+
with open(context_file, "w", encoding="utf-8") as f:
|
| 1123 |
+
f.write("=" * 80 + "\n")
|
| 1124 |
+
f.write("LLM SYNTHESIS CONTEXT\n")
|
| 1125 |
+
f.write("=" * 80 + "\n")
|
| 1126 |
+
f.write(f"Timestamp: {datetime.datetime.now().isoformat()}\n")
|
| 1127 |
+
f.write(f"Question: {question}\n")
|
| 1128 |
+
f.write(f"Evidence items: {len(evidence)}\n")
|
| 1129 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1130 |
+
f.write("SYSTEM PROMPT:\n")
|
| 1131 |
+
f.write("=" * 80 + "\n")
|
| 1132 |
+
f.write(system_prompt)
|
| 1133 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1134 |
+
f.write("USER PROMPT:\n")
|
| 1135 |
+
f.write("=" * 80 + "\n")
|
| 1136 |
+
f.write(user_prompt)
|
| 1137 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1138 |
+
f.write("EVIDENCE ITEMS:\n")
|
| 1139 |
+
f.write("=" * 80 + "\n")
|
| 1140 |
+
for i, ev in enumerate(evidence):
|
| 1141 |
+
f.write(f"\n--- Evidence {i+1}/{len(evidence)} ---\n")
|
| 1142 |
+
f.write(ev)
|
| 1143 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1144 |
+
|
| 1145 |
+
logger.info(f"[synthesize_answer_hf] LLM context saved to: {context_file}")
|
| 1146 |
+
# ============================================================================
|
| 1147 |
+
|
| 1148 |
logger.info(f"[synthesize_answer_hf] Calling HuggingFace for answer synthesis")
|
| 1149 |
|
| 1150 |
messages = [
|
|
|
|
| 1175 |
answer = response.choices[0].message.content.strip()
|
| 1176 |
logger.info(f"[synthesize_answer_hf] Generated answer: {answer}")
|
| 1177 |
|
| 1178 |
+
# Append answer to context file
|
| 1179 |
+
with open(context_file, "a", encoding="utf-8") as f:
|
| 1180 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1181 |
+
f.write("LLM ANSWER:\n")
|
| 1182 |
+
f.write("=" * 80 + "\n")
|
| 1183 |
+
f.write(answer)
|
| 1184 |
+
f.write("\n" + "=" * 80 + "\n")
|
| 1185 |
+
logger.info(f"[synthesize_answer_hf] Answer appended to context file")
|
| 1186 |
+
|
| 1187 |
return answer
|
| 1188 |
|
| 1189 |
|