MukulRay committed on
Commit
b2fc5a3
·
1 Parent(s): cd9075d

Phase 1.2: fix STALE-before-CONTRADICTED bug — both checks now run, CONTRADICTED wins when both fire

Browse files
Files changed (2) hide show
  1. CHANGELOG.md +9 -0
  2. src/agents/critic.py +35 -33
CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
 
3
  ## [Unreleased]
4
 
 
 
 
 
 
 
 
 
 
5
  ### Phase 0 — Branch Setup
6
  - Created v2-edge-reliability branch from main
7
  - Added CHANGELOG.md
 
2
 
3
  ## [Unreleased]
4
 
5
+ ### Phase 1 — Integrity + Critic Fix
6
+ - 1.1: Archived patch_contradiction.py to eval/archived/ with README
7
+ - 1.2: Fixed critic_node — STALE and CONTRADICTED checks now both always run (evaluated independently, no short-circuit)
8
+ - Removed short-circuit: STALE no longer blocks CONTRADICTED
9
+ - When both fire, CONTRADICTED wins (richer signal)
10
+ - Removed **state spread from all return paths for consistency
11
+ - All 5 return paths now have identical key shape
12
+ - Added missing retry_count to FORCED_PASS path
13
+
14
  ### Phase 0 — Branch Setup
15
  - Created v2-edge-reliability branch from main
16
  - Added CHANGELOG.md
src/agents/critic.py CHANGED
@@ -157,10 +157,10 @@ def critic_node(state: ResearchState) -> ResearchState:
157
  if retry_count >= 2:
158
  logger.info("Critic: max retries reached, forcing PASS")
159
  return {
160
- **state,
161
  "critic_verdict": Verdict.FORCED_PASS,
162
  "critic_notes": "Max retries reached. Passing with available evidence.",
163
  "rewritten_questions": [],
 
164
  "calibration_bin": Verdict.FORCED_PASS,
165
  }
166
 
@@ -169,7 +169,6 @@ def critic_node(state: ResearchState) -> ResearchState:
169
  logger.info(f"Critic: insufficient papers ({len(papers)})")
170
  rewritten = _rewrite_questions(state.get("sub_questions") or [], "broaden")
171
  return {
172
- **state,
173
  "critic_verdict": Verdict.INSUFFICIENT,
174
  "critic_notes": f"Only {len(papers)} papers retrieved. Need at least 3.",
175
  "rewritten_questions": rewritten,
@@ -183,7 +182,6 @@ def critic_node(state: ResearchState) -> ResearchState:
183
  logger.info("Critic: insufficient high-score papers")
184
  rewritten = _rewrite_questions(state.get("sub_questions") or [], "broaden")
185
  return {
186
- **state,
187
  "critic_verdict": Verdict.INSUFFICIENT,
188
  "critic_notes": "Fewer than 3 papers with hybrid_score >= 0.40.",
189
  "rewritten_questions": rewritten,
@@ -191,41 +189,45 @@ def critic_node(state: ResearchState) -> ResearchState:
191
  "calibration_bin": Verdict.INSUFFICIENT,
192
  }
193
 
194
- # STALE evidence too old
195
  mean_age = _mean_age_months(papers)
196
- if mean_age > 24:
197
- logger.info(f"Critic: evidence is stale (mean age: {mean_age:.1f} months)")
198
- rewritten = _rewrite_questions(state.get("sub_questions") or [], "recent")
199
- return {
200
- **state,
201
- "critic_verdict": Verdict.STALE,
202
- "critic_notes": f"Mean paper age is {mean_age:.0f} months. Evidence may be outdated.",
203
- "rewritten_questions": rewritten,
204
- "retry_count": retry_count + 1,
205
- "calibration_bin": Verdict.STALE,
206
- }
207
 
208
- # CONTRADICTED — papers disagree
209
  contradictions = _detect_contradictions(papers)
210
- if contradictions:
211
- pa_title, pb_title, reason = contradictions[0]
212
- logger.info("Critic: contradiction detected")
213
- rewritten = _rewrite_questions(state.get("sub_questions") or [], "probe_contradiction")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  return {
215
- **state,
216
- "critic_verdict": Verdict.CONTRADICTED,
217
- "critic_notes": f"Contradiction: '{pa_title[:50]}' vs '{pb_title[:50]}'. {reason}",
218
- "rewritten_questions": rewritten,
219
- "retry_count": retry_count + 1,
220
- "calibration_bin": Verdict.CONTRADICTED,
221
  }
222
 
223
- # PASS
224
- logger.info("Critic: evidence passes")
 
225
  return {
226
- **state,
227
- "critic_verdict": Verdict.PASS,
228
- "critic_notes": f"Evidence is recent (mean age: {mean_age:.0f} months), sufficient ({len(papers)} papers), no contradictions detected.",
229
- "rewritten_questions": [],
230
- "calibration_bin": Verdict.PASS,
231
  }
 
157
  if retry_count >= 2:
158
  logger.info("Critic: max retries reached, forcing PASS")
159
  return {
 
160
  "critic_verdict": Verdict.FORCED_PASS,
161
  "critic_notes": "Max retries reached. Passing with available evidence.",
162
  "rewritten_questions": [],
163
+ "retry_count": retry_count,
164
  "calibration_bin": Verdict.FORCED_PASS,
165
  }
166
 
 
169
  logger.info(f"Critic: insufficient papers ({len(papers)})")
170
  rewritten = _rewrite_questions(state.get("sub_questions") or [], "broaden")
171
  return {
 
172
  "critic_verdict": Verdict.INSUFFICIENT,
173
  "critic_notes": f"Only {len(papers)} papers retrieved. Need at least 3.",
174
  "rewritten_questions": rewritten,
 
182
  logger.info("Critic: insufficient high-score papers")
183
  rewritten = _rewrite_questions(state.get("sub_questions") or [], "broaden")
184
  return {
 
185
  "critic_verdict": Verdict.INSUFFICIENT,
186
  "critic_notes": "Fewer than 3 papers with hybrid_score >= 0.40.",
187
  "rewritten_questions": rewritten,
 
189
  "calibration_bin": Verdict.INSUFFICIENT,
190
  }
191
 
192
+ # --- Run STALE and CONTRADICTED checks in parallel (both always run) ---
193
  mean_age = _mean_age_months(papers)
194
+ is_stale = mean_age > 24
 
 
 
 
 
 
 
 
 
 
195
 
 
196
  contradictions = _detect_contradictions(papers)
197
+ is_contradicted = len(contradictions) > 0
198
+
199
+ # --- Combine signals: CONTRADICTED wins when both fire ---
200
+ if is_contradicted and is_stale:
201
+ verdict = Verdict.CONTRADICTED
202
+ contradiction_details = "; ".join(f"'{c[0]}' vs '{c[1]}': {c[2]}" for c in contradictions)
203
+ notes = f"CONTRADICTED (also stale, mean age {mean_age:.0f} months). Contradictions found: {contradiction_details}"
204
+ strategy = "probe_contradiction"
205
+ elif is_contradicted:
206
+ verdict = Verdict.CONTRADICTED
207
+ contradiction_details = "; ".join(f"'{c[0]}' vs '{c[1]}': {c[2]}" for c in contradictions)
208
+ notes = f"Contradictions found: {contradiction_details}"
209
+ strategy = "probe_contradiction"
210
+ elif is_stale:
211
+ verdict = Verdict.STALE
212
+ notes = f"Evidence is stale (mean age {mean_age:.0f} months > 24 month threshold)"
213
+ strategy = "recent"
214
+ else:
215
+ # PASS — all checks clear
216
  return {
217
+ "critic_verdict": Verdict.PASS,
218
+ "critic_notes": f"Evidence passes all checks (mean age {mean_age:.0f} months, {len(papers)} papers, no contradictions detected)",
219
+ "retry_count": retry_count,
220
+ "rewritten_questions": [],
221
+ "calibration_bin": Verdict.PASS,
 
222
  }
223
 
224
+ # --- Non-PASS path: rewrite questions and return ---
225
+ sub_questions = state.get("sub_questions") or []
226
+ rewritten = _rewrite_questions(sub_questions, strategy)
227
  return {
228
+ "critic_verdict": verdict,
229
+ "critic_notes": notes,
230
+ "rewritten_questions": rewritten,
231
+ "retry_count": retry_count + 1,
232
+ "calibration_bin": verdict,
233
  }