import json
import re
from typing import Any, Dict, Optional, List

# If prompts.py doesn't exist, keep a safe fallback.
# The broad `except Exception` is deliberate: any failure while importing the
# optional module (missing file, missing name, error raised at import time)
# falls back to the built-in label list.
try:
    from prompts import ALLOWED_LABELS  # type: ignore
except Exception:
    ALLOWED_LABELS = [
        "none",
        "faulty generalization",
        "false causality",
        "circular reasoning",
        "ad populum",
        "ad hominem",
        "fallacy of logic",
        "appeal to emotion",
        "false dilemma",
        "equivocation",
        "fallacy of extension",
        "fallacy of relevance",
        "fallacy of credibility",
        "miscellaneous",
        "intentional",
    ]


# ----------------------------
# Robust JSON extraction
# ----------------------------
def stop_at_complete_json(text: str) -> Optional[str]:
    """Return the first brace-balanced ``{...}`` span of *text*.

    Scanning starts at the first ``{``. Braces inside double-quoted strings
    (with backslash escapes honoured) are ignored when counting depth.

    Returns:
        The substring from the first ``{`` through its matching ``}``, or
        ``None`` when there is no ``{`` or the braces never balance.
    """
    start = text.find("{")
    if start == -1:
        return None

    depth = 0
    in_str = False
    esc = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_str:
            if esc:
                # The previous character was a backslash; this one is literal.
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            continue
        if ch == '"':
            in_str = True
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return None


def _slice_json_candidate(s: str) -> Optional[str]:
    """Return the text between the first ``{`` and last ``}`` of the best cut.

    Uses the first complete brace-balanced object when there is one, and
    otherwise falls back to the whole string. Returns ``None`` when no
    ``{...}`` span exists.
    """
    cut = stop_at_complete_json(s) or s
    start = cut.find("{")
    end = cut.rfind("}")
    if start == -1 or end == -1 or end <= start:
        return None
    return cut[start : end + 1].strip()


def _loads_dict_or_none(cand: str) -> Optional[Dict[str, Any]]:
    """Parse *cand* as JSON and return it if it is an object, else ``None``."""
    try:
        parsed = json.loads(cand)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; RecursionError covers
        # pathologically nested input.
        return None
    return parsed if isinstance(parsed, dict) else None


def extract_first_json_obj(s: str) -> Optional[Dict[str, Any]]:
    """Extract and parse the first JSON object in *s* without any repairs.

    Returns:
        The parsed dict, or ``None`` if no object is found or parsing fails.
    """
    cand = _slice_json_candidate(s)
    if cand is None:
        return None
    return _loads_dict_or_none(cand)


# ----------------------------
# Extra robustness: remove stray unquoted fields (e.g., `confidence: 0.75`)
# that sometimes appear outside JSON strings due to generation glitches.
# ----------------------------
def _remove_unquoted_confidence_field(json_text: str) -> str:
    """Remove an unquoted ``confidence: <number>`` field outside strings.

    This is a targeted, conservative fix for a common LLM glitch: it only
    triggers when the scanner is NOT inside a quoted string, and only when
    the field is followed by a number and then a ``,`` or ``}`` delimiter.
    A comma (and surrounding whitespace) preceding the stray field is removed
    as well. If the stray field is the first member of a container, the
    comma that follows it is dropped so no leading comma is left behind.

    Returns:
        *json_text* with every such stray field removed.
    """
    out_chars: List[str] = []
    n = len(json_text)
    i = 0
    in_str = False
    esc = False

    def _skip_ws(pos: int) -> int:
        # Advance past whitespace starting at *pos*.
        while pos < n and json_text[pos].isspace():
            pos += 1
        return pos

    def _pop_trailing_ws_and_optional_comma() -> None:
        # Remove trailing whitespace.
        while out_chars and out_chars[-1].isspace():
            out_chars.pop()
        # Remove a trailing comma (and the whitespace before it).
        if out_chars and out_chars[-1] == ",":
            out_chars.pop()
            while out_chars and out_chars[-1].isspace():
                out_chars.pop()

    while i < n:
        ch = json_text[i]

        if in_str:
            out_chars.append(ch)
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            i += 1
            continue

        if ch == '"':
            in_str = True
            out_chars.append(ch)
            i += 1
            continue

        # Detect an unquoted `confidence: ` outside strings.
        # Only remove if followed by a number and then a delimiter.
        if json_text.startswith("confidence", i):
            j = _skip_ws(i + len("confidence"))
            if j < n and json_text[j] == ":":
                j = _skip_ws(j + 1)
                # Parse a simple number: optional sign, digits, optional
                # fractional part.
                if j < n and json_text[j] in "+-":
                    j += 1
                int_start = j
                while j < n and json_text[j].isdigit():
                    j += 1
                has_digit = j > int_start
                if j < n and json_text[j] == ".":
                    j += 1
                    frac_start = j
                    while j < n and json_text[j].isdigit():
                        j += 1
                    has_digit = has_digit or j > frac_start
                if has_digit:
                    k = _skip_ws(j)
                    if k < n and json_text[k] in {",", "}"}:
                        _pop_trailing_ws_and_optional_comma()
                        if json_text[k] == "," and (
                            not out_chars or out_chars[-1] in "{["
                        ):
                            # The stray field was the first member: keeping
                            # the comma would produce `{, ...`.
                            i = k + 1
                        else:
                            i = k  # keep delimiter
                        continue

        out_chars.append(ch)
        i += 1

    return "".join(out_chars)


def extract_json_obj_robust(s: str) -> Optional[Dict[str, Any]]:
    """Extract and parse the first JSON object from a model output string.

    - Cuts at the first complete `{...}` (brace-balanced, respecting strings).
    - Strips Markdown code-fence markers from the candidate.
    - Repairs a common glitch: an unquoted stray `confidence: <number>`.

    Returns:
        The parsed dict if parsing succeeds, else ``None``.
    """
    cand = _slice_json_candidate(s)
    if cand is None:
        return None
    cand = cand.replace("```json", "").replace("```", "").strip()
    cand = _remove_unquoted_confidence_field(cand)
    return _loads_dict_or_none(cand)


# ----------------------------
# Post-processing: remove template sentence
# ----------------------------
# NOTE: there is intentionally no `\b` after `\(s\)`. A word boundary after
# `)` only matches when a word character follows, so with it the sentence was
# never removed when followed by ".", whitespace, or end of string.
_TEMPLATE_RE = re.compile(
    r"\bthe input contains fallacious reasoning consistent with the predicted type\(s\)\.?",
    flags=re.IGNORECASE,
)


def strip_template_sentence(text: str) -> str:
    """Remove the boilerplate template sentence from *text* and tidy up.

    After removal, doubled periods are collapsed, runs of whitespace become a
    single space, and leading punctuation or whitespace left over from the
    removal is stripped.

    Returns:
        The cleaned text, or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    out = _TEMPLATE_RE.sub("", text)
    out = out.replace("..", ".").strip()
    out = re.sub(r"\s{2,}", " ", out)
    out = re.sub(r"^\s*[\-–—:;\.\s]+", "", out).strip()
    return out


# ----------------------------
# Output sanitation / validation
# ----------------------------
_MAX_QUOTE_CHARS = 240  # evidence quotes longer than this are truncated
_MAX_QUOTES = 3  # at most this many evidence quotes are kept per fallacy


def _clamp01(x: Any, default: float = 0.5) -> float:
    """Coerce *x* to a float clamped to ``[0.0, 1.0]``.

    Returns *default* when *x* cannot be converted to a float or is NaN.
    """
    try:
        v = float(x)
    except (TypeError, ValueError, OverflowError):
        return default
    if v != v:
        # NaN compares unequal to itself; it would otherwise slip through
        # both range comparisons below.
        return default
    if v < 0.0:
        return 0.0
    if v > 1.0:
        return 1.0
    return v


def _is_allowed_label(lbl: Any) -> bool:
    """Return True if *lbl* is a known fallacy label other than ``"none"``."""
    return isinstance(lbl, str) and lbl in ALLOWED_LABELS and lbl != "none"


def _clean_text_field(value: Any) -> str:
    """Stringify a free-text field and strip the template sentence.

    ``None`` (JSON ``null``) is treated as empty rather than being rendered
    as the literal text ``"None"``.
    """
    if value is None:
        return ""
    return strip_template_sentence(str(value).strip())


def _clean_evidence_quotes(quotes: Any, input_text: str) -> List[str]:
    """Keep only non-empty string quotes that occur verbatim in *input_text*.

    Each kept quote is stripped and truncated to ``_MAX_QUOTE_CHARS``.
    """
    if not isinstance(quotes, list):
        return []
    cleaned: List[str] = []
    for q in quotes:
        if not isinstance(q, str):
            continue
        qq = q.strip()
        if not qq or qq not in input_text:
            continue
        # A prefix of a verbatim quote is itself verbatim, so truncating
        # keeps the quote grounded in the input.
        cleaned.append(qq[:_MAX_QUOTE_CHARS])
    return cleaned


def sanitize_analyze_output(obj: Dict[str, Any], input_text: str) -> Dict[str, Any]:
    """Validate and normalise a parsed analysis object.

    Args:
        obj: Parsed model output. Reads the keys ``has_fallacy``,
            ``fallacies`` and ``overall_explanation``. A non-dict value is
            treated as an empty object.
        input_text: The analysed text; evidence quotes must occur in it
            verbatim. A non-string value is treated as empty.

    Returns:
        A dict with keys ``has_fallacy`` (bool), ``fallacies`` (list of dicts
        with ``type``, ``confidence`` rounded to two decimals, up to three
        ``evidence_quotes``, and ``rationale``) and ``overall_explanation``.
        Fallacy entries that are not dicts or whose type is not an allowed
        label are dropped. ``has_fallacy`` is forced to ``False`` when no
        fallacy survives.
    """
    if not isinstance(obj, dict):
        obj = {}
    if not isinstance(input_text, str):
        input_text = ""

    has_fallacy = bool(obj.get("has_fallacy", False))

    fallacies_in = obj.get("fallacies", [])
    if not isinstance(fallacies_in, list):
        fallacies_in = []

    fallacies_out: List[Dict[str, Any]] = []
    for f in fallacies_in:
        if not isinstance(f, dict):
            continue
        f_type = f.get("type")
        if not _is_allowed_label(f_type):
            continue

        conf = _clamp01(f.get("confidence", 0.5))
        conf = float(f"{conf:.2f}")

        ev_clean = _clean_evidence_quotes(f.get("evidence_quotes", []), input_text)

        fallacies_out.append(
            {
                "type": f_type,
                "confidence": conf,
                "evidence_quotes": ev_clean[:_MAX_QUOTES],
                "rationale": _clean_text_field(f.get("rationale", "")),
            }
        )

    overall = _clean_text_field(obj.get("overall_explanation", ""))

    if not fallacies_out:
        has_fallacy = False

    return {
        "has_fallacy": has_fallacy,
        "fallacies": fallacies_out,
        "overall_explanation": overall,
    }


# ----------------------------
# Replace helpers
# ----------------------------
def occurrence_index(text: str, sub: str, occurrence: int) -> int:
    """Return the start index of the *occurrence*-th (0-based) match of *sub*.

    Matches are counted without overlap: the search resumes after the end of
    the previous match (advancing at least one character for an empty *sub*).

    Returns:
        The index of the requested occurrence, or ``-1`` if *occurrence* is
        negative or there are not that many matches.
    """
    if occurrence < 0:
        return -1
    idx = -1
    start = 0
    for _ in range(occurrence + 1):
        idx = text.find(sub, start)
        if idx == -1:
            return -1
        start = idx + max(1, len(sub))
    return idx


def replace_nth(text: str, old: str, new: str, occurrence: int) -> Dict[str, Any]:
    """Replace the *occurrence*-th (0-based) match of *old* in *text*.

    Returns:
        On success, a dict with ``ok=True``, the ``rewritten_text``, the span
        of the replacement in the new text (``start_char``/``end_char``) and
        the span of the replaced text in the original
        (``old_start_char``/``old_end_char``). If the occurrence does not
        exist, ``{"ok": False, "error": "quote_not_found"}``.
    """
    idx = occurrence_index(text, old, occurrence)
    if idx == -1:
        return {"ok": False, "error": "quote_not_found"}
    return {
        "ok": True,
        "rewritten_text": text[:idx] + new + text[idx + len(old) :],
        "start_char": idx,
        "end_char": idx + len(new),
        "old_start_char": idx,
        "old_end_char": idx + len(old),
    }