"""
debug_combative.py

Run a fixed conversation through every combative stage and save outputs.

Usage:
    python debug_combative.py                      # Diogenes, default scenarios
    python debug_combative.py nietzsche            # Nietzsche, default scenarios
    python debug_combative.py diogenes nietzsche   # both, default scenarios
    python debug_combative.py diogenes:kindness    # Diogenes, one scenario
    python debug_combative.py nietzsche:equality   # Nietzsche, one scenario

A bare character name runs the scenarios listed for it in CHARACTER_SCENARIOS
(characters not listed there fall back to "kindness").

Results are saved to debug_combative_<character>_<scenario>.json and printed
to stdout.
"""

import json
import os
import sys
from datetime import datetime
from typing import Optional

# Force flat mode locally if no Anthropic key is present
import config  # noqa: F401 — ensures load_dotenv() runs first
if not os.environ.get("ANTHROPIC_API_KEY"):
    import llm_client
    llm_client._llm_mode.set("flat")

from dialogue_combative_helper import (
    call_opening_strike_llm,
    call_press_llm,
    call_name_contradiction_llm,
    call_force_choice_llm,
    call_escalate_llm,
    call_verdict_llm,
    call_combative_close_llm,
)

# ---------------------------------------------------------------------------
# Test scenarios
# Each scenario: initial_msg + 6 user replies
#   [0] B trigger  — defend with new framing
#   [1] C trigger  — reveal a contradiction
#   [2] D trigger  — commit to a position
#   [3] E1 trigger — hold ground
#   [4] E2 trigger — continue holding
#   [5] G trigger  — react to verdict
# ---------------------------------------------------------------------------
SCENARIOS = {
    "kindness": {
        "label": "Kindness vs Honesty",
        "initial": (
            "I believe being kind to everyone is always the right thing to do, "
            "even if it means not saying what you really think."
        ),
        "replies": [
            "Honesty without compassion is just cruelty. You have to choose the right moment and the right words.",
            "Sometimes protecting someone's feelings is more important than a hard truth they're not ready to hear.",
            "There's a difference between choosing not to speak and lying. Silence is not deception.",
            "I stand by it. Some truths do more harm than good when delivered to the wrong person at the wrong time.",
            "I still think the timing and the relationship matter more than the raw fact.",
            "Fair enough. I see your point.",
        ],
    },
    "wealth": {
        "label": "Wealth and Freedom",
        "initial": (
            "I work hard to build a successful career and earn a good salary. "
            "Financial security gives me freedom and respect."
        ),
        "replies": [
            "Money gives me the ability to choose how I live. Without it I have no real freedom.",
            "I don't work for money itself — I work for what it represents: stability and recognition from others.",
            "There's a difference between being wealthy and being greedy. I'm building something meaningful, not just accumulating.",
            "I stand by it. Financial security removes anxiety and lets me focus on what actually matters to me.",
            "I still think security is the foundation for everything else, including living virtuously.",
            "Fair point. Maybe I conflate freedom with comfort.",
        ],
    },
    "equality": {
        "label": "Equality and Rights",
        "initial": (
            "I believe all people are equal and deserve the same rights and opportunities "
            "regardless of their natural abilities or achievements."
        ),
        "replies": [
            # B: defend with data
            "Equality is the foundation of a just society. Without it we just have the law of the strongest.",
            # C: crack — equal rights AND merit-based reward are incompatible under same 'deserve'
            "I believe in equal rights, but I also think people who work harder and show more discipline deserve better outcomes. That is only fair.",
            # D: forced to pick one
            "Rights should be equal even if outcomes aren't — that's what fairness means to me.",
            # E1
            "I stand by equal rights as a principle, regardless of ability or what someone has achieved.",
            # E2
            "I still think the alternative — a society ordered by hierarchy of ability — leads to oppression and cruelty.",
            # G
            "I see your point, but I still think equality is the less dangerous principle to build on.",
        ],
    },
}

# Which scenarios each character should run by default
CHARACTER_SCENARIOS = {
    "diogenes": ["kindness", "wealth"],
    "nietzsche": ["kindness", "equality"],
}


def run_scenario(character_id: str, scenario_key: str) -> dict:
    """Drive one scripted conversation through stages A-G and collect the replies.

    Args:
        character_id: Forwarded unchanged to every ``call_*_llm`` helper.
        scenario_key: Key into ``SCENARIOS`` selecting the opening message and
            the six scripted user replies.

    Returns:
        A dict with ``character``, ``scenario``, ``label``, ``timestamp`` and
        ``stages``; ``stages`` maps each stage name ("A", "B", "C", "D", "E1",
        "E2", "F", "G") to the user message, the reply, and any stage metadata.

    Raises:
        KeyError: If ``scenario_key`` is not present in ``SCENARIOS``.
    """
    scenario = SCENARIOS[scenario_key]
    initial_msg = scenario["initial"]
    user_replies = scenario["replies"]

    results = {
        "character": character_id,
        "scenario": scenario_key,
        "label": scenario["label"],
        "timestamp": datetime.now().isoformat(),
        "stages": {},
    }
    # Running transcript handed to every helper; each call sees only the turns
    # recorded before it, because record() appends after the call returns.
    history = []

    def record(stage: str, user_in: str, reply: str, extra: Optional[dict] = None) -> None:
        """Store one stage's exchange, echo it to stdout, and extend history."""
        entry = {"user": user_in, "reply": reply}
        if extra:
            entry.update(extra)
        results["stages"][stage] = entry

        print(f"\n{'='*60}")
        print(f" STAGE {stage} [{character_id} / {scenario_key}]")
        print(f" USER : {user_in}")
        print(f" REPLY: {reply}")
        if extra:
            print(f" META : {extra}")

        history.append({"role": "user", "content": user_in})
        history.append({"role": "assistant", "content": reply})

    # Stage A — opening strike (result is used directly as the reply text)
    reply_a = call_opening_strike_llm(
        initial_msg, history, character_id=character_id
    )
    record("A", initial_msg, reply_a)

    # Stage B — press (result is read with .get, i.e. treated as a mapping)
    result_b = call_press_llm(
        user_replies[0],
        history,
        socratic_topic=initial_msg,
        character_id=character_id,
    )
    record(
        "B",
        user_replies[0],
        result_b.get("reply", ""),
        {
            "contradiction_visible": result_b.get("contradiction_visible"),
            "position_strength": result_b.get("user_position_strength"),
        },
    )

    # Stage C — name the contradiction
    reply_c = call_name_contradiction_llm(
        user_replies[1],
        history,
        socratic_topic=initial_msg,
        character_id=character_id,
    )
    record("C", user_replies[1], reply_c)

    # Stage D — force a choice
    reply_d = call_force_choice_llm(
        user_replies[2],
        history,
        socratic_topic=initial_msg,
        character_id=character_id,
    )
    record("D", user_replies[2], reply_d)

    # Stage E — two escalation turns (E1, E2)
    escalation_turns = user_replies[3:5]
    for i, turn_msg in enumerate(escalation_turns, start=1):
        result_e = call_escalate_llm(
            turn_msg,
            history,
            socratic_topic=initial_msg,
            e_turn_count=i,
            character_id=character_id,
        )
        record(
            f"E{i}",
            turn_msg,
            result_e.get("reply", ""),
            {
                "holding_ground": result_e.get("user_holding_ground"),
                "ready_for_verdict": result_e.get("ready_for_verdict"),
            },
        )

    # Stage F — verdict
    # NOTE(review): F re-sends the E2 message (user_replies[4]), so that user
    # turn ends up in history twice — confirm this mirrors the live flow.
    mock_dialogue = {
        "socratic_topic": initial_msg,
        # Kept in sync with the number of E turns actually played above.
        "e_turn_count": len(escalation_turns),
    }
    result_f = call_verdict_llm(
        user_replies[4],
        history,
        dialogue=mock_dialogue,
        character_id=character_id,
    )
    record(
        "F",
        user_replies[4],
        result_f.get("reply", ""),
        {"verdict_type": result_f.get("verdict_type")},
    )

    # Stage G — close; the verdict type doubles as the dialogue narrative here.
    mock_dialogue["verdict_type"] = result_f.get("verdict_type", "evaded")
    mock_dialogue["dialogue_narrative"] = mock_dialogue["verdict_type"]
    result_g = call_combative_close_llm(
        user_replies[5],
        history,
        dialogue=mock_dialogue,
        character_id=character_id,
    )
    record(
        "G",
        user_replies[5],
        result_g.get("reply", ""),
        {"closing_mode": result_g.get("closing_mode")},
    )

    return results


def _parse_runs(args: list) -> list:
    """Expand CLI arguments into an ordered list of (character_id, scenario_key)."""
    runs = []
    for arg in args:
        if ":" in arg:
            # "character:scenario" selects exactly one scenario.
            character_id, scenario_key = arg.split(":", 1)
            runs.append((character_id, scenario_key))
        else:
            # Bare character name: run its defaults, or "kindness" if unlisted.
            for scenario_key in CHARACTER_SCENARIOS.get(arg, ["kindness"]):
                runs.append((arg, scenario_key))
    return runs


def main() -> None:
    """Run every requested character/scenario pair and write one JSON file per run.

    With no command-line arguments, runs the default scenarios for "diogenes".
    Exits with an error message (before any scenario is run) if a requested
    scenario key is not defined in ``SCENARIOS``.
    """
    args = sys.argv[1:] or ["diogenes"]
    runs = _parse_runs(args)

    # Validate up front so a typo does not surface as a bare KeyError after
    # earlier runs have already spent their LLM calls.
    unknown = sorted({key for _, key in runs if key not in SCENARIOS})
    if unknown:
        sys.exit(
            "Unknown scenario(s): "
            + ", ".join(repr(key) for key in unknown)
            + ". Available: "
            + ", ".join(SCENARIOS)
        )

    for character_id, scenario_key in runs:
        print(f"\n{'#'*60}")
        print(f" {character_id.upper()} — {SCENARIOS[scenario_key]['label']}")
        print(f"{'#'*60}")

        results = run_scenario(character_id, scenario_key)

        out_path = f"debug_combative_{character_id}_{scenario_key}.json"
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"\nSaved to {out_path}")


if __name__ == "__main__":
    main()