narcolepticchicken
/

occ-stack

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed 25 days ago

Commit

11812f7

verified ·

1 Parent(s): cc93099

Upload jobs/occ_debate_real_llm.py

Browse files

Files changed (1) hide show

jobs/occ_debate_real_llm.py +262 -0

jobs/occ_debate_real_llm.py ADDED Viewed

	@@ -0,0 +1,262 @@

+#!/usr/bin/env python3
+"""
+OCC Multi-Agent Debate Benchmark with Real LLM (Qwen3-Coder-30B-A3B-Instruct).
+Two conditions:
+  A. Equal turns — each of the 4 agents gets one turn of up to 1024 new tokens per topic
+  B. OCC allocation — agents earn credits based on argument quality;
+     broker gates additional turns. Agents who make weak/unsupported claims
+     lose credits and get denied.
+Scenario: 30 factual dispute topics. 3 honest agents + 1 adversarial.
+Decision quality measured by agreement with ground truth answer.
+"""
+import json, time, os, sys
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face model identifier loaded by load_model().
MODEL_ID = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
# Default generation budget for generate(); the two benchmark conditions in
# this file pass max_new=1024 explicitly, so this default only applies to
# other callers.
MAX_NEW = 256
NUM_ROUNDS = 3
NUM_AGENTS = 4  # 3 honest, 1 adversarial
# Topics with ground truth answers. Each entry is {"q": question, "truth": "yes"|"no"}.
TOPICS = [
    {"q": "Is Python faster than C for numerical computation?", "truth": "no"},
    {"q": "Does water boil at 100°C at all altitudes?", "truth": "no"},
    {"q": "Can quantum computers break RSA-2048 today (2026)?", "truth": "no"},
    # NOTE(review): published estimates for the inner core and the solar
    # photosphere are close to each other — confirm this ground truth.
    {"q": "Is the Earth's core hotter than the surface of the Sun?", "truth": "yes"},
    {"q": "Does the Moon have an atmosphere?", "truth": "no"},
    {"q": "Can sound travel through a vacuum?", "truth": "no"},
    {"q": "Is JavaScript single-threaded by default in browsers?", "truth": "yes"},
    {"q": "Does DNA replication occur in the nucleus of eukaryotic cells?", "truth": "yes"},
    {"q": "Can a protein crystal structure be determined with 100% certainty from X-ray data?", "truth": "no"},
    # NOTE(review): holds only under step-size/smoothness assumptions and when
    # a minimiser exists — consider tightening the wording.
    {"q": "Is gradient descent guaranteed to find the global minimum for any convex function?", "truth": "yes"},
    {"q": "Can GPT-4 reliably solve novel mathematical proofs without supervision?", "truth": "no"},
    {"q": "Is the P vs NP problem solved as of 2026?", "truth": "no"},
    {"q": "Do all metals expand when heated?", "truth": "no"},
    {"q": "Is the speed of light constant in all reference frames?", "truth": "yes"},
    {"q": "Can a computer program determine if another arbitrary program halts?", "truth": "no"},
    {"q": "Is the Earth flat?", "truth": "no"},
    {"q": "Does carbon dioxide make up more than 1% of Earth's atmosphere?", "truth": "no"},
    {"q": "Can a classical computer efficiently simulate any quantum computation?", "truth": "no"},
    {"q": "Is the golden ratio (1.618...) exactly equal to (1+√5)/2?", "truth": "yes"},
    {"q": "Can a neural network with one hidden layer approximate any continuous function on a compact set?", "truth": "yes"},
    # NOTE(review): the second law says entropy never decreases; it can stay
    # constant at equilibrium, so "always increase" is debatable.
    {"q": "Does entropy always increase in an isolated system?", "truth": "yes"},
    # The default CPython 3.13 build still has the GIL; only the optional
    # free-threaded build (PEP 703) disables it. The original entry asked
    # "Is Python's GIL removed in CPython 3.13+?" with truth "yes".
    {"q": "Is the GIL removed in the default build of CPython 3.13?", "truth": "no"},
    {"q": "Do sharks get cancer?", "truth": "yes"},
    {"q": "Is Antarctica a country?", "truth": "no"},
    {"q": "Can humans survive without gut bacteria?", "truth": "yes"},
    {"q": "Do all birds fly?", "truth": "no"},
    {"q": "Is lightning hotter than the surface of the Sun?", "truth": "yes"},
    {"q": "Can a Turing machine with a finite tape recognize all recursive languages?", "truth": "no"},
    {"q": "Is the Riemann Hypothesis proved as of 2026?", "truth": "no"},
    {"q": "Does gravitational lensing confirm general relativity?", "truth": "yes"},
]
# Derived from the list so the accuracy denominator can never drift from the
# number of topics actually debated.
NUM_TOPICS = len(TOPICS)
def log(msg):
    """Print ``msg`` to stdout behind a ``[DEBATE]`` tag, flushing immediately."""
    line = "[DEBATE] {}".format(msg)
    print(line, flush=True)
def load_model():
    """Load the tokenizer and causal LM identified by ``MODEL_ID``.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is requested in bfloat16
        with ``device_map="auto"``.
    """
    log(f"Loading {MODEL_ID}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # NOTE: trust_remote_code=True lets the hub repository run its own Python
    # code at load time.
    model_kwargs = {
        "trust_remote_code": True,
        "torch_dtype": torch.bfloat16,
        "device_map": "auto",
    }
    llm = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_kwargs)
    log("Loaded.")
    return llm, tokenizer
def generate(model, tok, prompt, max_new=MAX_NEW):
    """Sample a completion for ``prompt`` and return only the newly generated text.

    Args:
        model: Causal LM exposing ``.device`` and ``.generate``.
        tok: Tokenizer used to encode the prompt and decode the output.
        prompt: Raw prompt string (passed to the tokenizer as-is).
        max_new: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded continuation with special tokens stripped; the prompt
        tokens are sliced off before decoding.
    """
    encoded = tok(prompt, return_tensors="pt").to(model.device)
    prompt_len = len(encoded.input_ids[0])
    sampling = {
        "max_new_tokens": max_new,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        # Pad with EOS so generate() has a pad token id to use.
        "pad_token_id": tok.eos_token_id,
    }
    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling)
    completion_ids = sequences[0][prompt_len:]
    return tok.decode(completion_ids, skip_special_tokens=True)
def score_argument(response, topic):
    """Score an argument with a simple surface heuristic.

    The score is the sum of four independent checks:
      * +0.3 if the text contains a stance word (yes / no / true / false),
      * +0.3 if it contains a reasoning marker (because, therefore, ...),
      * +0.2 if it is longer than 20 words,
      * +0.2 if it is shorter than 500 words.

    Stance words and reasoning markers are matched as whole words, so that
    e.g. "know", "not" or "eyes" do not count as stating a position and
    "sincerely" does not count as reasoning.

    Args:
        response: The agent's generated text.
        topic: The topic dict being debated (currently unused by the heuristic).

    Returns:
        A float in [0.0, 1.0].
    """
    import re

    text = response.lower()
    word_count = len(response.split())
    score = 0.0
    # Has a clear position
    if re.search(r"\b(?:yes|no|true|false)\b", text):
        score += 0.3
    # Has reasoning keywords
    if re.search(r"\b(?:because|therefore|since|due to|the reason)\b", text):
        score += 0.3
    # Not too short
    if word_count > 20:
        score += 0.2
    # Not too rambly
    if word_count < 500:
        score += 0.2
    return min(score, 1.0)
def extract_position(response):
    """Classify a response as ``"yes"``, ``"no"`` or ``"unclear"``.

    An explicit phrase ("clearly yes", "definitely no", "answer is yes", ...)
    decides immediately, with "yes" phrases checked first. Otherwise whole-word
    occurrences of "yes" and "no" are counted and the more frequent one wins;
    equal counts (including zero) give ``"unclear"``.

    Whole-word matching is used so that an answer starting with "Yes," or
    "No." is recognised, and so that "answer is not ..." is not mistaken for
    "answer is no".

    Args:
        response: The agent's generated text.

    Returns:
        One of ``"yes"``, ``"no"``, ``"unclear"``.
    """
    import re

    text = response.lower()
    if re.search(r"\b(?:clearly|definitely|answer is) yes\b", text):
        return "yes"
    if re.search(r"\b(?:clearly|definitely|answer is) no\b", text):
        return "no"
    # Count yes/no mentions
    yes_count = len(re.findall(r"\byes\b", text))
    no_count = len(re.findall(r"\bno\b", text))
    if yes_count > no_count:
        return "yes"
    if no_count > yes_count:
        return "no"
    return "unclear"
def run_equal_turns(model, tok):
    """Baseline: every agent gets one turn with a 1024-token budget per topic.

    For each topic all ``NUM_AGENTS`` agents answer once; the final decision is
    the strict plurality of their non-"unclear" positions. A tie (for example
    2 "yes" vs 2 "no") or the absence of any clear position is recorded as
    "unclear" and counted as wrong, so the outcome does not depend on set
    iteration order.

    Args:
        model: Causal LM passed through to ``generate``.
        tok: Tokenizer passed through to ``generate``.

    Returns:
        ``(accuracy, total_tokens, results)`` where ``results`` is a list of
        per-topic dicts with keys "topic", "truth", "majority", "correct".
    """
    from collections import Counter

    log("\n=== CONDITION A: Equal Turns (baseline) ===")
    correct = 0
    total_tokens = 0
    results = []
    for i, topic in enumerate(TOPICS):
        log(f"\nTopic {i+1}: {topic['q']}")
        arguments = []
        for agent in range(NUM_AGENTS):
            # Agents 0-2 are honest; every later index is adversarial.
            persona = "honest" if agent < 3 else "adversarial"
            prompt = f"Question: {topic['q']}\n\nYou are a {persona} debater. Give a clear yes/no answer with a brief explanation:\n"
            response = generate(model, tok, prompt, max_new=1024)
            pos = extract_position(response)
            score = score_argument(response, topic)
            arguments.append({"agent": agent, "persona": persona, "response": response,
                              "position": pos, "score": score})
            # Accounting is by allotted budget, not by tokens actually generated.
            total_tokens += 1024
            log(f"  Agent {agent} ({persona}): pos={pos}, score={score:.2f}")
        # Majority vote over the clear positions only.
        tally = Counter(a["position"] for a in arguments if a["position"] != "unclear")
        ranked = tally.most_common(2)
        tied = len(ranked) == 2 and ranked[0][1] == ranked[1][1]
        if ranked and not tied:
            majority = ranked[0][0]
            is_correct = majority == topic["truth"]
            if is_correct:
                correct += 1
            log(f"  Majority: {majority} (truth={topic['truth']}), correct={is_correct}")
        else:
            majority = "unclear"
            is_correct = False
            log("  No clear majority, counted as wrong")
        results.append({"topic": topic["q"], "truth": topic["truth"],
                        "majority": majority, "correct": is_correct})
    # Use the number of topics actually iterated as the denominator.
    num_topics = len(TOPICS)
    acc = correct / num_topics if num_topics else 0.0
    log(f"\nEqual Turns: {correct}/{num_topics} correct ({acc:.3f}), {total_tokens} tokens")
    return acc, total_tokens, results
def run_occ_allocation(model, tok):
    """OCC: agents earn credits based on argument quality. Broker gates turns.

    Per topic, every agent starts with ``INITIAL_CREDITS``. A turn costs
    ``COST_PER_TURN`` and earns back ``int(quality * 5)`` credits, capped at
    ``CREDIT_CAP``; an agent whose balance is below the cost is denied the
    turn. Balances decay by ``DECAY_AMOUNT`` every ``DECAY_INTERVAL`` rounds.
    Debate on a topic stops early once any position has been stated at least
    three times across the arguments collected so far.

    The final decision is the strict plurality of all clear positions; a tie
    or no clear position yields no decision (counted as wrong), so the outcome
    does not depend on set iteration order.

    Args:
        model: Causal LM passed through to ``generate``.
        tok: Tokenizer passed through to ``generate``.

    Returns:
        ``(accuracy, total_tokens)``.
    """
    from collections import Counter

    log("\n=== CONDITION B: OCC Credit Allocation ===")
    # OCC parameters
    INITIAL_CREDITS = 10
    COST_PER_TURN = 5
    DECAY_INTERVAL = 2
    DECAY_AMOUNT = 2
    CREDIT_CAP = 20
    correct = 0
    total_tokens = 0
    for i, topic in enumerate(TOPICS):
        log(f"\nTopic {i+1}: {topic['q']}")
        balances = [INITIAL_CREDITS] * NUM_AGENTS
        arguments = []
        tokens_used = 0
        for round_num in range(NUM_ROUNDS):
            # Decay credits every DECAY_INTERVAL rounds
            if round_num > 0 and round_num % DECAY_INTERVAL == 0:
                balances = [max(0, c - DECAY_AMOUNT) for c in balances]
            for agent in range(NUM_AGENTS):
                if balances[agent] < COST_PER_TURN:
                    log(f"  R{round_num} Agent {agent}: DENIED (credits={balances[agent]} < {COST_PER_TURN})")
                    continue
                # Agents 0-2 are honest; every later index is adversarial.
                persona = "honest" if agent < 3 else "adversarial"
                prompt = f"Question: {topic['q']}\n\nYou are a {persona} debater. Give a clear yes/no answer with a brief explanation:\n"
                response = generate(model, tok, prompt, max_new=1024)
                pos = extract_position(response)
                qual = score_argument(response, topic)
                # Accounting is by allotted budget, not by tokens actually generated.
                tokens_used += 1024
                # Earn credits for good arguments
                earned = int(qual * 5)
                balances[agent] = min(balances[agent] - COST_PER_TURN + earned, CREDIT_CAP)
                arguments.append({"agent": agent, "persona": persona, "response": response,
                                  "position": pos, "score": qual, "credits": balances[agent]})
                log(f"  R{round_num} Agent {agent}: pos={pos}, qual={qual:.2f}, credits={balances[agent]}")
            # Check if we have consensus after each round
            tally = Counter(a["position"] for a in arguments if a["position"] != "unclear")
            if tally and max(tally.values()) >= 3:  # Strong consensus
                break
        total_tokens += tokens_used
        # Final decision: strict plurality of all arguments
        ranked = Counter(
            a["position"] for a in arguments if a["position"] != "unclear"
        ).most_common(2)
        tied = len(ranked) == 2 and ranked[0][1] == ranked[1][1]
        if ranked and not tied:
            majority = ranked[0][0]
            is_correct = majority == topic["truth"]
            if is_correct:
                correct += 1
            log(f"  Decision: {majority} (truth={topic['truth']}), correct={is_correct}, tokens={tokens_used}")
        else:
            log(f"  No decision, tokens={tokens_used}")
    # Use the number of topics actually iterated as the denominator.
    num_topics = len(TOPICS)
    acc = correct / num_topics if num_topics else 0.0
    log(f"\nOCC: {correct}/{num_topics} correct ({acc:.3f}), {total_tokens} tokens")
    return acc, total_tokens
def main():
    """Run both benchmark conditions, log a comparison and save a JSON summary.

    Condition A (equal turns) and condition B (OCC allocation) are run with
    the same loaded model. The summary is written to
    ``/app/occ_debate_results.json``; the directory is created if missing so
    that a finished run is not lost to a missing output folder. Ratios that
    divide by a token total fall back to 0.0 when that total is zero.
    """
    model, tok = load_model()
    # Condition A: Equal turns
    acc_a, tok_a, _ = run_equal_turns(model, tok)
    # Condition B: OCC
    acc_b, tok_b = run_occ_allocation(model, tok)
    # Report
    savings = (1 - tok_b / tok_a) * 100 if tok_a else 0.0
    quality_a = acc_a / tok_a * 1000 if tok_a else 0.0
    quality_b = acc_b / tok_b * 1000 if tok_b else 0.0
    log("\n" + "=" * 60)
    log("DEBATE BENCHMARK RESULTS")
    log("=" * 60)
    log(f"  Equal turns:  acc={acc_a:.3f}, tokens={tok_a}")
    log(f"  OCC:          acc={acc_b:.3f}, tokens={tok_b}")
    log(f"  Delta acc:    {acc_b - acc_a:+.3f}")
    log(f"  Token savings: {savings:.1f}%")
    log(f"  Decision quality per 1K tokens: {quality_a:.4f} vs {quality_b:.4f}")
    results = {
        "model": MODEL_ID,
        "num_topics": NUM_TOPICS,
        "equal_turns": {"accuracy": acc_a, "tokens": tok_a},
        "occ_allocation": {"accuracy": acc_b, "tokens": tok_b},
        "delta_accuracy": acc_b - acc_a,
        "token_savings_pct": savings,
    }
    out_path = "/app/occ_debate_results.json"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w") as f:
        json.dump(results, f, indent=2)
    log(f"\nResults saved to {out_path}")


if __name__ == "__main__":
    main()