|
|
|
|
|
""" |
|
|
Grogu Science MoE - Full Collaborative Debate System |
|
|
|
|
|
This script implements the full 4-agent debate mechanism: |
|
|
1. Grogu (General Learning Agent) |
|
|
2. Physics Specialist |
|
|
3. Chemistry Specialist |
|
|
4. Biology Specialist |
|
|
|
|
|
The debate proceeds in 4 phases: |
|
|
- Phase 1: Independent reasoning (all agents answer alone) |
|
|
- Phase 2: Collaborative debate (agents see others' answers) |
|
|
- Phase 3: Tree-of-Thoughts synthesis (Grogu synthesizes all perspectives) |
|
|
- Phase 4: Confidence-weighted voting (final answer selection) |
|
|
""" |
|
|
|
|
|
import argparse
import json
import re
from collections import Counter
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
class DebateAgent:
    """A single agent in the debate system."""

    def __init__(
        self,
        name: str,
        model: AutoModelForCausalLM,
        tokenizer: AutoTokenizer,
        specialty: Optional[str] = None
    ):
        # Agents without an explicit domain act as generalists.
        self.name = name
        self.model = model
        self.tokenizer = tokenizer
        self.specialty = specialty or "general"

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.1
    ) -> str:
        """Generate a completion for *prompt* and return only the newly
        generated text (the echoed prompt tokens are stripped off)."""
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = encoded.input_ids.shape[1]

        with torch.no_grad():
            generated = self.model.generate(
                **encoded,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                do_sample=temperature > 0,  # greedy decoding when temperature is 0
                pad_token_id=self.tokenizer.eos_token_id
            )

        # Decode only the continuation that follows the prompt.
        continuation = generated[0][prompt_length:]
        decoded = self.tokenizer.decode(continuation, skip_special_tokens=True)
        return decoded.strip()
|
|
|
|
|
|
|
|
class GroguDebateOrchestrator:
    """
    Orchestrates the multi-agent debate process.

    This is the full system that achieved 98% on MMLU-Pro
    and 99% on GPQA Diamond.

    The debate runs in four phases (see run_debate):
      1. Independent reasoning, 2. Collaborative debate,
      3. Grogu synthesis, 4. Confidence-weighted voting.
    """

    def __init__(
        self,
        grogu_path: str,
        physics_path: Optional[str] = None,
        chemistry_path: Optional[str] = None,
        biology_path: Optional[str] = None,
        nemotron_base: str = "base-models/nemotron-qwen-1.5b",
        qwen_base: str = "base-models/qwen2.5-1.5b-instruct"
    ):
        """
        Initialize all agents in the debate system.

        Args:
            grogu_path: Path to Grogu LoRA adapter
            physics_path: Path to Physics LoRA adapter (specialist skipped when None)
            chemistry_path: Path to Chemistry LoRA adapter (specialist skipped when None)
            biology_path: Path to Biology LoRA adapter (specialist skipped when None)
            nemotron_base: Path to Nemotron-Qwen-1.5B base model (for Grogu & Biology)
            qwen_base: Path to Qwen2.5-1.5B-Instruct base model (for Physics & Chemistry)
        """
        self.agents: Dict[str, DebateAgent] = {}
        self.nemotron_base = nemotron_base
        self.qwen_base = qwen_base

        print("Initializing Grogu Debate System...")
        print(f" Nemotron base: {nemotron_base}")
        print(f" Qwen base: {qwen_base}")

        # Grogu (the generalist) is always loaded; specialists are optional.
        print("Loading Grogu (General Agent)...")
        tokenizer = AutoTokenizer.from_pretrained(nemotron_base)
        grogu_base_model = AutoModelForCausalLM.from_pretrained(
            nemotron_base,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        grogu = PeftModel.from_pretrained(grogu_base_model, grogu_path)
        self.agents["grogu"] = DebateAgent("grogu", grogu, tokenizer, "general")

        if physics_path:
            print("Loading Physics Specialist...")
            self.agents["physics"] = self._load_specialist(
                "physics", physics_path, qwen_base
            )

        if chemistry_path:
            print("Loading Chemistry Specialist...")
            self.agents["chemistry"] = self._load_specialist(
                "chemistry", chemistry_path, qwen_base
            )

        if biology_path:
            print("Loading Biology Specialist...")
            self.agents["biology"] = self._load_specialist(
                "biology", biology_path, nemotron_base
            )

        print(f"Loaded {len(self.agents)} agents: {list(self.agents.keys())}")

    def _load_specialist(
        self,
        name: str,
        lora_path: str,
        base_model: str
    ) -> "DebateAgent":
        """Load a domain specialist with LoRA weights.

        Args:
            name: Agent name; also used as its specialty label.
            lora_path: Path to the specialist's LoRA adapter.
            base_model: Path to the base model the adapter was trained on.
        """
        tokenizer = AutoTokenizer.from_pretrained(base_model)
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        model = PeftModel.from_pretrained(model, lora_path)
        return DebateAgent(name, model, tokenizer, name)

    def _extract_answer(self, response: str) -> str:
        """Extract a multiple-choice letter (A/B/C/D) from a response.

        Looks for an explicit answer marker first and returns the first
        standalone choice letter appearing AFTER it.  (The previous
        implementation searched a window that included the marker text
        itself in alphabetical letter order; since "ANSWER" contains an
        'A', every marked response was extracted as "A".)
        Falls back to "(X)" / " X " mentions, then defaults to "A".
        """
        response_upper = response.upper()

        patterns = ["THE ANSWER IS", "ANSWER:", "FINAL ANSWER:", "THEREFORE"]
        for pattern in patterns:
            idx = response_upper.find(pattern)
            if idx == -1:
                continue
            # Search only the text AFTER the marker; require the letter to
            # be a standalone token so letters inside words don't match.
            start = idx + len(pattern)
            after = response_upper[start:start + 50]
            match = re.search(r"\b([ABCD])\b", after)
            if match:
                return match.group(1)

        # Fallback: a parenthesized or space-delimited letter anywhere.
        for char in ['A', 'B', 'C', 'D']:
            if f"({char})" in response_upper or f" {char} " in response_upper:
                return char

        # Nothing recognizable; return a default rather than failing.
        return "A"

    def run_debate(
        self,
        question: str,
        options: Dict[str, str]
    ) -> Dict:
        """
        Run the full 4-phase debate process.

        Args:
            question: The question text.
            options: Mapping of choice letter ("A".."D") to option text.

        Returns detailed results including all agent answers and mind changes.
        """
        results = {
            "question": question,
            "options": options,
            "round1_answers": {},
            "round2_answers": {},
            "mind_changes": [],
            "synthesis_answer": None,
            "final_answer": None,
            "confidence": 0.0
        }

        formatted_options = "\n".join([f"{k}) {v}" for k, v in options.items()])

        # --- Phase 1: every agent answers independently -------------------
        print("\n[Phase 1: Independent Reasoning]")

        for name, agent in self.agents.items():
            prompt = self._get_round1_prompt(question, formatted_options, agent.specialty)
            response = agent.generate(prompt)
            answer = self._extract_answer(response)
            results["round1_answers"][name] = {
                "answer": answer,
                "reasoning": response
            }
            print(f" {name}: {answer}")

        # --- Phase 2: agents see each other's answers and may revise ------
        print("\n[Phase 2: Collaborative Debate]")

        r1_context = self._format_debate_context(results["round1_answers"])

        for name, agent in self.agents.items():
            prompt = self._get_round2_prompt(
                question, formatted_options, r1_context, agent.specialty
            )
            response = agent.generate(prompt)
            answer = self._extract_answer(response)
            results["round2_answers"][name] = {
                "answer": answer,
                "reasoning": response
            }

            # Track agents that changed their minds between rounds.
            r1_answer = results["round1_answers"][name]["answer"]
            if answer != r1_answer:
                results["mind_changes"].append(f"{name}: {r1_answer}->{answer}")
                print(f" {name}: {answer} (changed from {r1_answer})")
            else:
                print(f" {name}: {answer}")

        # --- Phase 3: Grogu synthesizes all perspectives ------------------
        print("\n[Phase 3: Synthesis]")

        if "grogu" in self.agents:
            synthesis_prompt = self._get_synthesis_prompt(
                question, formatted_options, results
            )
            # Larger token budget: the synthesis reasons over the whole debate.
            synthesis_response = self.agents["grogu"].generate(
                synthesis_prompt, max_new_tokens=1024
            )
            results["synthesis_answer"] = self._extract_answer(synthesis_response)
            print(f" Synthesis: {results['synthesis_answer']}")

        # --- Phase 4: confidence-weighted voting --------------------------
        print("\n[Phase 4: Voting]")

        votes = Counter()
        for name, data in results["round2_answers"].items():
            # Specialists get a slightly higher weight than the generalist.
            weight = 1.2 if name != "grogu" else 1.0
            votes[data["answer"]] += weight

        # The synthesis answer carries the largest single weight.
        if results["synthesis_answer"]:
            votes[results["synthesis_answer"]] += 1.5

        if votes:
            results["final_answer"] = votes.most_common(1)[0][0]
            total_votes = sum(votes.values())
            results["confidence"] = votes[results["final_answer"]] / total_votes
        else:
            # No agents voted (e.g. empty agent set): fall back to the
            # default answer instead of crashing on an empty Counter.
            results["final_answer"] = "A"

        print(f" Final: {results['final_answer']} (confidence: {results['confidence']:.2f})")

        return results

    def _get_round1_prompt(
        self,
        question: str,
        options: str,
        specialty: str
    ) -> str:
        """Generate the round 1 prompt for independent reasoning.

        Args:
            question: The question text.
            options: Pre-formatted option lines ("A) ...\nB) ...").
            specialty: Agent specialty; selects the persona intro line.
        """
        specialty_intro = {
            "general": "You are an expert reasoning agent.",
            "physics": "You are a physics expert with deep knowledge of quantum mechanics, thermodynamics, and classical physics.",
            "chemistry": "You are a chemistry expert specializing in organic, inorganic, and physical chemistry.",
            "biology": "You are a biology expert with expertise in molecular biology, genetics, and biochemistry."
        }

        return f"""{specialty_intro.get(specialty, specialty_intro['general'])}

Answer this question step by step. Think carefully about each option.

Question: {question}

Options:
{options}

Provide your reasoning and end with "The answer is [A/B/C/D]"."""

    def _get_round2_prompt(
        self,
        question: str,
        options: str,
        r1_context: str,
        specialty: str
    ) -> str:
        """Generate the round 2 prompt including other agents' answers.

        Args:
            question: The question text.
            options: Pre-formatted option lines.
            r1_context: Round-1 answers/reasoning from all agents
                (see _format_debate_context).
            specialty: Agent specialty (currently unused in the template).
        """
        return f"""You are participating in a collaborative debate to answer this question.

Question: {question}

Options:
{options}

Other experts have provided their initial answers:
{r1_context}

Consider their perspectives. Do you agree or disagree? Why?
You may change your answer if convinced, or defend your original choice.

End with "The answer is [A/B/C/D]"."""

    def _get_synthesis_prompt(
        self,
        question: str,
        options: str,
        results: Dict
    ) -> str:
        """Generate the synthesis prompt for Tree-of-Thoughts analysis.

        Summarizes both debate rounds plus any mind changes so the
        synthesis agent can weigh convergence and disagreement.
        """
        r1_summary = "\n".join([
            f"- {name}: {data['answer']}"
            for name, data in results["round1_answers"].items()
        ])

        r2_summary = "\n".join([
            f"- {name}: {data['answer']}"
            for name, data in results["round2_answers"].items()
        ])

        changes = ", ".join(results["mind_changes"]) if results["mind_changes"] else "None"

        return f"""You are the synthesis agent. Your task is to analyze all perspectives and determine the best answer.

Question: {question}

Options:
{options}

Round 1 Answers (Independent):
{r1_summary}

Round 2 Answers (After Debate):
{r2_summary}

Mind Changes: {changes}

Analyze the debate:
1. Which answer has the most support?
2. Did agents converge on a consensus?
3. Are there valid reasons for disagreement?
4. What is the most likely correct answer?

Provide your final synthesis and end with "The answer is [A/B/C/D]"."""

    def _format_debate_context(self, round1_answers: Dict) -> str:
        """Format round 1 answers for the debate context.

        Reasoning longer than 200 characters is truncated with an
        ellipsis so the round-2 prompt stays a manageable size.
        """
        lines = []
        for name, data in round1_answers.items():
            reasoning = data["reasoning"]
            # Only add the ellipsis when something was actually cut off.
            short_reasoning = reasoning[:200] + "..." if len(reasoning) > 200 else reasoning
            lines.append(f"{name.upper()}: Answer {data['answer']}\nReasoning: {short_reasoning}")
        return "\n\n".join(lines)
|
|
|
|
|
|
|
|
def main():
    """CLI entry point: run a debate on a sample (or supplied) question."""
    parser = argparse.ArgumentParser(description="Grogu Science MoE - Collaborative Debate System")
    parser.add_argument("--model-dir", default=".", help="Path to model directory (default: current dir)")
    parser.add_argument("--grogu-only", action="store_true", help="Run with Grogu only (no specialists)")
    parser.add_argument("--question", help="Question to debate")
    args = parser.parse_args()

    model_dir = Path(args.model_dir)

    # Default demo question (exon-skipping therapy) when none is supplied.
    question = args.question or """
    A large gene has dozens of exons, of which the central ones code for
    folded triple helical repeats. The most common mutations create out-of-frame
    peptides. A Morpholino prevents spliceosome binding and creates exon skipping.
    Which structure is NOT involved in this therapy?
    """

    options = {"A": "R-loops", "B": "lariat", "C": "polyA tail", "D": "antisense"}

    print("=" * 70)
    print("GROGU SCIENCE MoE - Full Debate System")
    print("=" * 70)

    nemotron_base = str(model_dir / "base-models" / "nemotron-qwen-1.5b")
    qwen_base = str(model_dir / "base-models" / "qwen2.5-1.5b-instruct")

    # Grogu is always present; the three specialists join unless --grogu-only.
    orchestrator_kwargs = dict(
        grogu_path=str(model_dir / "grogu-lora"),
        nemotron_base=nemotron_base,
        qwen_base=qwen_base,
    )
    if not args.grogu_only:
        orchestrator_kwargs.update(
            physics_path=str(model_dir / "physics-lora"),
            chemistry_path=str(model_dir / "chemistry-lora"),
            biology_path=str(model_dir / "biology-lora"),
        )
    orchestrator = GroguDebateOrchestrator(**orchestrator_kwargs)

    result = orchestrator.run_debate(question, options)

    print("\n" + "=" * 70)
    print("FINAL RESULT")
    print("=" * 70)
    print(f"Answer: {result['final_answer']}")
    print(f"Confidence: {result['confidence']:.2%}")
    print(f"Mind Changes: {len(result['mind_changes'])}")

    # Persist a compact summary: keep only each agent's letter answer,
    # dropping the full reasoning text from both rounds.
    serializable = {
        k: v for k, v in result.items()
        if k not in ("round1_answers", "round2_answers")
    }
    for round_key in ("round1_answers", "round2_answers"):
        serializable[round_key] = {
            name: data["answer"] for name, data in result[round_key].items()
        }

    output_path = Path("debate_result.json")
    with output_path.open("w") as f:
        json.dump(serializable, f, indent=2)

    print(f"\nResults saved to {output_path}")


if __name__ == "__main__":
    main()
|
|
|