#!/usr/bin/env python3
"""
Grogu Science MoE - Simple Inference Example
This script demonstrates how to use the Grogu debate system
for graduate-level science questions.
"""
import json
import re
from typing import Dict, List, Optional

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
class GroguDebateSystem:
    """A Mixture-of-Experts debate system for science questions.

    The system uses 4 agents:
    - Grogu: General reasoning agent (Nemotron-Qwen-1.5B + LoRA)
    - Physics: Domain specialist
    - Chemistry: Domain specialist
    - Biology: Domain specialist

    They engage in a 2-round collaborative debate with synthesis.
    This class implements the single-agent (Grogu-only) path; the full
    4-agent debate lives in run_debate.py.
    """

    def __init__(
        self,
        grogu_path: str = "zenith-global/grogu-science-moe/grogu-lora",
        base_model: str = "nvidia/nemotron-qwen-1.5b",
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
        dtype: torch.dtype = torch.float16
    ):
        """Initialize the debate system with model paths.

        Args:
            grogu_path: Hub id or local path of the Grogu LoRA adapter.
            base_model: Hub id of the base causal LM (and its tokenizer).
            device: Device the tokenized inputs are moved to.
            dtype: Dtype the base model weights are loaded in.
        """
        self.device = device
        self.dtype = dtype
        print("Loading Grogu model...")
        self.tokenizer = AutoTokenizer.from_pretrained(base_model)
        # Load base model; device_map="auto" lets HF place/shard weights.
        self.base_model = AutoModelForCausalLM.from_pretrained(
            base_model,
            torch_dtype=dtype,
            device_map="auto"
        )
        # Apply LoRA adapter weights on top of the base model.
        self.grogu = PeftModel.from_pretrained(
            self.base_model,
            grogu_path
        )
        self.grogu.eval()
        print("Grogu loaded!")

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.1,
        top_p: float = 0.95
    ) -> str:
        """Generate a response from Grogu.

        Args:
            prompt: Full text prompt fed to the model.
            max_new_tokens: Cap on the number of generated tokens.
            temperature: Sampling temperature; 0 switches to greedy decoding.
            top_p: Nucleus-sampling probability mass.

        Returns:
            The decoded completion with the prompt stripped and
            surrounding whitespace trimmed.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.grogu.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=temperature > 0,  # greedy when temperature == 0
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Slice off the prompt tokens so only the completion is decoded.
        response = self.tokenizer.decode(
            outputs[0][inputs.input_ids.shape[1]:],
            skip_special_tokens=True
        )
        return response.strip()

    def extract_answer(self, response: str) -> str:
        """Extract the final answer letter (A, B, C, or D) from a response.

        Looks for a standalone letter strictly *after* an answer phrase such
        as "The answer is". BUGFIX: the previous implementation searched a
        window that still contained the phrase itself, and the 'A' inside
        "ANSWER" matched first — so it returned "A" for almost any response.
        Word-boundary matching also prevents letters inside ordinary words
        (e.g. the 'B' in "BECAUSE") from matching.

        Falls back to the first standalone or parenthesized A/B/C/D anywhere
        in the response, then to "A" when nothing matches at all.
        """
        patterns = [
            "The answer is",
            "Answer:",
            "Final answer:",
            "Therefore,",
            "So the answer is"
        ]
        response_upper = response.upper()
        for pattern in patterns:
            token = pattern.upper()
            idx = response_upper.find(token)
            if idx == -1:
                continue
            # Search a short window strictly AFTER the phrase text.
            start = idx + len(token)
            window = response_upper[start:start + 50]
            found = re.search(r"\b([ABCD])\b", window)
            if found:
                return found.group(1)
        # Fallback: first standalone A/B/C/D (word boundaries also cover
        # the parenthesized "(B)" form) anywhere in the response.
        found = re.search(r"\b([ABCD])\b", response_upper)
        if found:
            return found.group(1)
        return "A"  # Default when no letter can be located

    def debate_single(
        self,
        question: str,
        options: Optional[Dict[str, str]] = None
    ) -> Dict:
        """Run a simplified single-agent debate (Grogu only).

        Round 1 answers the question; round 2 asks the model to reflect on
        its own round-1 answer. For the full 4-agent debate, see
        run_debate.py.

        Args:
            question: The science question text.
            options: Optional mapping of letter ("A".."D") to option text;
                when omitted, the question is treated as open-ended.

        Returns:
            Dict with both rounds' answers and reasoning, the final answer,
            and whether the model changed its mind between rounds.
        """
        # Format the question (multiple-choice vs. open-ended)
        if options:
            formatted_options = "\n".join(
                f"{k}) {v}" for k, v in options.items()
            )
            prompt = f"""You are an expert scientist. Answer this question step by step.
Question: {question}
Options:
{formatted_options}
Think through this carefully and provide your answer. End with "The answer is [A/B/C/D]"."""
        else:
            prompt = f"""You are an expert scientist. Answer this question step by step.
Question: {question}
Think through this carefully and provide your reasoning."""
        # Round 1: Initial reasoning
        response_r1 = self.generate(prompt)
        answer_r1 = self.extract_answer(response_r1)
        # Round 2: Self-reflection on the round-1 answer (truncated to
        # the first 500 chars to keep the prompt bounded).
        reflection_prompt = f"""{prompt}
Your initial answer was: {answer_r1}
Your reasoning was: {response_r1[:500]}...
Reconsider your answer. Are you confident? If not, what might be wrong?
End with "The answer is [A/B/C/D]"."""
        response_r2 = self.generate(reflection_prompt)
        answer_r2 = self.extract_answer(response_r2)
        return {
            "question": question,
            "round1_answer": answer_r1,
            "round1_reasoning": response_r1,
            "round2_answer": answer_r2,
            "round2_reasoning": response_r2,
            "final_answer": answer_r2,
            "changed_mind": answer_r1 != answer_r2
        }

    @classmethod
    def from_pretrained(cls, model_path: str) -> "GroguDebateSystem":
        """Load the debate system from a HuggingFace model path.

        Args:
            model_path: Repo or directory containing a `grogu-lora` subfolder.
        """
        return cls(grogu_path=f"{model_path}/grogu-lora")
def main():
    """Example usage of the Grogu debate system."""
    # Example GPQA-style question
    question = """
Two quantum states with energies E1 and E2 have a lifetime of 10^-9 sec
and 10^-8 sec, respectively. We want to clearly distinguish these two
energy levels. Which one of the following options could be their energy
difference so that they can be clearly resolved?
"""
    options = {
        "A": "10^-4 eV",
        "B": "10^-11 eV",
        "C": "10^-8 eV",
        "D": "10^-9 eV",
    }
    banner = "=" * 60
    print(banner)
    print("GROGU SCIENCE MoE - Inference Demo")
    print(banner)
    # Initialize system
    print("\nInitializing Grogu...")
    # For demo, we'll use a mock if the model isn't available
    try:
        system = GroguDebateSystem()
        print("\nRunning debate on physics question...")
        outcome = system.debate_single(question, options)
        print(f"\nQuestion: {question[:100]}...")
        print(f"\nRound 1 Answer: {outcome['round1_answer']}")
        print(f"Round 2 Answer: {outcome['round2_answer']}")
        print(f"Changed Mind: {outcome['changed_mind']}")
        print(f"\nFinal Answer: {outcome['final_answer']}")
        expected = "A"  # Correct answer is A (10^-4 eV)
        print(f"Correct Answer: {expected}")
        verdict = "Correct!" if outcome["final_answer"] == expected else "Incorrect"
        print(f"Result: {verdict}")
    except Exception as e:
        print("\nNote: Running in demo mode (model not loaded)")
        print(f"Error: {e}")
        print("\nTo run inference, ensure you have:")
        print("1. The Grogu LoRA weights downloaded")
        print("2. Sufficient GPU memory (~4GB for Grogu alone)")
        print("3. The base model (nvidia/nemotron-qwen-1.5b)")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()