#!/usr/bin/env python3
"""
Grogu Science MoE - Simple Inference Example

This script demonstrates how to use the Grogu debate system
for graduate-level science questions.
"""

import json
import re

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from typing import Dict, List, Optional


class GroguDebateSystem:
    """
    A Mixture-of-Experts debate system for science questions.

    The system uses 4 agents:
    - Grogu: General reasoning agent (Nemotron-Qwen-1.5B + LoRA)
    - Physics: Domain specialist
    - Chemistry: Domain specialist
    - Biology: Domain specialist

    They engage in a 2-round collaborative debate with synthesis.
    (This module only implements the single-agent variant; see run_debate.py
    for the full 4-agent pipeline.)
    """

    def __init__(
        self,
        grogu_path: str = "zenith-global/grogu-science-moe/grogu-lora",
        base_model: str = "nvidia/nemotron-qwen-1.5b",
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
        dtype: torch.dtype = torch.float16
    ):
        """Initialize the debate system with model paths.

        Args:
            grogu_path: HuggingFace path to the Grogu LoRA adapter weights.
            base_model: HuggingFace path to the base causal-LM checkpoint.
            device: Target device for tokenized inputs ("cuda" or "cpu").
            dtype: Torch dtype used to load the base model weights.
        """
        self.device = device
        self.dtype = dtype

        print("Loading Grogu model...")
        self.tokenizer = AutoTokenizer.from_pretrained(base_model)

        # Load base model (device_map="auto" lets HF shard/place the weights)
        self.base_model = AutoModelForCausalLM.from_pretrained(
            base_model,
            torch_dtype=dtype,
            device_map="auto"
        )

        # Apply LoRA adapter on top of the frozen base weights
        self.grogu = PeftModel.from_pretrained(
            self.base_model,
            grogu_path
        )
        self.grogu.eval()
        print("Grogu loaded!")

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.1,
        top_p: float = 0.95
    ) -> str:
        """Generate a response from Grogu.

        Args:
            prompt: Full text prompt to feed the model.
            max_new_tokens: Cap on generated tokens.
            temperature: Sampling temperature; 0 disables sampling (greedy).
            top_p: Nucleus-sampling probability mass.

        Returns:
            The decoded completion (prompt stripped), whitespace-trimmed.
        """
        # NOTE(review): the model is loaded with device_map="auto", so on a
        # multi-device setup inputs moved to self.device may not match the
        # embedding layer's device — confirm on sharded deployments.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.grogu.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=temperature > 0,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # Slice off the prompt tokens so only the completion is decoded.
        response = self.tokenizer.decode(
            outputs[0][inputs.input_ids.shape[1]:],
            skip_special_tokens=True
        )
        return response.strip()

    def extract_answer(self, response: str) -> str:
        """Extract the final answer letter (A, B, C, or D) from a response.

        Looks for common answer-announcement phrases and returns the first
        standalone choice letter that follows one. Falls back to scanning the
        whole response for a standalone or parenthesized letter, and finally
        defaults to "A" if nothing is found.
        """
        patterns = [
            "The answer is",
            "Answer:",
            "Final answer:",
            "Therefore,",
            "So the answer is"
        ]

        response_upper = response.upper()
        for pattern in patterns:
            idx = response_upper.find(pattern.upper())
            if idx == -1:
                continue
            # BUG FIX: search AFTER the pattern text itself. The previous
            # code scanned a slice that still contained the pattern, so the
            # 'A' inside "ANSWER" always matched and the method returned "A"
            # for every response containing an answer phrase.
            after = response_upper[idx + len(pattern):]
            # Word boundaries avoid matching letters inside ordinary words
            # (e.g. the 'A' in "CLEARLY"); "(B)" also matches since parens
            # are non-word characters.
            match = re.search(r"\b([ABCD])\b", after[:50])
            if match:
                return match.group(1)

        # Fallback: first standalone A/B/C/D anywhere in the response.
        match = re.search(r"\b([ABCD])\b", response_upper)
        if match:
            return match.group(1)
        return "A"  # Default when no answer letter can be located

    def debate_single(
        self,
        question: str,
        options: Optional[Dict[str, str]] = None
    ) -> Dict:
        """
        Run a simplified single-agent debate (Grogu only).

        For the full 4-agent debate, see run_debate.py

        Args:
            question: The science question text.
            options: Optional mapping of choice letter -> option text.

        Returns:
            Dict with per-round answers/reasoning, the final answer, and
            whether the agent changed its mind between rounds.
        """
        # Format the question (multiple-choice vs. open-ended prompt).
        if options:
            formatted_options = "\n".join([
                f"{k}) {v}" for k, v in options.items()
            ])
            prompt = f"""You are an expert scientist. Answer this question step by step.

Question: {question}

Options:
{formatted_options}

Think through this carefully and provide your answer. End with "The answer is [A/B/C/D]"."""
        else:
            prompt = f"""You are an expert scientist. Answer this question step by step.

Question: {question}

Think through this carefully and provide your reasoning."""

        # Round 1: Initial reasoning
        response_r1 = self.generate(prompt)
        answer_r1 = self.extract_answer(response_r1)

        # Round 2: Self-reflection (feed back a truncated copy of round-1
        # reasoning so the prompt stays within context limits).
        reflection_prompt = f"""{prompt}

Your initial answer was: {answer_r1}
Your reasoning was: {response_r1[:500]}...

Reconsider your answer. Are you confident? If not, what might be wrong?
End with "The answer is [A/B/C/D]"."""

        response_r2 = self.generate(reflection_prompt)
        answer_r2 = self.extract_answer(response_r2)

        return {
            "question": question,
            "round1_answer": answer_r1,
            "round1_reasoning": response_r1,
            "round2_answer": answer_r2,
            "round2_reasoning": response_r2,
            "final_answer": answer_r2,
            "changed_mind": answer_r1 != answer_r2
        }

    @classmethod
    def from_pretrained(cls, model_path: str) -> "GroguDebateSystem":
        """Load the debate system from a HuggingFace model path."""
        return cls(grogu_path=f"{model_path}/grogu-lora")


def main():
    """Example usage of the Grogu debate system."""
    # Example GPQA-style question
    question = """
Two quantum states with energies E1 and E2 have a lifetime of 10^-9 sec and 10^-8 sec, respectively. We want to clearly distinguish these two energy levels. Which one of the following options could be their energy difference so that they can be clearly resolved?
"""
    options = {
        "A": "10^-4 eV",
        "B": "10^-11 eV",
        "C": "10^-8 eV",
        "D": "10^-9 eV"
    }

    print("=" * 60)
    print("GROGU SCIENCE MoE - Inference Demo")
    print("=" * 60)

    # Initialize system
    print("\nInitializing Grogu...")

    # For demo, we'll use a mock if the model isn't available
    try:
        system = GroguDebateSystem()

        print("\nRunning debate on physics question...")
        result = system.debate_single(question, options)

        print(f"\nQuestion: {question[:100]}...")
        print(f"\nRound 1 Answer: {result['round1_answer']}")
        print(f"Round 2 Answer: {result['round2_answer']}")
        print(f"Changed Mind: {result['changed_mind']}")
        print(f"\nFinal Answer: {result['final_answer']}")

        # Correct answer is A (10^-4 eV)
        correct = "A"
        print(f"Correct Answer: {correct}")
        print(f"Result: {'Correct!' if result['final_answer'] == correct else 'Incorrect'}")

    except Exception as e:
        print(f"\nNote: Running in demo mode (model not loaded)")
        print(f"Error: {e}")
        print("\nTo run inference, ensure you have:")
        print("1. The Grogu LoRA weights downloaded")
        print("2. Sufficient GPU memory (~4GB for Grogu alone)")
        print("3. The base model (nvidia/nemotron-qwen-1.5b)")


if __name__ == "__main__":
    main()