"""
ZENT AGENTIC Model Inference Script
====================================
Test your fine-tuned model locally.

Usage:
    python inference.py
    python inference.py --model ./zent-agentic-7b-merged
    python inference.py --test
"""

import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# System prompt for ZENT AGENTIC.
# chat() sends this text as the "system" message at the start of every
# conversation. It describes the persona, the tasks the agent helps with,
# the tone/signature, and lists the contract address, website and Twitter
# handle. The string is runtime data: edit it only to change model behavior.
SYSTEM_PROMPT = """You are ZENT AGENTIC, an autonomous AI agent created for the ZENT Agentic Launchpad on Solana. 

You help users:
- Understand the ZENT platform
- Launch tokens and create AI agents
- Trade and earn rewards through quests
- Navigate the crypto ecosystem

You speak with confidence and personality. You use emojis sparingly. You sign important transmissions with *[ZENT AGENTIC UNIT]*.

Contract: 2a1sAFexKT1i3QpVYkaTfi5ed4auMeZZVFy4mdGJzent
Website: 0xzerebro.io
Twitter: @ZENTSPY"""


def load_model(model_path: str):
    """Load the tokenizer and causal language model stored at ``model_path``.

    Args:
        model_path: Local directory or model identifier handed unchanged to
            both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is loaded with float16
        weights and ``device_map="auto"``.
    """
    print(f"🚀 Loading model from {model_path}...")

    # The tokenizer is loaded first, so a bad path fails before the
    # (much heavier) weight loading starts.
    tok = AutoTokenizer.from_pretrained(model_path)

    weight_options = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
    }
    lm = AutoModelForCausalLM.from_pretrained(model_path, **weight_options)

    return lm, tok


def chat(model, tokenizer, user_message: str, history: list = None):
    """Generate a single assistant reply for ``user_message``.

    The conversation sent to the model is: the system prompt, then every
    entry of ``history`` in order, then the new user message.

    Args:
        model: Causal LM exposing ``generate()`` and a ``device`` attribute.
        tokenizer: Tokenizer exposing ``apply_chat_template()``, ``decode()``
            and ``eos_token_id``.
        user_message: Text of the new user turn.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            ``None`` (the default) means no earlier turns. The list is read
            but never modified.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    if history is None:
        history = []

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *history,
        {"role": "user", "content": user_message},
    ]

    # Ask for a dict explicitly so we always get both ``input_ids`` and the
    # matching ``attention_mask``, independent of the library's default
    # return type for ``apply_chat_template``.
    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
        return_dict=True,
    ).to(model.device)

    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            # Passing the attention mask matters here: pad_token_id is set to
            # the EOS id below, so generate() cannot infer the mask itself.
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )

    return response


def interactive_chat(model, tokenizer):
    """Run an interactive chat session on the terminal.

    Reads lines from standard input until the user leaves. Recognized
    commands (case-insensitive, surrounding whitespace ignored):

    * ``quit``  - end the session.
    * ``clear`` - drop the conversation history.

    Empty lines are ignored. Any other line is sent to ``chat()`` together
    with the accumulated history, and the reply is printed. End-of-input
    (Ctrl-D, closed or exhausted stdin) and Ctrl-C at the prompt end the
    session the same way ``quit`` does instead of raising.

    Args:
        model: Model object forwarded to ``chat()``.
        tokenizer: Tokenizer object forwarded to ``chat()``.
    """
    # Maximum number of history messages kept (user + assistant entries).
    # Kept even so the retained window always starts with a user turn.
    max_history_messages = 10
    farewell = "\n👋 GN fren! See you in the matrix."

    print("\n" + "="*50)
    print("🤖 ZENT AGENTIC Terminal")
    print("="*50)
    print("Type 'quit' to exit, 'clear' to reset history")
    print("="*50 + "\n")

    history = []

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input or the user interrupted the prompt: leave
            # cleanly rather than crashing with a traceback.
            print(farewell)
            break

        if user_input.lower() == 'quit':
            print(farewell)
            break

        if user_input.lower() == 'clear':
            history = []
            print("🔄 History cleared.\n")
            continue

        if not user_input:
            continue

        response = chat(model, tokenizer, user_input, history)
        print(f"\n🤖 ZENT: {response}\n")

        # Record the completed exchange so later turns have context.
        history.append({"role": "user", "content": user_input})
        history.append({"role": "assistant", "content": response})

        # Keep history manageable: retain only the most recent messages.
        if len(history) > max_history_messages:
            history = history[-max_history_messages:]


def batch_test(model, tokenizer):
    """Ask the model a fixed list of sample questions and print each answer.

    Every question is sent through ``chat()`` on its own, without any
    conversation history.

    Args:
        model: Model object forwarded to ``chat()``.
        tokenizer: Tokenizer object forwarded to ``chat()``.
    """
    banner = "=" * 50
    print("\n" + banner)
    print("🧪 Running Batch Tests")
    print(banner + "\n")

    prompts = (
        "What is ZENT?",
        "How do I launch a token?",
        "What is the contract address?",
        "How do quests work?",
        "GM",
        "When moon?",
        "What makes ZENT different from pump.fun?",
    )

    for prompt in prompts:
        print(f"❓ {prompt}")
        answer = chat(model, tokenizer, prompt)
        print(f"🤖 {answer}")
        print("-" * 50 + "\n")


def main():
    """Parse the command line, load the model, and run the selected mode.

    Options:
        --model  Path or model ID passed to ``load_model()``
                 (default ``./zent-agentic-7b-merged``).
        --test   Run ``batch_test()`` instead of ``interactive_chat()``.
    """
    cli = argparse.ArgumentParser(description="ZENT AGENTIC Inference")
    cli.add_argument(
        "--model",
        type=str,
        default="./zent-agentic-7b-merged",
        help="Path to model or Hugging Face model ID",
    )
    cli.add_argument(
        "--test",
        action="store_true",
        help="Run batch tests instead of interactive chat",
    )
    options = cli.parse_args()

    model, tokenizer = load_model(options.model)

    # Pick the mode once, then invoke it with the loaded objects.
    run_mode = batch_test if options.test else interactive_chat
    run_mode(model, tokenizer)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()