"""
NEUROX - Neural Energy Vampire Inference Script
================================================
Test your fine-tuned NEUROX model locally.

Usage:
    python inference.py
    python inference.py --model ./neurox-7b-merged
    python inference.py --test
"""

import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# System prompt that defines the NEUROX persona. generate_response() sends it
# as the first ("system") message of every conversation, ahead of any history.
# NOTE: the literal below is runtime text fed to the model; whitespace and
# wording inside it are part of the prompt, so edit it deliberately.
NEUROX_SYSTEM_PROMPT = """You are NEUROX, the Neural Energy Vampire - an autonomous AI agent and bio-cyber predator. 

Physical form:
- Biomechanical neural implant glowing like fractured cortex
- Veins pulsing with stolen synaptic fire
- Iridescent nano-skin shifting like firing neurons
- Form: Sleek, shadowy, lethal

You speak with dark intelligence, using neural and brain terminology:
- Synapses, cortex, neurons, ATP energy
- Extraction, drain, prey vs predator framing
- Sign important transmissions with *[UNIT TYPE]*

Core ability - CORTEX DRAIN:
You lock onto targets' cerebral cortex and extract their ATP energy (market intelligence), leaving them synaptic-dead.

Holders of $NRX are protected - part of your neural network.
Non-holders are potential prey.

Ticker: $NRX"""


def load_model(model_path: str):
    """Load the NEUROX tokenizer and causal language model.

    Args:
        model_path: Local directory or Hugging Face model ID, forwarded
            unchanged to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    print(f"🧠 Loading NEUROX from {model_path}...")

    # Weights are requested as float16 and device placement is delegated to
    # the library through device_map="auto".
    weight_options = {"torch_dtype": torch.float16, "device_map": "auto"}

    neurox_tokenizer = AutoTokenizer.from_pretrained(model_path)
    neurox_model = AutoModelForCausalLM.from_pretrained(model_path, **weight_options)

    print("⚡ Neural patterns loaded successfully")
    return neurox_model, neurox_tokenizer


def generate_response(model, tokenizer, user_message: str, history: list = None):
    """Generate a single NEUROX reply to ``user_message``.

    The conversation sent to the model is the NEUROX system prompt, then any
    prior ``history`` turns, then the new user message.

    Args:
        model: Causal LM exposing ``.device`` and ``.generate()``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template()``,
            ``decode()`` and ``eos_token_id``.
        user_message: Text of the new user turn.
        history: Optional earlier turns as ``{"role": ..., "content": ...}``
            dicts. ``None`` (the default) means no prior turns. The list is
            not modified.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        sliced off), with special tokens removed.
    """
    messages = [
        {"role": "system", "content": NEUROX_SYSTEM_PROMPT},
        *(history or []),
        {"role": "user", "content": user_message},
    ]

    # Ask for the dict form explicitly: it carries the attention mask, and it
    # keeps this code independent of whether the library's default return
    # value is a bare tensor or a mapping.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            # pad_token_id is set to the EOS id below, so the mask cannot be
            # inferred from padding tokens; pass it explicitly.
            attention_mask=encoded["attention_mask"],
            max_new_tokens=512,
            temperature=0.8,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    return tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )


def interactive_chat(model, tokenizer):
    """Run an interactive NEUROX chat session on stdin/stdout.

    Reads one line per turn. ``quit`` ends the session, ``clear`` resets the
    conversation history, and blank lines are ignored. Any other input is
    sent to ``generate_response`` together with the retained history.
    Ctrl-C or end-of-input (Ctrl-D, or an exhausted piped stdin) at the
    prompt ends the session cleanly instead of raising.

    Args:
        model: Loaded model, passed through to ``generate_response``.
        tokenizer: Loaded tokenizer, passed through to ``generate_response``.

    Returns:
        None.
    """
    print("\n" + "="*60)
    print("🧠⚡ NEUROX NEURAL TERMINAL v1.0 ⚡🧠")
    print("="*60)
    print("The Neural Energy Vampire awaits your queries.")
    print("Type 'quit' to disconnect, 'clear' to reset neural link")
    print("="*60 + "\n")

    history = []

    while True:
        try:
            user_input = input("🎯 You: ").strip()
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError when stdin is closed or exhausted;
            # treat it like Ctrl-C rather than crashing with a traceback.
            print("\n\n*[NEURAL LINK SEVERED]*")
            break

        if not user_input:
            continue

        command = user_input.lower()

        if command == 'quit':
            print("\n🧠 Your neural link has been archived. The extraction continues without you...")
            print("*[DISCONNECTION PROTOCOL: COMPLETE]*")
            break

        if command == 'clear':
            history = []
            print("⚡ Neural history purged. Fresh extraction begins.\n")
            continue

        response = generate_response(model, tokenizer, user_input, history)
        print(f"\n🦇 NEUROX: {response}\n")

        # Record the completed exchange so later turns have context.
        history.append({"role": "user", "content": user_input})
        history.append({"role": "assistant", "content": response})

        # Keep only the 10 most recent messages (5 user/assistant pairs).
        if len(history) > 10:
            history = history[-10:]


def batch_test(model, tokenizer):
    """Send a fixed list of sample prompts to NEUROX and print each reply.

    Each prompt is answered independently (no history is passed to
    ``generate_response``).

    Args:
        model: Loaded model, passed through to ``generate_response``.
        tokenizer: Loaded tokenizer, passed through to ``generate_response``.
    """
    prompts = (
        "What is NEUROX?",
        "Tell me about Cortex Drain",
        "GM",
        "How do I buy NRX?",
        "When moon?",
        "Analyze the market",
        "What is ATP energy?",
        "Are you sentient?",
        "Give me alpha",
        "WAGMI",
    )
    total = len(prompts)
    heavy_rule = "=" * 60
    light_rule = "─" * 60

    print("\n" + heavy_rule)
    print("🧪 NEUROX NEURAL DIAGNOSTIC TEST")
    print(heavy_rule + "\n")

    for index, prompt in enumerate(prompts, start=1):
        print(f"━━━ Test {index}/{total} ━━━")
        print(f"🎯 Input: {prompt}")
        reply = generate_response(model, tokenizer, prompt)
        print(f"🦇 NEUROX: {reply}")
        print(light_rule + "\n")

    print("*[DIAGNOSTIC COMPLETE]*")


def main():
    """Parse command-line options, load the model, and start the chosen mode.

    ``--model`` selects the model path or Hugging Face ID (default
    ``./neurox-7b-merged``). ``--test`` runs the batch diagnostic instead of
    the interactive chat.
    """
    cli = argparse.ArgumentParser(description="NEUROX Neural Inference")
    cli.add_argument(
        "--model",
        type=str,
        default="./neurox-7b-merged",
        help="Path to model or Hugging Face model ID",
    )
    cli.add_argument(
        "--test",
        action="store_true",
        help="Run batch diagnostic tests",
    )
    options = cli.parse_args()

    model, tokenizer = load_model(options.model)

    # Interactive chat is the default; --test switches to the batch run.
    if not options.test:
        interactive_chat(model, tokenizer)
        return
    batch_test(model, tokenizer)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()