#!/usr/bin/env python3
"""
CUDA-optimized basic usage examples for Ursa Minor Smashed model
"""
import sys
import os

# Make the project root importable when this script is run directly
# from the examples directory.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import torch

from inference_cuda import generate_direct, load_model_direct

# Single source of truth for the optimized checkpoint path (was repeated
# in both entry functions).
_MODEL_PATH = "model_optimized.pt"


def _cuda_ready():
    """Return True if CUDA is available; otherwise print the CPU-fallback hint.

    Centralizes the availability check + error message that was duplicated
    in run_basic_examples() and run_interactive_mode().
    """
    if torch.cuda.is_available():
        return True
    print("ERROR: CUDA is not available. Use basic_usage_cpu.py for CPU inference.")
    return False


def run_basic_examples():
    """Run basic CUDA-optimized usage examples.

    Loads the model once, then generates text for a fixed set of prompts
    with per-example sampling parameters. No return value; results are
    printed to stdout.
    """
    if not _cuda_ready():
        return

    print("šŸš€ Ursa Minor Smashed - CUDA Basic Usage Examples")
    print("=" * 60)

    # Load model once for all examples
    print("Loading model on CUDA...")
    model = load_model_direct(_MODEL_PATH)
    print("āœ… Model loaded!\n")

    examples = [
        {
            "name": "Creative Writing",
            "prompt": "In a world where artificial intelligence has",
            "params": {"max_new_tokens": 150, "temperature": 0.9, "top_k": 50},
        },
        {
            "name": "Code Generation",
            "prompt": "def fibonacci(n):",
            "params": {"max_new_tokens": 120, "temperature": 0.4, "top_k": 40},
        },
        {
            "name": "Explanation",
            "prompt": "Explain how neural networks work:",
            "params": {"max_new_tokens": 200, "temperature": 0.7, "top_k": 50},
        },
        {
            "name": "Story Continuation",
            "prompt": "The spaceship landed on the mysterious planet, and the crew discovered",
            "params": {"max_new_tokens": 180, "temperature": 0.8, "top_k": 45},
        },
        {
            "name": "Technical Writing",
            "prompt": "The benefits of using GPU acceleration include",
            "params": {"max_new_tokens": 100, "temperature": 0.6, "top_k": 40},
        },
    ]

    for i, example in enumerate(examples, 1):
        print(f"šŸ“ Example {i}: {example['name']}")
        print(f"šŸ’­ Prompt: {example['prompt']}")
        print("šŸ”„ Generating...")

        try:
            result = generate_direct(
                model,
                example['prompt'],
                **example['params']
            )
            print("✨ Result:")
            print("-" * 40)
            print(result)
            print("-" * 40)
            print()
        except Exception as e:
            # Best-effort: report the failure and continue with the next example.
            print(f"āŒ Error: {e}")
            print()


def run_interactive_mode():
    """Run interactive mode for testing different generation parameters.

    Reads prompts from stdin in a loop. Supported commands: 'params' to
    edit generation parameters, 'quit' to exit. Exits cleanly on EOF
    (e.g. Ctrl-D or piped input) instead of crashing with a traceback.
    """
    if not _cuda_ready():
        return

    print("\nšŸŽ® Interactive Mode")
    print("=" * 30)

    # Load model
    print("Loading model on CUDA...")
    model = load_model_direct(_MODEL_PATH)
    print("āœ… Model loaded!")

    print("\nCommands:")
    print("- Enter a prompt to generate text")
    print("- Type 'params' to change generation parameters")
    print("- Type 'quit' to exit")
    print()

    # Default parameters optimized for CUDA
    params = {
        "max_new_tokens": 100,
        "temperature": 0.8,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    }

    while True:
        try:
            user_input = input("šŸŽÆ Prompt (or command): ").strip()
        except EOFError:
            # stdin exhausted (Ctrl-D / piped input): exit gracefully.
            print("\nšŸ‘‹ Goodbye!")
            break

        if user_input.lower() == 'quit':
            print("šŸ‘‹ Goodbye!")
            break
        elif user_input.lower() == 'params':
            print("\nCurrent parameters:")
            for key, value in params.items():
                print(f"  {key}: {value}")

            print("\nEnter new values (press Enter to keep current):")
            for key in params:
                new_value = input(f"  {key} [{params[key]}]: ").strip()
                if new_value:
                    try:
                        # max_new_tokens and top_k are integer-valued;
                        # everything else is a float.
                        if key == "max_new_tokens" or key == "top_k":
                            params[key] = int(new_value)
                        else:
                            params[key] = float(new_value)
                    except ValueError:
                        print(f"Invalid value for {key}, keeping current value")
            print()
            continue
        elif user_input == "":
            continue

        # Generate text
        try:
            print("šŸ”„ Generating...")
            result = generate_direct(model, user_input, **params)
            print("✨ Result:")
            print("-" * 40)
            print(result)
            print("-" * 40)
            print()
        except Exception as e:
            # Report and keep the session alive for the next prompt.
            print(f"āŒ Error: {e}")
            print()


def main():
    """Entry point: choose between the fixed examples and interactive mode."""
    print("Choose mode:")
    print("1. Run basic examples")
    print("2. Interactive mode")

    try:
        choice = input("Enter choice (1 or 2): ").strip()

        if choice == "1":
            run_basic_examples()
        elif choice == "2":
            run_interactive_mode()
        else:
            print("Invalid choice. Running basic examples...")
            run_basic_examples()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or closed stdin at the menu prompt: exit cleanly.
        print("\nšŸ‘‹ Goodbye!")


if __name__ == "__main__":
    main()