#!/usr/bin/env python3
"""
CUDA-optimized basic usage examples for Ursa Minor Smashed model
"""
import sys
import os

# Make the project root importable when this script is run directly
# from the examples directory.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import torch

from inference_cuda import generate_direct, load_model_direct

# Single source of truth for the optimized checkpoint path (was repeated
# in both entry functions).
_MODEL_PATH = "model_optimized.pt"


def _cuda_ready():
    """Return True if CUDA is available; otherwise print the CPU-fallback hint.

    Centralizes the availability check + error message that was duplicated
    in run_basic_examples() and run_interactive_mode().
    """
    if torch.cuda.is_available():
        return True
    print("ERROR: CUDA is not available. Use basic_usage_cpu.py for CPU inference.")
    return False


def run_basic_examples():
    """Run basic CUDA-optimized usage examples.

    Loads the model once, then generates text for a fixed set of prompts
    with per-example sampling parameters. No return value; results are
    printed to stdout.
    """
    if not _cuda_ready():
        return

    print("šŸš€ Ursa Minor Smashed - CUDA Basic Usage Examples")
    print("=" * 60)

    # Load model once for all examples
    print("Loading model on CUDA...")
    model = load_model_direct(_MODEL_PATH)
    print("āœ… Model loaded!\n")

    examples = [
        {
            "name": "Creative Writing",
            "prompt": "In a world where artificial intelligence has",
            "params": {"max_new_tokens": 150, "temperature": 0.9, "top_k": 50},
        },
        {
            "name": "Code Generation",
            "prompt": "def fibonacci(n):",
            "params": {"max_new_tokens": 120, "temperature": 0.4, "top_k": 40},
        },
        {
            "name": "Explanation",
            "prompt": "Explain how neural networks work:",
            "params": {"max_new_tokens": 200, "temperature": 0.7, "top_k": 50},
        },
        {
            "name": "Story Continuation",
            "prompt": "The spaceship landed on the mysterious planet, and the crew discovered",
            "params": {"max_new_tokens": 180, "temperature": 0.8, "top_k": 45},
        },
        {
            "name": "Technical Writing",
            "prompt": "The benefits of using GPU acceleration include",
            "params": {"max_new_tokens": 100, "temperature": 0.6, "top_k": 40},
        },
    ]

    for i, example in enumerate(examples, 1):
        print(f"šŸ“ Example {i}: {example['name']}")
        print(f"šŸ’­ Prompt: {example['prompt']}")
        print("šŸ”„ Generating...")

        try:
            result = generate_direct(
                model,
                example['prompt'],
                **example['params']
            )
            print("✨ Result:")
            print("-" * 40)
            print(result)
            print("-" * 40)
            print()
        except Exception as e:
            # Best-effort: report the failure and continue with the next example.
            print(f"āŒ Error: {e}")
            print()


def run_interactive_mode():
    """Run interactive mode for testing different generation parameters.

    Reads prompts from stdin in a loop. Supported commands: 'params' to
    edit generation parameters, 'quit' to exit. Exits cleanly on EOF
    (e.g. Ctrl-D or piped input) instead of crashing with a traceback.
    """
    if not _cuda_ready():
        return

    print("\nšŸŽ® Interactive Mode")
    print("=" * 30)

    # Load model
    print("Loading model on CUDA...")
    model = load_model_direct(_MODEL_PATH)
    print("āœ… Model loaded!")

    print("\nCommands:")
    print("- Enter a prompt to generate text")
    print("- Type 'params' to change generation parameters")
    print("- Type 'quit' to exit")
    print()

    # Default parameters optimized for CUDA
    params = {
        "max_new_tokens": 100,
        "temperature": 0.8,
        "top_k": 50,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    }

    while True:
        try:
            user_input = input("šŸŽÆ Prompt (or command): ").strip()
        except EOFError:
            # stdin exhausted (Ctrl-D / piped input): exit gracefully.
            print("\nšŸ‘‹ Goodbye!")
            break

        if user_input.lower() == 'quit':
            print("šŸ‘‹ Goodbye!")
            break
        elif user_input.lower() == 'params':
            print("\nCurrent parameters:")
            for key, value in params.items():
                print(f"  {key}: {value}")

            print("\nEnter new values (press Enter to keep current):")
            for key in params:
                new_value = input(f"  {key} [{params[key]}]: ").strip()
                if new_value:
                    try:
                        # max_new_tokens and top_k are integer-valued;
                        # everything else is a float.
                        if key == "max_new_tokens" or key == "top_k":
                            params[key] = int(new_value)
                        else:
                            params[key] = float(new_value)
                    except ValueError:
                        print(f"Invalid value for {key}, keeping current value")
            print()
            continue
        elif user_input == "":
            continue

        # Generate text
        try:
            print("šŸ”„ Generating...")
            result = generate_direct(model, user_input, **params)
            print("✨ Result:")
            print("-" * 40)
            print(result)
            print("-" * 40)
            print()
        except Exception as e:
            # Report and keep the session alive for the next prompt.
            print(f"āŒ Error: {e}")
            print()


def main():
    """Entry point: choose between the fixed examples and interactive mode."""
    print("Choose mode:")
    print("1. Run basic examples")
    print("2. Interactive mode")

    try:
        choice = input("Enter choice (1 or 2): ").strip()

        if choice == "1":
            run_basic_examples()
        elif choice == "2":
            run_interactive_mode()
        else:
            print("Invalid choice. Running basic examples...")
            run_basic_examples()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or closed stdin at the menu prompt: exit cleanly.
        print("\nšŸ‘‹ Goodbye!")


if __name__ == "__main__":
    main()