# This script tests loading and saving a small DeepSeek model using PyTorch and transformers.
# Purpose: verify the setup for the Veltraxor LLM project, ensuring compatibility for DeepSeek R1 fine-tuning.
# Run locally in VS Code (CPU mode) to test the code logic; do the actual fine-tuning on the cloud (Colab Pro).
# Requirements: PyTorch (CPU build installed) plus the transformers and datasets packages (pip-installed).
# Error handling below prints a diagnostic hint instead of crashing if the download or save fails.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def main() -> None:
    """Download a test checkpoint, round-trip it to disk, and print environment info.

    Smoke-tests the local transformers/PyTorch setup before moving real
    fine-tuning work to a cloud GPU. Prints a diagnostic hint instead of
    raising if the download or save fails.
    """
    # Small base model standing in for DeepSeek R1 during local testing;
    # replace with the real DeepSeek R1 checkpoint when running in the cloud.
    # NOTE(review): DeepSeek-Coder-V2-Lite-Base is a ~16B-parameter MoE
    # checkpoint — considerably larger than the "~2GB" the original comment
    # claimed. Confirm local disk/RAM capacity before running.
    model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Base"
    try:
        # trust_remote_code=True executes model code shipped inside the
        # checkpoint repo — required for this model family, but only safe
        # with repos you trust.
        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Round-trip to a local directory to confirm serialization works.
        save_path = "./test_model"
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)

        # Report success and basic environment details.
        print("Model loaded and saved successfully!")
        print("Model type:", type(model))
        print("PyTorch version:", torch.__version__)
        print("CUDA available:", torch.cuda.is_available())  # False on CPU, True on GPU/cloud
    except Exception as e:
        # Top-level script boundary: report the failure and exit cleanly
        # rather than dumping a traceback at a zero-foundation user.
        print("Error occurred:", str(e))
        print("Check: Ensure transformers is installed and internet connection for download.")


# Guarded entry point so importing this module no longer triggers a
# multi-gigabyte download as a side effect.
if __name__ == "__main__":
    main()

# Next: Adjust code as needed (e.g., add LoRA for fine-tuning), upload to Colab Pro for DeepSeek R1 (16B).