# Provenance: Veltraxor_1 / test_model.py — uploaded by ConorWang ("Upload 36 files", commit 6536df9, verified).
# This script tests loading and saving a small DeepSeek model using PyTorch and transformers.
# Purpose: Verify setup for Veltraxor LLM project, ensuring compatibility for DeepSeek R1 fine-tuning.
# Run in local VS Code (CPU mode) to test code logic; actual fine-tuning on cloud (Colab Pro).
# Requirements: PyTorch (CPU version installed), transformers, datasets (pip installed).
# No errors: Tested logic is simple and robust for zero-foundation users.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
def main() -> None:
    """Smoke-test the environment by loading and re-saving a small DeepSeek model.

    Downloads a small DeepSeek-compatible base model, saves the model and
    tokenizer to ./test_model, and prints basic environment info (PyTorch
    version, CUDA availability). Intended as a setup check before DeepSeek R1
    fine-tuning on cloud hardware.
    """
    try:
        # Small test model (~2GB), structurally compatible with DeepSeek R1;
        # replace with DeepSeek R1 when running in the cloud.
        model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Base"
        # trust_remote_code=True: this repository ships custom modeling code
        # that transformers must execute to build the model class.
        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Round-trip save to verify serialization works before fine-tuning.
        save_path = "./test_model"
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)
        # Report success and basic environment info.
        print("Model loaded and saved successfully!")
        print("Model type:", type(model))
        print("PyTorch version:", torch.__version__)
        print("CUDA available:", torch.cuda.is_available())  # False on CPU, True on GPU/cloud
    except Exception as e:
        # Broad catch is deliberate: this is a top-level smoke test aimed at
        # beginners, so print a readable hint instead of a raw traceback.
        print("Error occurred:", str(e))
        print("Check: Ensure transformers is installed and internet connection for download.")


# Next: Adjust code as needed (e.g., add LoRA for fine-tuning), upload to Colab Pro for DeepSeek R1 (16B).
if __name__ == "__main__":
    main()