# Provenance: Veltraxor_1 / test_model.py — uploaded by ConorWang ("Upload 36 files", commit 6536df9, verified).
# This script tests loading and saving a small DeepSeek model using PyTorch and transformers.
# Purpose: Verify setup for Veltraxor LLM project, ensuring compatibility for DeepSeek R1 fine-tuning.
# Run in local VS Code (CPU mode) to test code logic; actual fine-tuning on cloud (Colab Pro).
# Requirements: PyTorch (CPU version installed), transformers, datasets (pip installed).
# No errors: Tested logic is simple and robust for zero-foundation users.
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
def main() -> None:
    """Smoke-test the environment by loading and re-saving a small DeepSeek model.

    Downloads a small DeepSeek-compatible base model, saves the model and
    tokenizer to ./test_model, and prints basic environment info (PyTorch
    version, CUDA availability). Intended as a setup check before DeepSeek R1
    fine-tuning on cloud hardware.
    """
    try:
        # Small test model (~2GB), structurally compatible with DeepSeek R1;
        # replace with DeepSeek R1 when running in the cloud.
        model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Base"
        # trust_remote_code=True: this repository ships custom modeling code
        # that transformers must execute to build the model class.
        model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Round-trip save to verify serialization works before fine-tuning.
        save_path = "./test_model"
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)
        # Report success and basic environment info.
        print("Model loaded and saved successfully!")
        print("Model type:", type(model))
        print("PyTorch version:", torch.__version__)
        print("CUDA available:", torch.cuda.is_available())  # False on CPU, True on GPU/cloud
    except Exception as e:
        # Broad catch is deliberate: this is a top-level smoke test aimed at
        # beginners, so print a readable hint instead of a raw traceback.
        print("Error occurred:", str(e))
        print("Check: Ensure transformers is installed and internet connection for download.")


# Next: Adjust code as needed (e.g., add LoRA for fine-tuning), upload to Colab Pro for DeepSeek R1 (16B).
if __name__ == "__main__":
    main()