Spaces:
Sleeping
Sleeping
| from src.training import train_model | |
| from src.model import load_model | |
| from src.dataset import formatting_prompts_func | |
| from datasets import Dataset | |
| import pytest | |
| import torch | |
| def test_gpu_feature(): | |
| # Your test code that needs a GPU | |
| assert torch.cuda.is_available() | |
import pytest


@pytest.fixture
def mock_dataset():
    """Fixture providing a small formatted dataset for training tests.

    Builds two instruction/input/output rows, formats them with the
    project's prompt template and EOS token, and wraps the result in a
    ``datasets.Dataset``.

    Returns:
        datasets.Dataset: dataset with a formatted ``text`` field
        (presumably — the exact fields come from
        ``formatting_prompts_func``; verify against src.dataset).
    """
    data = {
        "instruction": ["Test instruction 1", "Test instruction 2"],
        "input": ["Test input 1", "Test input 2"],
        "output": ["Test output 1", "Test output 2"],
    }
    # BUG FIX: this function is consumed as a fixture argument by
    # test_train_model, but the @pytest.fixture decorator was missing, so
    # pytest would fail collection with "fixture 'mock_dataset' not found".
    formatted_data = formatting_prompts_func(
        data,
        template="Instruction: {}\nInput: {}\nOutput: {}",
        eos_token="<EOS>",
    )
    return Dataset.from_dict(formatted_data)
def test_train_model(mock_dataset):
    """End-to-end smoke test: a single training step over the mock dataset.

    Loads the base model, runs one optimizer step via ``train_model``, and
    checks that the returned statistics object carries the fields the
    trainer is expected to expose.
    """
    # Load the base model and its tokenizer.
    model_name = "unsloth/Meta-Llama-3.1-8B"
    model, tokenizer = load_model(model_name, 16, None, True, {'': 0})

    # Keep the run tiny: exactly one step, writing into "outputs".
    training_args = {"max_steps": 1, "output_dir": "outputs"}

    stats = train_model(
        model=model,
        tokenizer=tokenizer,
        train_dataset=mock_dataset,
        dataset_text_field="text",
        max_seq_length=16,
        dataset_num_proc=1,
        packing=False,
        training_args=training_args,
    )

    # The trainer should hand back a stats object (trainer.train() results
    # expose 'global_step' and 'train_loss') ...
    assert stats is not None
    assert hasattr(stats, "global_step")
    assert hasattr(stats, "train_loss")
    # ... whose recorded output directory matches what we configured.
    assert "outputs" in stats.args.output_dir