Spaces:
Sleeping
Sleeping
| from src.inference import prepare_inference_inputs, generate_responses | |
| from src.model import load_model | |
| import pytest | |
| import torch | |
| def test_gpu_feature(): | |
| # Your test code that needs a GPU | |
| assert torch.cuda.is_available() | |
@pytest.fixture(scope="module")
def model_and_tokenizer():
    """Fixture to load model and tokenizer for inference.

    Registered with ``@pytest.fixture`` so that tests naming
    ``model_and_tokenizer`` as a parameter receive the loaded pair instead
    of failing with "fixture not found". Module scope means the load runs
    once per test module rather than once per requesting test.

    Returns:
        The ``(model, tokenizer)`` pair produced by ``load_model``.
    """
    model_name = "unsloth/Meta-Llama-3.1-8B"
    # Positional arguments are forwarded unchanged; their meaning is defined
    # by ``load_model`` in ``src.model``.
    # NOTE(review): ``{'': 0}`` looks like a device map pinning the whole
    # model to device 0 -- confirm against ``load_model``.
    model, tokenizer = load_model(model_name, 16, None, True, {'': 0})
    return model, tokenizer
def test_inference(model_and_tokenizer):
    """Run one prompt through the model and sanity-check the generated text.

    Unpacks the ``model_and_tokenizer`` pair, builds inputs with
    ``prepare_inference_inputs``, generates with ``generate_responses`` and
    checks that the result is a non-empty list whose first item is a
    non-empty string containing "name" (case-insensitive).
    """
    model, tokenizer = model_and_tokenizer

    # Fixed prompt pieces handed to the input-preparation helper.
    prompt_template = "Instruction: {}\nInput: {}"
    question = "What is your name?"
    context = "Tell me about yourself."
    eos_marker = "<EOS>"

    model_inputs = prepare_inference_inputs(
        tokenizer, prompt_template, question, context, eos_marker
    )
    responses = generate_responses(
        model, model_inputs, tokenizer, max_new_tokens=32
    )

    # The result must be a non-empty list ...
    assert isinstance(responses, list), f"Expected list, but got {type(responses)}"
    assert len(responses) > 0, "Expected non-empty responses list"

    # ... whose first element is a non-empty string.
    first_response = responses[0]
    assert isinstance(first_response, str), f"Expected string, but got {type(responses[0])}"
    assert len(first_response) > 0, "Expected non-empty string response"

    # Loose content check on the generated text.
    assert "name" in first_response.lower(), "Response does not contain expected content"