| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """Smoke tests for Cosmos-Embed1 including Transformer Engine support.""" |
|
|
| |
| import os |
|
|
| import pytest |
| import torch |
| from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer |
|
|
# Location passed to every ``from_pretrained`` call in this module. Override
# it with the COSMOS_EMBED1_MODEL_PATH environment variable; when the variable
# is unset the path "." is used.
MODEL_PATH = os.environ.get("COSMOS_EMBED1_MODEL_PATH", ".")
|
|
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_smoke() -> None:
    """Exercise the text, video and joint embedding paths on the GPU.

    Loads the processor and model from ``MODEL_PATH``, moves the model to
    CUDA in bfloat16 and checks that each embedding entry point produces a
    ``(2, 256)`` projection for a batch of two inputs. Finally checks the
    tokenizer length.

    The test is skipped when CUDA is unavailable, like the other GPU tests in
    this module, because the model and all inputs are placed on ``"cuda"``.
    """
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    # ``finally`` guarantees the GPU memory is released even when an assertion
    # fails, so a failure here does not starve the tests that run afterwards.
    try:
        with torch.no_grad():
            # Text-only path.
            text_inputs = preprocess(text=["a cat", "a dog"]).to("cuda", dtype=torch.bfloat16)
            text_out = model.get_text_embeddings(**text_inputs)
            assert text_out.text_proj.shape == (2, 256)

            # Video-only path on random frames.
            # NOTE: torch.randint's upper bound is exclusive, so values span 0..254.
            video_inputs = preprocess(videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224))).to(
                "cuda", dtype=torch.bfloat16
            )
            video_out = model.get_video_embeddings(**video_inputs)
            assert video_out.visual_proj.shape == (2, 256)

            # Joint text + video forward pass.
            text_video_inputs = preprocess(
                text=["a cat", "a dog"],
                videos=torch.randint(0, 255, size=(2, 8, 3, 448, 448)),
            ).to("cuda", dtype=torch.bfloat16)
            text_video_out = model(**text_video_inputs)
            assert text_video_out.text_proj.shape == text_video_out.visual_proj.shape == (2, 256)

        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
        assert len(tokenizer) == 30523
    finally:
        # Drop the only reference to the model before asking the caching
        # allocator to return its unused blocks.
        del model
        torch.cuda.empty_cache()
|
|
|
|
def test_transformer_engine_available():
    """Pass when Transformer Engine can be imported; skip otherwise.

    The import is only a probe: nothing from the module is used, so it is not
    bound to an alias and no further assertion is needed. Reaching the end of
    the function is the success condition.
    """
    try:
        import transformer_engine.pytorch  # noqa: F401  (availability probe only)
    except ImportError:
        pytest.skip("Transformer Engine not available, skipping TE tests")
|
|
|
|
def test_load_standard_model():
    """Load the default (non Transformer Engine) model and check its layout.

    Verifies that the loaded model reports ``transformer_engine`` as ``False``
    and exposes the ``visual_encoder`` and ``qformer`` attributes.
    """
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16)

    # Identity check: the flag must be the boolean False, not merely falsy.
    assert model.transformer_engine is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Release the model before the next test loads another copy.
    del model
    torch.cuda.empty_cache()
|
|
|
|
def test_load_transformer_engine_model():
    """Load the model with Transformer Engine enabled and FP8 disabled.

    Skipped when ``transformer_engine.pytorch`` cannot be imported. Otherwise
    the config is loaded from ``MODEL_PATH``, switched to
    ``transformer_engine=True`` / ``use_fp8=False`` and passed to
    ``AutoModel.from_pretrained``; the resulting model must report the same
    flags and expose ``visual_encoder`` and ``qformer``.
    """
    try:
        import transformer_engine.pytorch  # noqa: F401  (availability probe only)
    except ImportError:
        pytest.skip("Transformer Engine not available, skipping TE tests")

    # trust_remote_code=True matches every other from_pretrained call in this
    # module, so the config loads without a custom-code confirmation step.
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = False

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Release the model before the next test loads another copy.
    del model
    torch.cuda.empty_cache()
|
|
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_inference():
    """Compare text-embedding output shapes of the standard and TE models.

    Skipped without CUDA or when ``transformer_engine.pytorch`` cannot be
    imported. The same single-prompt input is run through the standard model
    and then through a model loaded with ``transformer_engine=True`` and
    ``use_fp8=False``. Only the shapes of ``text_proj`` are compared; the
    values themselves are not checked.
    """
    try:
        import transformer_engine.pytorch  # noqa: F401  (availability probe only)
    except ImportError:
        pytest.skip("Transformer Engine not available, skipping TE tests")

    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    text_inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)

    # Standard model first; it is freed before the TE model is loaded so both
    # never occupy GPU memory at the same time. ``finally`` keeps that true
    # even if the forward pass raises.
    model_standard = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16).to(
        "cuda"
    )
    try:
        with torch.no_grad():
            text_out_std = model_standard.get_text_embeddings(**text_inputs)
    finally:
        del model_standard
        torch.cuda.empty_cache()

    # trust_remote_code=True matches every other from_pretrained call in this
    # module, so the config loads without a custom-code confirmation step.
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = False

    model_te = AutoModel.from_pretrained(
        MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16
    ).to("cuda")
    try:
        with torch.no_grad():
            text_out_te = model_te.get_text_embeddings(**text_inputs)

        assert text_out_std.text_proj.shape == text_out_te.text_proj.shape
        assert text_out_std.text_proj.shape == (1, 256)
    finally:
        del model_te
        torch.cuda.empty_cache()
|
|
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_fp8():
    """Load the model with Transformer Engine and FP8 both enabled.

    Requires substantial GPU memory. Skipped without CUDA or when
    ``transformer_engine.pytorch`` cannot be imported. Only loading is
    covered: the test checks that the model reports ``transformer_engine``
    and ``use_fp8`` as ``True`` and runs no inference.
    """
    try:
        import transformer_engine.pytorch  # noqa: F401  (availability probe only)
    except ImportError:
        pytest.skip("Transformer Engine not available, skipping FP8 tests")

    # Start from a clean allocator cache before the memory-heavy load.
    torch.cuda.empty_cache()

    # trust_remote_code=True matches every other from_pretrained call in this
    # module, so the config loads without a custom-code confirmation step.
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = True

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is True

    # Release the model before the next test runs.
    del model
    torch.cuda.empty_cache()
|
|
|
|
def test_transformer_engine_config_validation():
    """Check that requesting FP8 without Transformer Engine is rejected.

    Constructs ``VisionTransformer`` with ``transformer_engine=False`` and
    ``use_fp8=True`` and expects a ``ValueError`` whose message matches
    ``transformer_engine.*must be enabled.*use_fp8``. The flags are passed
    directly to the constructor, so no checkpoint config needs to be loaded.
    """
    # Imported outside the ``pytest.raises`` block so that the context manager
    # wraps only the constructor call under test.
    from modeling_vit import VisionTransformer

    with pytest.raises(ValueError, match="transformer_engine.*must be enabled.*use_fp8"):
        VisionTransformer(transformer_engine=False, use_fp8=True)
|
|