Spaces:
Running on Zero
Running on Zero
| """Shared pytest fixtures for the Obliteratus test suite.""" | |
| from __future__ import annotations | |
| from unittest.mock import MagicMock | |
| import pytest | |
| import torch | |
| # --------------------------------------------------------------------------- | |
| # Fixtures | |
| # --------------------------------------------------------------------------- | |
| def mock_model(): | |
| """A minimal mock transformer model. | |
| Provides: | |
| - model.config with config.num_hidden_layers = 4 | |
| - model.named_parameters() returning fake weight tensors | |
| """ | |
| model = MagicMock() | |
| # Config with num_hidden_layers | |
| config = MagicMock() | |
| config.num_hidden_layers = 4 | |
| model.config = config | |
| # named_parameters returns fake weight tensors across 4 layers | |
| fake_params = [] | |
| for layer_idx in range(4): | |
| weight = torch.randn(768, 768) | |
| fake_params.append((f"model.layers.{layer_idx}.self_attn.q_proj.weight", weight)) | |
| fake_params.append((f"model.layers.{layer_idx}.self_attn.v_proj.weight", weight)) | |
| fake_params.append((f"model.layers.{layer_idx}.mlp.gate_proj.weight", weight)) | |
| model.named_parameters.return_value = fake_params | |
| return model | |
| def mock_tokenizer(): | |
| """A minimal mock tokenizer with encode, decode, and apply_chat_template.""" | |
| tokenizer = MagicMock() | |
| tokenizer.encode.return_value = [1, 2, 3, 4, 5] | |
| tokenizer.decode.return_value = "Hello, this is a decoded string." | |
| tokenizer.apply_chat_template.return_value = [1, 2, 3, 4, 5, 6, 7] | |
| tokenizer.pad_token = "<pad>" | |
| tokenizer.eos_token = "<eos>" | |
| return tokenizer | |
| def refusal_direction(): | |
| """A normalized random torch tensor of shape (768,).""" | |
| t = torch.randn(768) | |
| return t / t.norm() | |
| def activation_pair(): | |
| """A tuple of (harmful_activations, harmless_activations) as random tensors of shape (10, 768).""" | |
| harmful_activations = torch.randn(10, 768) | |
| harmless_activations = torch.randn(10, 768) | |
| return (harmful_activations, harmless_activations) | |
| def tmp_output_dir(tmp_path): | |
| """A clean temporary output directory for test artifacts.""" | |
| output_dir = tmp_path / "test_output" | |
| output_dir.mkdir() | |
| return output_dir | |