"""Pytest configuration and fixtures for prolewiki-llm tests.""" import os from pathlib import Path import pytest # ============================================================================= # Module-Level Setup for train_headless.py Tests # ============================================================================= # train_headless.py reads required env vars (HF_TOKEN, WANDB_API_KEY) at import time. # We need to set dummy values BEFORE pytest collects tests that import this module. # This must be done at module level, not in a fixture, because fixtures run # after collection and the import happens during collection. if "HF_TOKEN" not in os.environ: os.environ["HF_TOKEN"] = "test-token-for-unit-tests" if "WANDB_API_KEY" not in os.environ: os.environ["WANDB_API_KEY"] = "test-key-for-unit-tests" @pytest.fixture def sample_question() -> str: """Sample Marxist theory question for testing.""" return "What is the difference between revisionism and Marxism-Leninism?" @pytest.fixture def sample_good_answer() -> str: """Sample well-structured answer for testing rewards.""" return """ The question asks about the distinction between revisionism and Marxism-Leninism, which requires explaining both concepts and their historical relationship. Revisionism refers to attempts to revise or water down the fundamental principles of Marxism, particularly the necessity of class struggle and proletarian dictatorship. It emerged historically with Eduard Bernstein who rejected revolutionary change in favor of gradual reform within capitalism. Marxism-Leninism, in contrast, upholds the core tenets of scientific socialism: the class nature of the state, the necessity of socialist revolution, and the dictatorship of the proletariat as the transitional form to communism. The key distinction lies in their approach to the capitalist state - revisionists seek to reform it, while Marxist-Leninists understand it must be replaced by a workers' state through revolutionary action.""" @pytest.fixture def sample_bad_answer() -> str: """Sample buzzword-heavy answer lacking depth.""" return """Revisionism is bad and Marxism-Leninism is good. We must fight imperialism and support the working class. Solidarity forever! The revolution will triumph against capitalism and fascism. Workers of the world unite!""" # ============================================================================= # Integration Test Fixtures # ============================================================================= @pytest.fixture def project_root() -> Path: """Return the project root directory.""" return Path(__file__).parent.parent @pytest.fixture def mock_bin_dir(tmp_path: Path) -> Path: """Create mock bin directory with executable stubs for shell script testing. Copies mock scripts from tests/fixtures/mock_bin/ to a temporary directory and makes them executable. """ mock_bin = tmp_path / "mock_bin" mock_bin.mkdir() # Copy mock scripts from fixtures fixtures_dir = Path(__file__).parent / "fixtures" / "mock_bin" if fixtures_dir.exists(): for script in fixtures_dir.iterdir(): if script.is_file(): dest = mock_bin / script.name dest.write_text(script.read_text()) dest.chmod(0o755) # Make executable return mock_bin @pytest.fixture def start_sh_env(tmp_path: Path, mock_bin_dir: Path) -> dict[str, str]: """Base environment for start.sh integration tests. Provides a controlled environment with: - PATH pointing to mock binaries first - Log directory for capturing mock invocations - Default success values for mock commands - Output directories in tmp_path (not /workspace) """ log_dir = tmp_path / "logs" log_dir.mkdir() return { "PATH": f"{mock_bin_dir}:/usr/bin:/bin", "HOME": str(tmp_path), "MOCK_LOG_DIR": str(log_dir), "MOCK_CUDA_AVAILABLE": "1", "MOCK_TRAINING_EXIT_CODE": "0", # Override /workspace paths to use tmp_path "CHECKPOINT_DIR": str(tmp_path / "checkpoints"), "LORA_OUTPUT": str(tmp_path / "lora-output"), "OUTPUT_DIR": str(tmp_path / "outputs"), }