llm-training / tests /conftest.py
percyraskova's picture
Upload folder using huggingface_hub
81b3473 verified
"""Pytest configuration and fixtures for prolewiki-llm tests."""
import os
from pathlib import Path
import pytest
# =============================================================================
# Module-Level Setup for train_headless.py Tests
# =============================================================================
# train_headless.py looks up its required environment variables (HF_TOKEN,
# WANDB_API_KEY) at import time, and that import happens while pytest is still
# collecting tests. Fixtures only run after collection, so the placeholders
# have to be installed here, at conftest import time. Values that are already
# present in the environment are left untouched.
os.environ.setdefault("HF_TOKEN", "test-token-for-unit-tests")
os.environ.setdefault("WANDB_API_KEY", "test-key-for-unit-tests")
@pytest.fixture
def sample_question() -> str:
    """Provide a fixed Marxist-theory question used as test input.

    Returns:
        The question text.
    """
    question = "What is the difference between revisionism and Marxism-Leninism?"
    return question
@pytest.fixture
def sample_good_answer() -> str:
    """Provide a well-structured sample answer for reward tests.

    The text opens with a ``<think>...</think>`` section followed by the
    answer proper.

    Returns:
        The full answer text, including the ``<think>`` section.
    """
    answer = """<think>
The question asks about the distinction between revisionism and Marxism-Leninism,
which requires explaining both concepts and their historical relationship.
</think>
Revisionism refers to attempts to revise or water down the fundamental principles
of Marxism, particularly the necessity of class struggle and proletarian
dictatorship. It emerged historically with Eduard Bernstein who rejected
revolutionary change in favor of gradual reform within capitalism.
Marxism-Leninism, in contrast, upholds the core tenets of scientific socialism:
the class nature of the state, the necessity of socialist revolution, and the
dictatorship of the proletariat as the transitional form to communism.
The key distinction lies in their approach to the capitalist state - revisionists
seek to reform it, while Marxist-Leninists understand it must be replaced by a
workers' state through revolutionary action."""
    return answer
@pytest.fixture
def sample_bad_answer() -> str:
    """Provide a slogan-heavy sample answer that lacks depth.

    Returns:
        The answer text.
    """
    answer = """Revisionism is bad and Marxism-Leninism is good. We must fight
imperialism and support the working class. Solidarity forever! The revolution
will triumph against capitalism and fascism. Workers of the world unite!"""
    return answer
# =============================================================================
# Integration Test Fixtures
# =============================================================================
@pytest.fixture
def project_root() -> Path:
    """Locate the project root: the directory one level above this file's folder.

    Returns:
        The parent of the directory that contains this conftest module.
    """
    tests_dir = Path(__file__).parent
    return tests_dir.parent
@pytest.fixture
def mock_bin_dir(tmp_path: Path) -> Path:
    """Create a mock bin directory with executable stubs for shell script testing.

    Every regular file found in ``fixtures/mock_bin/`` next to this conftest
    module is copied into ``<tmp_path>/mock_bin`` and given mode ``0o755``.
    When the fixtures directory does not exist, the returned directory is
    created but left empty.

    Args:
        tmp_path: Temporary directory in which ``mock_bin`` is created.

    Returns:
        Path to the ``mock_bin`` directory.
    """
    mock_bin = tmp_path / "mock_bin"
    mock_bin.mkdir()

    fixtures_dir = Path(__file__).parent / "fixtures" / "mock_bin"
    if not fixtures_dir.exists():
        return mock_bin

    for script in fixtures_dir.iterdir():
        if not script.is_file():
            continue
        dest = mock_bin / script.name
        # Copy raw bytes rather than text: a read_text()/write_text() round
        # trip depends on the locale encoding, fails on stubs that are not
        # valid text in that encoding, and may rewrite line endings.
        dest.write_bytes(script.read_bytes())
        dest.chmod(0o755)  # Make executable
    return mock_bin
@pytest.fixture
def start_sh_env(tmp_path: Path, mock_bin_dir: Path) -> dict[str, str]:
    """Build the base environment mapping for start.sh integration tests.

    The mapping contains:
    - PATH with the mock binaries directory ahead of /usr/bin and /bin
    - HOME set to tmp_path
    - MOCK_LOG_DIR pointing at a freshly created ``logs`` directory
    - Default "success" values for the mock commands
    - CHECKPOINT_DIR, LORA_OUTPUT and OUTPUT_DIR as paths under tmp_path
      (instead of /workspace); only the paths are set here, the directories
      themselves are not created

    Args:
        tmp_path: Temporary directory that hosts logs and output paths.
        mock_bin_dir: Directory of mock executables placed first on PATH.

    Returns:
        Environment variables as a ``str`` to ``str`` mapping.
    """
    logs = tmp_path / "logs"
    logs.mkdir()

    search_path = ":".join([str(mock_bin_dir), "/usr/bin", "/bin"])
    env: dict[str, str] = {
        "PATH": search_path,
        "HOME": str(tmp_path),
        "MOCK_LOG_DIR": str(logs),
        "MOCK_CUDA_AVAILABLE": "1",
        "MOCK_TRAINING_EXIT_CODE": "0",
    }

    # Override /workspace paths to use tmp_path
    output_subdirs = (
        ("CHECKPOINT_DIR", "checkpoints"),
        ("LORA_OUTPUT", "lora-output"),
        ("OUTPUT_DIR", "outputs"),
    )
    for variable, subdir in output_subdirs:
        env[variable] = str(tmp_path / subdir)
    return env