"""Shared pytest configuration: import paths, environment hygiene, and fixtures.

Everything at module level runs when pytest imports this file, i.e. before
any test module is collected.
"""

import os
import sys
from contextlib import ExitStack

# Ensure project root and src are on sys.path for tests
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)

# Set environment variables to disable ChromaDB telemetry
os.environ["ANONYMIZED_TELEMETRY"] = "False"
os.environ["CHROMA_TELEMETRY"] = "False"

# Mark that pytest is running so startup routines can skip external HF/network calls
os.environ["PYTEST_RUNNING"] = "1"

# Ensure CI/local test runs don't accidentally use real HF creds from developer environment
for _var in (
    "HF_TOKEN",
    "OPENROUTER_API_KEY",
    "GROQ_API_KEY",
    "ENABLE_HF_SERVICES",
    "HF_DATASET_NAME",
):
    os.environ.pop(_var, None)

from typing import List, Optional  # noqa: E402
from unittest.mock import MagicMock, patch  # noqa: E402

import pytest  # noqa: E402


@pytest.fixture(scope="session", autouse=True)
def disable_chromadb_telemetry():
    """Disable ChromaDB telemetry to avoid errors in tests.

    The patchers are entered through an ``ExitStack`` instead of being
    activated with ``patcher.start()``. Patches activated with ``start()``
    are registered with ``patch.stopall()``, which ``reset_mock_state`` calls
    after every test; that would silently undo these session-wide patches as
    soon as the first test finished.

    Patchers are entered in order. If one of them fails with ``ImportError``
    or ``AttributeError`` (the target module or attribute does not exist),
    the remaining ones are skipped, the ones already entered stay active, and
    the session continues.
    """
    patchers = [
        patch("chromadb.telemetry.product.posthog.capture", return_value=None),
        patch("chromadb.telemetry.product.posthog.Posthog.capture", return_value=None),
        patch("chromadb.telemetry.product.posthog.Posthog", return_value=MagicMock()),
        patch("chromadb.configure", return_value=None),
    ]
    with ExitStack() as stack:
        try:
            for patcher in patchers:
                stack.enter_context(patcher)
        except (ImportError, AttributeError):
            # If modules don't exist, continue without patching the rest.
            pass
        # Single yield: the stack unwinds exactly the patchers that were
        # entered, in reverse order, when the session ends.
        yield


@pytest.fixture
def app():
    """Flask application fixture.

    Yields the Flask app after resetting its cached service entries and any
    ``_instances`` caches found on already-imported service modules.
    """
    # Import the Flask app lazily here so autouse fixtures (e.g. mock_embedding_service)
    # can apply their patches before the application and its modules are imported.
    from app import app as flask_app  # noqa: E402

    # Clear any cached services before each test to prevent state contamination
    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
        flask_app.config[config_key] = None

    # Also clear any module-level caches that might exist
    modules_to_clear = [
        "src.rag.rag_pipeline",
        "src.llm.llm_service",
        "src.search.search_service",
        "src.embedding.embedding_service",
        "src.vector_store.vector_db",
    ]
    for module_name in modules_to_clear:
        module = sys.modules.get(module_name)
        if module is None:
            # Module was never imported, so it has nothing cached.
            continue
        for attr_name in dir(module):
            if attr_name.startswith("_"):
                continue
            attr = getattr(module, attr_name)
            # Reset the instance cache on any public object that carries one.
            if hasattr(attr, "__dict__") and hasattr(attr, "_instances"):
                attr._instances = {}

    yield flask_app


@pytest.fixture
def client(app):
    """Flask test client fixture."""
    return app.test_client()


@pytest.fixture(autouse=True)
def reset_mock_state():
    """Fixture to reset any global mock state between tests."""
    yield
    # Clean up any lingering mock state after each test
    import unittest.mock

    # Clear any patches that might have been left hanging. This only affects
    # patches that were activated with ``start()``.
    unittest.mock.patch.stopall()


class FakeEmbeddingService:
    """A mock embedding service that returns dummy data without loading a real model.

    Compatible with both legacy EmbeddingService and new HFEmbeddingService interfaces.
    Every embedding is a list of ``self.dim`` floats, all equal to ``0.1``.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ):
        """Initializes the fake service.

        Falsy arguments are replaced by defaults; no model is loaded.

        Args:
            model_name: Model identifier; defaults to "intfloat/multilingual-e5-large".
            device: Device label; defaults to "cpu".
            batch_size: Batch size; defaults to 32.
        """
        self.model_name = model_name or "intfloat/multilingual-e5-large"
        self.device = device or "cpu"
        self.batch_size = batch_size or 32
        self.dim = 1024  # HF multilingual-e5-large dimension
        self.hf_token = "fake_token"  # For HF service compatibility
        self.api_url = f"https://router.huggingface.co/hf-inference/models/{self.model_name}"
        self.headers = {"Authorization": "Bearer fake_token"}

    # Legacy EmbeddingService interface
    def embed_text(self, text: str) -> List[float]:
        """Returns a dummy embedding for a single text."""
        return [0.1] * self.dim

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Returns a list of dummy embeddings for multiple texts."""
        return [[0.1] * self.dim for _ in texts]

    # HF EmbeddingService interface
    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Returns a list of dummy embeddings for multiple texts (HF interface)."""
        return [[0.1] * self.dim for _ in texts]

    def get_embedding(self, text: str) -> List[float]:
        """Returns a dummy embedding for a single text (HF interface)."""
        return [0.1] * self.dim

    # Common interface methods
    def get_embedding_dimension(self) -> int:
        """Returns the fixed dimension of the dummy embeddings."""
        return self.dim

    def health_check(self) -> bool:
        """Returns True for health checks."""
        return True


@pytest.fixture(autouse=True)
def mock_embedding_service(monkeypatch):
    """
    Automatically replace the real HF services with fake ones for testing.

    This fixture works with the hybrid architecture using HF services.
    Each target is patched independently; a target that cannot be imported or
    does not define the attribute is skipped.
    """
    service_targets = (
        # HF Embedding Service (new hybrid architecture)
        "src.embedding.hf_embedding_service.HFEmbeddingService",
        # Legacy embedding service, if it exists
        "src.embedding.embedding_service.EmbeddingService",
    )
    for target in service_targets:
        try:
            monkeypatch.setattr(target, FakeEmbeddingService)
        except (ImportError, AttributeError):
            # The service may not exist in all test contexts.
            pass

    # Mock in ingestion pipeline (only if the import exists)
    try:
        import src.ingestion.ingestion_pipeline

        if hasattr(src.ingestion.ingestion_pipeline, "EmbeddingService"):
            monkeypatch.setattr(
                "src.ingestion.ingestion_pipeline.EmbeddingService",
                FakeEmbeddingService,
            )
    except (ImportError, AttributeError):
        pass