# Page banner residue captured with this file ("Spaces: Sleeping"); not part of the module.
import os
import sys

# Make the project root and its ``src`` directory importable from the tests.
# The root is inserted first, so ``src`` ends up ahead of it on sys.path.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
for _path in (PROJECT_ROOT, SRC_PATH):
    if _path not in sys.path:
        sys.path.insert(0, _path)

os.environ.update(
    {
        # Switch ChromaDB telemetry off.
        "ANONYMIZED_TELEMETRY": "False",
        "CHROMA_TELEMETRY": "False",
        # Signal that pytest is running so startup routines can skip
        # external HF/network calls.
        "PYTEST_RUNNING": "1",
    }
)

# Drop credentials and service toggles inherited from the developer/CI
# environment so test runs never pick up real ones by accident.
for _var in ("HF_TOKEN", "OPENROUTER_API_KEY", "GROQ_API_KEY", "ENABLE_HF_SERVICES", "HF_DATASET_NAME"):
    os.environ.pop(_var, None)

# These imports intentionally come after the path/environment setup above.
from typing import List, Optional  # noqa: E402
from unittest.mock import MagicMock, patch  # noqa: E402

import pytest  # noqa: E402
# NOTE(review): no @pytest.fixture decorator is visible on this function in
# this view; confirm that it is registered as a fixture.
def disable_chromadb_telemetry():
    """Replace ChromaDB telemetry hooks with mocks around a single ``yield``.

    Every target is patched independently: a target that cannot be imported
    or does not exist is skipped, and the remaining targets are still patched.
    Only patches that actually started are stopped afterwards, in reverse
    order of activation.

    Yields:
        None, exactly once, while the successfully started patches are active.
    """
    # (dotted target, return value of the replacement mock)
    replacements = (
        ("chromadb.telemetry.product.posthog.capture", None),
        ("chromadb.telemetry.product.posthog.Posthog.capture", None),
        ("chromadb.telemetry.product.posthog.Posthog", MagicMock()),
        ("chromadb.configure", None),
    )
    started = []
    try:
        for target, return_value in replacements:
            patcher = patch(target, return_value=return_value)
            try:
                patcher.start()
            except (ImportError, AttributeError):
                # Target missing in this environment/version: skip just this
                # one instead of giving up on all the others.
                continue
            started.append(patcher)
        # The yield sits outside any ``except`` so an error raised by the
        # consumer propagates instead of triggering a second yield.
        yield
    finally:
        for patcher in reversed(started):
            try:
                patcher.stop()
            except Exception:
                # Best-effort cleanup: the patch may already have been stopped
                # elsewhere (for example by ``patch.stopall()``), and stopping
                # it again can raise depending on the Python version.
                pass
# NOTE(review): no @pytest.fixture decorator is visible on this function in
# this view; confirm that it is registered as a fixture.
def app():
    """Yield the Flask application with its cached services reset.

    The application is imported inside the function rather than at module
    import time, so patches installed before this runs are already in place
    when the application modules load.

    Yields:
        The ``app`` object imported from the ``app`` module.
    """
    import sys

    from app import app as flask_app  # noqa: E402

    # Forget service objects cached on the app config by earlier tests.
    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
        flask_app.config[config_key] = None

    # Modules whose public attributes may carry an ``_instances`` cache.
    cached_modules = (
        "src.rag.rag_pipeline",
        "src.llm.llm_service",
        "src.search.search_service",
        "src.embedding.embedding_service",
        "src.vector_store.vector_db",
    )
    for name in cached_modules:
        if name not in sys.modules:
            continue
        loaded = sys.modules[name]
        for attr_name in dir(loaded):
            candidate = getattr(loaded, attr_name)
            # Only public attributes that carry their own ``__dict__`` are
            # considered.
            if not hasattr(candidate, "__dict__") or attr_name.startswith("_"):
                continue
            if hasattr(candidate, "_instances"):
                candidate._instances = {}

    yield flask_app
# NOTE(review): no @pytest.fixture decorator is visible on this function in
# this view; confirm that it is registered as a fixture.
def client(app):
    """Return a test client created from the given Flask application.

    Args:
        app: Object exposing ``test_client()``.

    Returns:
        Whatever ``app.test_client()`` returns.
    """
    test_client = app.test_client()
    return test_client
# NOTE(review): no @pytest.fixture decorator is visible on this function in
# this view; confirm that it is registered as a fixture.
def reset_mock_state():
    """Yield once, then stop any mock patches that are still active.

    Yields:
        None; the cleanup runs when the generator is resumed after the yield.
    """
    yield

    # Imported locally, after the yield, exactly where the cleanup happens.
    from unittest.mock import patch as mock_patch

    # ``stopall`` stops every patch that was activated via ``start()`` and is
    # still running, wherever it was started.
    mock_patch.stopall()
class FakeEmbeddingService:
    """Stand-in embedding service that returns constant vectors; no model is loaded.

    Offers the method names of both the legacy ``EmbeddingService``
    (``embed_text`` / ``embed_texts``) and the ``HFEmbeddingService``
    (``get_embedding`` / ``get_embeddings``) so it can replace either one.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ):
        """Store the given settings, substituting a default for any falsy value.

        Args:
            model_name: Model identifier; defaults to
                ``"intfloat/multilingual-e5-large"``.
            device: Device label; defaults to ``"cpu"``.
            batch_size: Batch size; defaults to ``32``.
        """
        self.model_name = model_name if model_name else "intfloat/multilingual-e5-large"
        self.device = device if device else "cpu"
        self.batch_size = batch_size if batch_size else 32
        # Length of every vector this fake returns.
        self.dim = 1024
        # Placeholder attributes kept for HF service compatibility.
        self.hf_token = "fake_token"
        self.api_url = f"https://router.huggingface.co/hf-inference/models/{self.model_name}"
        self.headers = {"Authorization": "Bearer fake_token"}

    def _constant_vector(self):
        """Build a fresh list of ``self.dim`` entries, each equal to 0.1."""
        return [0.1] * self.dim

    # --- Legacy EmbeddingService interface ---

    def embed_text(self, text: str):
        """Return one dummy embedding; ``text`` is not inspected."""
        return self._constant_vector()

    def embed_texts(self, texts: List[str]):
        """Return one dummy embedding per entry of ``texts``."""
        return [self._constant_vector() for _ in texts]

    # --- HF EmbeddingService interface ---

    def get_embeddings(self, texts: List[str]):
        """Return one dummy embedding per entry of ``texts`` (HF interface)."""
        return [self._constant_vector() for _ in texts]

    def get_embedding(self, text: str):
        """Return one dummy embedding; ``text`` is not inspected (HF interface)."""
        return self._constant_vector()

    # --- Common interface methods ---

    def get_embedding_dimension(self):
        """Return the fixed length of the dummy embeddings."""
        return self.dim

    def health_check(self):
        """Report the fake service as healthy (always ``True``)."""
        return True
# NOTE(review): no @pytest.fixture decorator is visible on this function in
# this view; confirm that it is registered as a fixture.
def mock_embedding_service(monkeypatch):
    """
    Swap the embedding service classes for ``FakeEmbeddingService``.

    Each replacement is attempted on its own; a target whose module cannot be
    imported or whose attribute is missing is skipped silently.

    Args:
        monkeypatch: Object providing ``setattr(dotted_path, value)``.
    """
    # The HF service (hybrid architecture) first, then the legacy service;
    # either may be absent depending on the test context.
    service_paths = (
        "src.embedding.hf_embedding_service.HFEmbeddingService",
        "src.embedding.embedding_service.EmbeddingService",
    )
    for dotted_path in service_paths:
        try:
            monkeypatch.setattr(dotted_path, FakeEmbeddingService)
        except (ImportError, AttributeError):
            continue

    # The ingestion pipeline is patched only when it actually exposes an
    # ``EmbeddingService`` name.
    try:
        import src.ingestion.ingestion_pipeline

        pipeline_module = src.ingestion.ingestion_pipeline
        if hasattr(pipeline_module, "EmbeddingService"):
            monkeypatch.setattr(
                "src.ingestion.ingestion_pipeline.EmbeddingService",
                FakeEmbeddingService,
            )
    except (ImportError, AttributeError):
        pass