# GitHub Action
# Clean deployment without binary files
# f884e6e
import os
import sys
# Make the project root and its ``src`` directory importable from the tests.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
# Handled second: when both entries are added, SRC_PATH ends up ahead of PROJECT_ROOT.
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)
# Environment flags set at import time:
#   * the two telemetry switches are meant to disable ChromaDB telemetry;
#   * PYTEST_RUNNING marks that pytest is running so startup routines can skip
#     external HF/network calls.
os.environ.update(
    {
        "ANONYMIZED_TELEMETRY": "False",
        "CHROMA_TELEMETRY": "False",
        "PYTEST_RUNNING": "1",
    }
)
# Drop credentials/feature switches that may leak in from a developer's shell so
# CI and local runs never pick up real HF creds by accident.
for _var in (
    "HF_TOKEN",
    "OPENROUTER_API_KEY",
    "GROQ_API_KEY",
    "ENABLE_HF_SERVICES",
    "HF_DATASET_NAME",
):
    if _var in os.environ:
        del os.environ[_var]
from typing import List, Optional # noqa: E402
from unittest.mock import MagicMock, patch # noqa: E402
import pytest # noqa: E402
@pytest.fixture(scope="session", autouse=True)
def disable_chromadb_telemetry():
    """Patch ChromaDB telemetry entry points for the duration of the test session.

    The patchers are entered through an ``ExitStack`` instead of being
    activated with ``patcher.start()``.  Patchers activated with ``start()``
    are registered with ``unittest.mock.patch.stopall()``, so any per-test
    cleanup that calls ``stopall()`` would undo these session-scoped patches
    after the very first test.  Patchers entered as context managers are not
    affected by ``stopall()`` and stay active until this fixture is torn down.

    Patching is best-effort: when a target cannot be imported or resolved
    (``ImportError`` / ``AttributeError``), the session continues with whatever
    patches were applied up to that point.

    Yields:
        None. The fixture exists only for its patching side effect.
    """
    from contextlib import ExitStack

    patchers = [
        patch(
            "chromadb.telemetry.product.posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog",
            return_value=MagicMock(),
        ),
        patch("chromadb.configure", return_value=None),
    ]

    # The stack unwinds only the patchers that were actually entered, in
    # reverse order, so teardown needs no blanket exception swallowing.
    with ExitStack() as stack:
        try:
            for patcher in patchers:
                stack.enter_context(patcher)
        except (ImportError, AttributeError):
            # NOTE(review): as before, the first missing target stops the
            # remaining patchers from being applied -- confirm whether each
            # target should instead be attempted independently.
            pass
        yield
@pytest.fixture
def app():
    """Yield the Flask application with its cached services reset.

    Before yielding, the ``RAG_PIPELINE``, ``INGESTION_PIPELINE`` and
    ``SEARCH_SERVICE`` config entries are set to ``None``.  In addition, for
    each listed service module that is already imported, any public
    module-level object exposing an ``_instances`` attribute has that attribute
    replaced with an empty dict.
    """
    # Deferred import: autouse fixtures (e.g. mock_embedding_service) get to
    # apply their patches before the application and its modules are loaded.
    from app import app as flask_app  # noqa: E402

    # Reset per-app service caches so state cannot leak between tests.
    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
        flask_app.config[config_key] = None

    import sys

    # Modules that may hold module-level caches of their own.
    cached_module_names = (
        "src.rag.rag_pipeline",
        "src.llm.llm_service",
        "src.search.search_service",
        "src.embedding.embedding_service",
        "src.vector_store.vector_db",
    )
    for name in cached_module_names:
        if name not in sys.modules:
            continue
        loaded_module = sys.modules[name]
        for member_name in dir(loaded_module):
            member = getattr(loaded_module, member_name)
            # Only public objects that carry their own attribute dict qualify.
            if not hasattr(member, "__dict__") or member_name.startswith("_"):
                continue
            if hasattr(member, "_instances"):
                # Drop whatever instances were cached on this object.
                member._instances = {}

    yield flask_app
@pytest.fixture
def client(app):
    """Return a test client created from the ``app`` fixture."""
    test_client = app.test_client()
    return test_client
@pytest.fixture(autouse=True)
def reset_mock_state():
    """Stop any patchers left running once each test has finished.

    Runs automatically around every test.  Nothing happens before the test;
    afterwards ``unittest.mock.patch.stopall()`` is called.  That call only
    affects patchers that were activated with ``start()``.
    """
    from unittest import mock as mock_module

    yield

    # Teardown: stop patchers a test started but never stopped.
    mock_module.patch.stopall()
class FakeEmbeddingService:
    """Stand-in embedding service that hands out constant vectors.

    No model is loaded.  The class exposes the method names of both the legacy
    ``EmbeddingService`` (``embed_text`` / ``embed_texts``) and the newer
    ``HFEmbeddingService`` (``get_embedding`` / ``get_embeddings``) so it can
    replace either one in tests.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ):
        """Store the given settings, substituting defaults for falsy values.

        Args:
            model_name: Model identifier; falls back to
                ``"intfloat/multilingual-e5-large"``.
            device: Device label; falls back to ``"cpu"``.
            batch_size: Batch size; falls back to ``32``.
        """
        self.model_name = model_name if model_name else "intfloat/multilingual-e5-large"
        self.device = device if device else "cpu"
        self.batch_size = batch_size if batch_size else 32
        # Length of every dummy vector (HF multilingual-e5-large dimension).
        self.dim = 1024
        # Attributes kept for compatibility with the HF service.
        self.hf_token = "fake_token"
        self.api_url = f"https://router.huggingface.co/hf-inference/models/{self.model_name}"
        self.headers = {"Authorization": "Bearer fake_token"}

    def _dummy_vector(self):
        """Build a fresh list of ``self.dim`` entries, each equal to 0.1."""
        return [0.1] * self.dim

    # --- Legacy EmbeddingService interface ---

    def embed_text(self, text: str):
        """Return one dummy embedding; ``text`` is ignored."""
        return self._dummy_vector()

    def embed_texts(self, texts: List[str]):
        """Return one dummy embedding per entry of ``texts``."""
        return [self._dummy_vector() for _ in texts]

    # --- HF EmbeddingService interface ---

    def get_embeddings(self, texts: List[str]):
        """Return one dummy embedding per entry of ``texts`` (HF interface)."""
        return [self._dummy_vector() for _ in texts]

    def get_embedding(self, text: str):
        """Return one dummy embedding; ``text`` is ignored (HF interface)."""
        return self._dummy_vector()

    # --- Methods shared by both interfaces ---

    def get_embedding_dimension(self):
        """Return the length of the dummy embeddings."""
        return self.dim

    def health_check(self):
        """Report the service as healthy (always ``True``)."""
        return True
@pytest.fixture(autouse=True)
def mock_embedding_service(monkeypatch):
    """Swap the real embedding services for ``FakeEmbeddingService`` in every test.

    Each replacement is attempted independently; a target whose module or
    attribute cannot be resolved (``ImportError`` / ``AttributeError``) is
    skipped.  ``monkeypatch`` restores the originals after the test.
    """
    # HF service (new hybrid architecture) first, then the legacy service.
    # Either one may be absent depending on the test context.
    service_targets = (
        "src.embedding.hf_embedding_service.HFEmbeddingService",
        "src.embedding.embedding_service.EmbeddingService",
    )
    for dotted_target in service_targets:
        try:
            monkeypatch.setattr(dotted_target, FakeEmbeddingService)
        except (ImportError, AttributeError):
            continue

    # The ingestion pipeline is only patched when it actually exposes an
    # ``EmbeddingService`` name of its own.
    try:
        import src.ingestion.ingestion_pipeline as ingestion_module

        if not hasattr(ingestion_module, "EmbeddingService"):
            return
        monkeypatch.setattr(
            "src.ingestion.ingestion_pipeline.EmbeddingService",
            FakeEmbeddingService,
        )
    except (ImportError, AttributeError):
        pass