Spaces:

msse-team-3
/

ai-engineering-project

Sleeping

ai-engineering-project / tests /conftest.py

GitHub Action

Clean deployment without binary files

f884e6e 2 months ago

6.98 kB

	import os
	import sys

	# Make the repository root and its src/ directory importable from the tests.
	PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
	SRC_PATH = os.path.join(PROJECT_ROOT, "src")

	# Each missing entry is pushed to the front of sys.path; because SRC_PATH is
	# handled last, it precedes PROJECT_ROOT whenever both had to be added.
	for _import_root in (PROJECT_ROOT, SRC_PATH):
	    if _import_root not in sys.path:
	        sys.path.insert(0, _import_root)

	os.environ.update(
	    {
	        # Set environment variables to disable ChromaDB telemetry
	        "ANONYMIZED_TELEMETRY": "False",
	        "CHROMA_TELEMETRY": "False",
	        # Mark that pytest is running so startup routines can skip external HF/network calls
	        "PYTEST_RUNNING": "1",
	    }
	)

	# Ensure CI/local test runs don't accidentally use real HF creds from developer environment
	for _env_name in (
	    "HF_TOKEN",
	    "OPENROUTER_API_KEY",
	    "GROQ_API_KEY",
	    "ENABLE_HF_SERVICES",
	    "HF_DATASET_NAME",
	):
	    os.environ.pop(_env_name, None)

	from typing import List, Optional # noqa: E402
	from unittest.mock import MagicMock, patch # noqa: E402

	import pytest # noqa: E402


	@pytest.fixture(scope="session", autouse=True)
	def disable_chromadb_telemetry():
	    """Disable ChromaDB telemetry for the whole test session.

	    Replaces the ChromaDB/PostHog telemetry entry points (and
	    ``chromadb.configure``) with mocks for the duration of the session.

	    Each target is patched independently: a target that cannot be imported
	    or does not exist in the installed ChromaDB version is skipped without
	    preventing the remaining targets from being patched.

	    The patches are entered as context managers instead of via
	    ``patch.start()``. ``unittest.mock.patch.stopall()`` only stops patches
	    begun with ``start()``, so the per-test ``reset_mock_state`` fixture
	    cannot undo these session-wide patches after the first test.
	    """
	    # Local import keeps this fixture independent of the module import block.
	    from contextlib import ExitStack

	    # (dotted target, return value of the replacement mock)
	    telemetry_targets = (
	        ("chromadb.telemetry.product.posthog.capture", None),
	        ("chromadb.telemetry.product.posthog.Posthog.capture", None),
	        ("chromadb.telemetry.product.posthog.Posthog", MagicMock()),
	        ("chromadb.configure", None),
	    )

	    with ExitStack() as active_patches:
	        for target, mock_return in telemetry_targets:
	            try:
	                active_patches.enter_context(patch(target, return_value=mock_return))
	            except (ImportError, AttributeError):
	                # Module or attribute is absent here; continue without it.
	                continue
	        # Only successfully entered patches are registered on the stack, so
	        # leaving the ``with`` block undoes exactly those, in reverse order.
	        yield


	@pytest.fixture
	def app():
	    """Yield the Flask application after clearing cached service state.

	    The application object is imported from the ``app`` module, its cached
	    pipeline/service config entries are set to ``None``, and any public
	    attribute of the listed (already imported) ``src`` modules that carries
	    an ``_instances`` attribute has it replaced with an empty dict.
	    """
	    import sys

	    # Deferred import: autouse fixtures (e.g. mock_embedding_service) need to
	    # apply their patches before the application and its modules are imported.
	    from app import app as flask_app  # noqa: E402

	    # Forget services cached on the app so tests do not contaminate each other.
	    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
	        flask_app.config[config_key] = None

	    # Modules that may hold module-level caches of their own.
	    cached_module_names = (
	        "src.rag.rag_pipeline",
	        "src.llm.llm_service",
	        "src.search.search_service",
	        "src.embedding.embedding_service",
	        "src.vector_store.vector_db",
	    )
	    for cached_name in cached_module_names:
	        if cached_name not in sys.modules:
	            continue
	        loaded_module = sys.modules[cached_name]
	        for member_name in dir(loaded_module):
	            member = getattr(loaded_module, member_name)
	            # Only public members that can carry attributes are candidates.
	            if not hasattr(member, "__dict__") or member_name.startswith("_"):
	                continue
	            if hasattr(member, "_instances"):
	                # Reset the instance registry that might contain cached data.
	                member._instances = {}

	    yield flask_app


	@pytest.fixture
	def client(app):
	    """Return a test client created from the ``app`` fixture's application."""
	    test_client = app.test_client()
	    return test_client


	@pytest.fixture(autouse=True)
	def reset_mock_state():
	    """Stop any patches still active once each test has finished.

	    Runs automatically around every test; all work happens in teardown.
	    """
	    from unittest import mock as mock_lib

	    yield

	    # Teardown: stop patches that a test left running.
	    mock_lib.patch.stopall()


	class FakeEmbeddingService:
	    """A mock embedding service that returns dummy data without loading a real model.

	    Compatible with both legacy EmbeddingService and new HFEmbeddingService interfaces.
	    Every embedding is a list of ``dim`` floats, each equal to ``0.1``.
	    """

	    def __init__(
	        self,
	        model_name: Optional[str] = None,
	        device: Optional[str] = None,
	        batch_size: Optional[int] = None,
	        *,
	        dim: Optional[int] = None,
	    ):
	        """Initializes the fake service.

	        Args:
	            model_name: Stored as-is; falls back to
	                ``"intfloat/multilingual-e5-large"`` when falsy.
	            device: Stored as-is; falls back to ``"cpu"`` when falsy.
	            batch_size: Stored as-is; falls back to ``32`` when falsy.
	            dim: Keyword-only length of the dummy vectors. Defaults to
	                1024, the HF multilingual-e5-large dimension.
	        """
	        self.model_name = model_name or "intfloat/multilingual-e5-large"
	        self.device = device or "cpu"
	        self.batch_size = batch_size or 32
	        self.dim = 1024 if dim is None else dim
	        self.hf_token = "fake_token"  # For HF service compatibility
	        self.api_url = f"https://router.huggingface.co/hf-inference/models/{self.model_name}"
	        self.headers = {"Authorization": f"Bearer {self.hf_token}"}

	    def _dummy_vector(self) -> List[float]:
	        """Build a fresh dummy embedding of length ``self.dim``."""
	        return [0.1] * self.dim

	    # Legacy EmbeddingService interface
	    def embed_text(self, text: str) -> List[float]:
	        """Returns a dummy embedding for a single text."""
	        return self._dummy_vector()

	    def embed_texts(self, texts: List[str]) -> List[List[float]]:
	        """Returns a list of dummy embeddings, one new list per input text."""
	        return [self._dummy_vector() for _ in texts]

	    # HF EmbeddingService interface
	    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
	        """Returns a list of dummy embeddings for multiple texts (HF interface)."""
	        return self.embed_texts(texts)

	    def get_embedding(self, text: str) -> List[float]:
	        """Returns a dummy embedding for a single text (HF interface)."""
	        return self.embed_text(text)

	    # Common interface methods
	    def get_embedding_dimension(self) -> int:
	        """Returns the fixed dimension of the dummy embeddings."""
	        return self.dim

	    def health_check(self) -> bool:
	        """Returns True for health checks."""
	        return True


	@pytest.fixture(autouse=True)
	def mock_embedding_service(monkeypatch):
	    """Swap the embedding service classes for ``FakeEmbeddingService``.

	    Applied automatically to every test. Each replacement is best-effort:
	    a target that cannot be imported or lacks the expected attribute is
	    skipped, since not every service exists in every test context.
	    """
	    service_targets = (
	        # HF embedding service (new hybrid architecture)
	        "src.embedding.hf_embedding_service.HFEmbeddingService",
	        # Legacy embedding service, if it exists
	        "src.embedding.embedding_service.EmbeddingService",
	    )
	    for dotted_target in service_targets:
	        try:
	            monkeypatch.setattr(dotted_target, FakeEmbeddingService)
	        except (ImportError, AttributeError):
	            # This service is not available in the current context.
	            continue

	    # The ingestion pipeline is patched only when it exposes the name itself.
	    try:
	        import src.ingestion.ingestion_pipeline as ingestion_module

	        if hasattr(ingestion_module, "EmbeddingService"):
	            monkeypatch.setattr(
	                "src.ingestion.ingestion_pipeline.EmbeddingService",
	                FakeEmbeddingService,
	            )
	    except (ImportError, AttributeError):
	        pass