Spaces:

galbendavids
/

CarsRUS

Sleeping

App Files Files Community

CarsRUS / tests /test_business_logic.py

galbendavids

CarsRUS: Link & Co 01 normalization (and/לינק&קו), tests

b262f99 3 months ago

raw

history blame contribute delete

9.08 kB

	#!/usr/bin/env python
	"""
	Business-logic test suite for CarsRUS (QA / DevOps).

	Validates expected behavior from request_file.txt:
	- Ingest automotive review content → searchable knowledge base
	- Respond based on retrieved knowledge (no hallucination for unsupported cars)
	- Supported cars: Citroen C3, Audi RS3, Kia EV9, MG S6, Hyundai Elantra N, Aion HT, Genesis GV80, Link & Co 01
	- Unsupported car questions → refusal with supported list
	- Comparison: 2 supported cars → proceed; 1 supported → answer about the known car with a disclaimer (no refusal); 0 → refusal
	- Car name normalization (e.g. RS3 → audi_rs3, קיה EV9 → kia_ev9)

	Run before pushing to Hugging Face: python test_business_logic.py
	"""

	import os
	import sys

	# Make the project root importable: this file lives in the tests directory, so the
	# root is one level up (tests/run_tests.sh achieves the same via PYTHONPATH=project_root).
	_tests_dir = os.path.split(os.path.abspath(__file__))[0]
	_project_root = os.path.split(_tests_dir)[0]
	if _project_root not in sys.path:
	    # Put it first so project modules are found ahead of later path entries.
	    sys.path.insert(0, _project_root)

	# Cache slot for the shared engine; filled on first use by _get_engine().
	_engine = None

	def _get_engine():
	    """Return the shared RAGEngine, creating it on the first call.

	    The instance is stored in the module-level ``_engine`` and reused by every
	    later call, so the engine is constructed at most once per test run
	    (the original note gives the reason: load model/embeddings only once).
	    """
	    global _engine
	    if _engine is not None:
	        return _engine
	    # Imported lazily so rag_engine is only loaded when a test first needs it.
	    from rag_engine import RAGEngine
	    _engine = RAGEngine()
	    return _engine


	def test_supported_cars_list():
	    """Check that the engine reports exactly the eight expected supported models.

	    Compares ``engine._supported_cars_display()`` against the expected names as a
	    set (order is ignored), then checks the length so duplicates are also caught.
	    """
	    shown = _get_engine()._supported_cars_display()
	    wanted = {
	        "Citroen C3",
	        "Audi RS3",
	        "Kia EV9",
	        "MG S6",
	        "Hyundai Elantra N",
	        "Aion HT",
	        "Genesis GV80",
	        "Link & Co 01",
	    }
	    assert set(shown) == wanted, f"Supported cars mismatch: got {shown}"
	    assert len(shown) == 8, f"Expected 8 supported models, got {len(shown)}"
	    print("✅ test_supported_cars_list passed")


	def test_unsupported_car_returns_refusal():
	    """Check that a question about a car outside the supported list is refused.

	    Sends a Hebrew question about the BMW X5 through ``engine.prepare_generation``
	    and asserts that:
	    - a refusal text is returned,
	    - no system/user prompts are returned alongside it,
	    - the refusal names at least one supported model,
	    - the refusal contains "not found" wording in Hebrew or English.
	    """
	    engine = _get_engine()
	    # Hebrew: "What do you think about BMW X5?"
	    query = "מה דעתך על BMW X5?"
	    # The fourth element (steps log) is not checked in this test.
	    refusal, sys_prompt, user_prompt, _ = engine.prepare_generation(query)
	    assert refusal is not None, "Unsupported car query must return refusal"
	    assert sys_prompt is None and user_prompt is None, "Refusal must not return prompts"
	    assert "Citroen C3" in refusal or "Audi RS3" in refusal, "Refusal must list supported models"
	    # "not in my knowledge" also matches the longer "not in my knowledge base" phrasing,
	    # so a single English check covers both.
	    assert "לא נמצא" in refusal or "not in my knowledge" in refusal, (
	        f"Refusal must state the car is not in the knowledge base, got: {refusal!r}"
	    )
	    print("✅ test_unsupported_car_returns_refusal passed")


	def test_supported_car_single_no_refusal():
	    """Check that a question about one supported car yields prompts, not a refusal.

	    Asks about the Audi RS3 and expects ``prepare_generation`` to return no
	    refusal, truthy system and user prompts, and a non-empty steps log.
	    """
	    engine = _get_engine()
	    outcome = engine.prepare_generation("Tell me about the Audi RS3")
	    refusal, sys_prompt, user_prompt, steps = outcome
	    assert refusal is None, "Supported car query must not refuse"
	    assert all((sys_prompt, user_prompt)), "Must return system and user prompts for LLM"
	    assert len(steps) >= 1, "Steps log must be populated"
	    print("✅ test_supported_car_single_no_refusal passed")


	def test_comparison_two_supported_no_refusal():
	    """Check that comparing two supported cars is answered rather than refused.

	    Uses an English "Audi RS3 vs Hyundai Elantra N" query and expects no refusal
	    plus truthy system and user prompts.
	    """
	    engine = _get_engine()
	    outcome = engine.prepare_generation("Compare Audi RS3 vs Hyundai Elantra N")
	    # The steps log (fourth element) is not inspected here.
	    refusal, sys_prompt, user_prompt, _steps = outcome
	    assert refusal is None, "Two supported cars comparison must not refuse"
	    assert all((sys_prompt, user_prompt))
	    print("✅ test_comparison_two_supported_no_refusal passed")


	def test_comparison_hebrew_two_cars_identified():
	    """Check that a Hebrew comparison query resolves to both compared cars.

	    The query 'השוואה בין RS3 לאלנטרה N' must produce at least two canonical names,
	    including ``audi_rs3`` and ``hyundai_elantra_n``. The same query must then pass
	    through ``prepare_generation`` without a refusal, and the user prompt must
	    mention one of the compared models.
	    """
	    engine = _get_engine()
	    query = "השוואה בין RS3 לאלנטרה N"

	    found = engine._get_ordered_supported_canonicals_in_text(query)
	    assert len(found) >= 2, f"Expected at least 2 cars in Hebrew comparison, got {found}"
	    for canonical in ("audi_rs3", "hyundai_elantra_n"):
	        assert canonical in found, f"Expected {canonical} in {found}"

	    refusal, _, user_prompt, _ = engine.prepare_generation(query)
	    assert refusal is None, "Hebrew comparison with two cars must not refuse"
	    # Any one of these model mentions in the prompt is accepted.
	    assert any(token in user_prompt for token in ("Elantra", "אלנטרה", "RS3"))
	    print("✅ test_comparison_hebrew_two_cars_identified passed")


	def test_comparison_one_supported_refusal():
	    """Check that a comparison with only one supported car is still answered.

	    Despite the ``_refusal`` suffix in the name, the asserted behavior is *no*
	    refusal: for "Compare RS3 vs BMW X5" only the RS3 is supported, and the engine
	    is expected to return prompts that cover the known model (with a disclaimer
	    about the unknown one, per the original note).
	    """
	    engine = _get_engine()
	    outcome = engine.prepare_generation("Compare RS3 vs BMW X5")
	    refusal, sys_prompt, user_prompt, _steps = outcome
	    assert refusal is None, "Comparison with one supported: we answer with disclaimer + info on that model, not refuse"
	    assert all((sys_prompt, user_prompt))
	    # The prompt must reference the one model that is known.
	    assert "RS3" in user_prompt or "audi" in user_prompt.lower()
	    print("✅ test_comparison_one_supported_refusal passed")


	def test_car_name_normalization():
	    """Check that ``engine._normalize_car_name`` maps name variants to canonical ids.

	    Covers English and Hebrew spellings, a bare model name (RS3), the "&"/"and"
	    forms of Link & Co 01, and a full Hebrew sentence containing the model name.
	    """
	    engine = _get_engine()
	    # Input text -> expected canonical id (insertion order is the check order).
	    expectations = {
	        "Audi RS3": "audi_rs3",
	        "RS3": "audi_rs3",
	        "קיה EV9": "kia_ev9",
	        "Citroen C3": "citroen_c3",
	        "Kia EV9": "kia_ev9",
	        "Link & Co 01": "link_co_01",
	        "Link and Co 01": "link_co_01",
	        "לינק אנד קו 01": "link_co_01",
	        "ספר על לינק אנד קו 01": "link_co_01",
	    }
	    for text, expected in expectations.items():
	        got = engine._normalize_car_name(text)
	        assert got == expected, f"Normalize {text!r}: expected {expected}, got {got}"
	    print("✅ test_car_name_normalization passed")


	def test_link_co_01_no_refusal():
	    """Check that Link & Co 01 queries, in Hebrew and English, are not refused.

	    Each query must come back from ``prepare_generation`` with no refusal and with
	    truthy system and user prompts.
	    """
	    engine = _get_engine()
	    queries = (
	        "ספר על לינק אנד קו 01",
	        "Tell me about Link & Co 01",
	        "Link and Co 01",
	    )
	    for query in queries:
	        refusal, sys_prompt, user_prompt, _steps = engine.prepare_generation(query)
	        assert refusal is None, f"Link & Co 01 query {query!r} must not refuse (got refusal)"
	        assert all((sys_prompt, user_prompt)), f"Must return prompts for {query!r}"
	    print("✅ test_link_co_01_no_refusal passed")


	def test_rag_engine_initialization_and_chunks():
	    """Check that the engine holds a non-empty knowledge base with aligned metadata.

	    ``engine.chunks`` must contain at least one entry and ``engine.chunk_metadata``
	    must have the same length. The original note says the chunks are loaded from
	    scraped_data.json.
	    """
	    engine = _get_engine()
	    chunks = engine.chunks
	    assert len(chunks) > 0, "Knowledge base must have at least one chunk"
	    assert len(engine.chunk_metadata) == len(chunks)
	    print("✅ test_rag_engine_initialization_and_chunks passed")


	def test_hybrid_search_returns_relevant_results():
	    """Check that hybrid search returns well-formed hits for a supported-car query.

	    Requires at least one result for an Audi RS3 query (``top_k=3``). The first
	    result must carry ``"metadata"`` and ``"text"`` keys, and its metadata must
	    include a ``"title"``.
	    """
	    hits = _get_engine()._hybrid_search("Tell me about the Audi RS3", top_k=3)
	    assert len(hits) >= 1, "Search must return at least one result for supported car"
	    top_hit = hits[0]
	    assert "metadata" in top_hit and "text" in top_hit
	    assert "title" in top_hit["metadata"]
	    print("✅ test_hybrid_search_returns_relevant_results passed")


	def test_chat_function_requires_gemini_key():
	    """Check that the app's chat function reports a missing API key instead of crashing.

	    The ``gemini_api`` environment variable is removed for the duration of the
	    call and restored afterwards. The first item produced by ``chat_function``
	    must mention "gemini", "API key" or "Configuration".
	    """
	    from app import chat_function

	    # Remove the key if present, remembering it so it can be put back.
	    saved_key = os.environ.pop("gemini_api", None)
	    try:
	        responses = list(chat_function("Tell me about Audi RS3", []))
	        assert len(responses) >= 1
	        first = responses[0]
	        assert "gemini" in first.lower() or "API key" in first or "Configuration" in first
	    finally:
	        # Restore the environment even when an assertion above fails.
	        if saved_key is not None:
	            os.environ["gemini_api"] = saved_key
	    print("✅ test_chat_function_requires_gemini_key passed")


	def run_all(tests=None):
	    """Run business-logic tests and report the outcome on stdout.

	    Every test is executed even if an earlier one fails; failures are collected
	    and summarized at the end.

	    Args:
	        tests: Optional iterable of zero-argument callables to run. When omitted
	            (``None``), the full suite defined in this module is run in its
	            standard order.

	    Returns:
	        0 if every test passed, 1 if at least one raised an exception.
	    """
	    if tests is None:
	        tests = [
	            test_supported_cars_list,
	            test_car_name_normalization,
	            test_link_co_01_no_refusal,
	            test_rag_engine_initialization_and_chunks,
	            test_unsupported_car_returns_refusal,
	            test_supported_car_single_no_refusal,
	            test_comparison_two_supported_no_refusal,
	            test_comparison_hebrew_two_cars_identified,
	            test_comparison_one_supported_refusal,
	            test_hybrid_search_returns_relevant_results,
	            test_chat_function_requires_gemini_key,
	        ]
	    failed = []
	    for t in tests:
	        # Callables such as functools.partial objects have no __name__.
	        name = getattr(t, "__name__", repr(t))
	        try:
	            t()
	        except Exception as e:  # runner boundary: record the failure and keep going
	            failed.append(name)
	            # Show the exception type too: a bare ``assert`` without a message
	            # raises an AssertionError whose text is empty.
	            print(f"❌ {name} failed: {type(e).__name__}: {e}")
	    if failed:
	        print(f"\n❌ {len(failed)} test(s) failed: {failed}")
	        return 1
	    print("\n✅ All business-logic tests passed.")
	    return 0


	# Script entry point: the process exit status is run_all()'s return value
	# (0 when every test passed, 1 otherwise).
	if __name__ == "__main__":
	    raise SystemExit(run_all())