CarsRUS / tests /test_business_logic.py
galbendavids's picture
CarsRUS: Link & Co 01 normalization (and/לינק&קו), tests
b262f99
#!/usr/bin/env python
"""
Business-logic test suite for CarsRUS (QA / DevOps).
Validates expected behavior from request_file.txt:
- Ingest automotive review content → searchable knowledge base
- Respond based on retrieved knowledge (no hallucination for unsupported cars)
- Supported cars: Citroen C3, Audi RS3, Kia EV9, MG S6, Hyundai Elantra N, Aion HT, Genesis GV80, Link & Co 01
- Unsupported car questions → refusal with supported list
- Comparison: 2 supported cars → proceed; 1 supported → answer about that car with a disclaimer; 0 → refusal
- Car name normalization (e.g. RS3 → audi_rs3, קיה EV9 → kia_ev9)
Run before pushing to Hugging Face: python test_business_logic.py
"""
import os
import sys
# Allow imports from the project root when run via tests/run_tests.sh
# (PYTHONPATH=project_root): the root is the parent of this tests directory.
_tests_dir = os.path.split(os.path.abspath(__file__))[0]
_project_root = os.path.split(_tests_dir)[0]
if _project_root not in sys.path:
    # Prepend the root so it is searched before the other sys.path entries.
    sys.path.insert(0, _project_root)
_engine = None  # module-level cache, filled on the first _get_engine() call


def _get_engine():
    """Return the shared RAGEngine, creating it on first use.

    The instance is cached in the module-level ``_engine`` so the
    model/embeddings are loaded only once per test run.
    """
    global _engine
    if _engine is not None:
        return _engine
    # Imported lazily: the import only happens when an engine is first needed.
    from rag_engine import RAGEngine
    _engine = RAGEngine()
    return _engine
def test_supported_cars_list():
    """Supported models must match the knowledge base (scraped articles).

    Compares ``engine._supported_cars_display()`` with the expected list
    below, ignoring order, and then compares lengths so that a duplicated
    entry in the display list is also reported.
    """
    engine = _get_engine()
    display = engine._supported_cars_display()
    expected = [
        "Citroen C3",
        "Audi RS3",
        "Kia EV9",
        "MG S6",
        "Hyundai Elantra N",
        "Aion HT",
        "Genesis GV80",
        "Link & Co 01",
    ]
    assert set(display) == set(expected), f"Supported cars mismatch: got {display}"
    # Set equality ignores duplicates, so check the raw length too.  The
    # count is derived from `expected` so the two cannot drift apart.
    assert len(display) == len(expected), (
        f"Expected {len(expected)} supported models, got {len(display)}"
    )
    print("✅ test_supported_cars_list passed")
def test_unsupported_car_returns_refusal():
    """Asking about a car not in the knowledge base must return a refusal.

    ``prepare_generation`` must return a refusal text and no prompts, and
    the refusal must mention supported models.
    """
    engine = _get_engine()
    # Hebrew: "What do you think about BMW X5?"
    query = "מה דעתך על BMW X5?"
    refusal, sys_prompt, user_prompt, _steps = engine.prepare_generation(query)
    assert refusal is not None, "Unsupported car query must return refusal"
    assert sys_prompt is None and user_prompt is None, "Refusal must not return prompts"
    assert "Citroen C3" in refusal or "Audi RS3" in refusal, "Refusal must list supported models"
    # Accept either the Hebrew wording ("לא נמצא", i.e. "not found") or the
    # English one.  "not in my knowledge base" contains "not in my knowledge",
    # so one English check covers both phrasings.
    assert "לא נמצא" in refusal or "not in my knowledge" in refusal, (
        f"Refusal must say the car is not in the knowledge base, got {refusal!r}"
    )
    print("✅ test_unsupported_car_returns_refusal passed")
def test_supported_car_single_no_refusal():
    """Single supported car question must NOT refuse.

    ``prepare_generation`` must return no refusal, non-empty system and
    user prompts, and a steps log with at least one entry.
    """
    engine = _get_engine()
    query = "Tell me about the Audi RS3"
    refusal, sys_prompt, user_prompt, steps = engine.prepare_generation(query)
    assert refusal is None, "Supported car query must not refuse"
    assert sys_prompt and user_prompt, "Must return system and user prompts for LLM"
    assert len(steps) >= 1, "Steps log must be populated"
    print("✅ test_supported_car_single_no_refusal passed")
def test_comparison_two_supported_no_refusal():
    """Comparison of two supported cars must NOT refuse.

    ``prepare_generation`` must return no refusal and non-empty system and
    user prompts.
    """
    engine = _get_engine()
    query = "Compare Audi RS3 vs Hyundai Elantra N"
    refusal, sys_prompt, user_prompt, _steps = engine.prepare_generation(query)
    assert refusal is None, "Two supported cars comparison must not refuse"
    assert sys_prompt and user_prompt, "Comparison must return system and user prompts"
    print("✅ test_comparison_two_supported_no_refusal passed")
def test_comparison_hebrew_two_cars_identified():
    """Hebrew comparison 'השוואה בין RS3 לאלנטרה N' must identify both cars.

    The query means "comparison between RS3 and Elantra N"; both
    ``audi_rs3`` and ``hyundai_elantra_n`` must be detected, and the
    comparison must not be refused.
    """
    engine = _get_engine()
    query = "השוואה בין RS3 לאלנטרה N"
    ordered = engine._get_ordered_supported_canonicals_in_text(query)
    assert len(ordered) >= 2, f"Expected at least 2 cars in Hebrew comparison, got {ordered}"
    assert "audi_rs3" in ordered, f"Expected audi_rs3 in {ordered}"
    assert "hyundai_elantra_n" in ordered, f"Expected hyundai_elantra_n in {ordered}"
    refusal, _, user_prompt, _ = engine.prepare_generation(query)
    assert refusal is None, "Hebrew comparison with two cars must not refuse"
    # The user prompt must mention at least one of the compared models,
    # in either Latin or Hebrew spelling.
    assert "Elantra" in user_prompt or "אלנטרה" in user_prompt or "RS3" in user_prompt, (
        "User prompt must mention at least one of the compared models"
    )
    print("✅ test_comparison_hebrew_two_cars_identified passed")
def test_comparison_one_supported_refusal():
    """Comparison with only one supported car must NOT refuse.

    Despite the "refusal" in this test's name, the asserted behavior is
    that the engine answers with a disclaimer plus information on the one
    supported model: it returns no refusal, and the user prompt mentions
    that model.
    """
    engine = _get_engine()
    # "Compare RS3 vs BMW X5" — only RS3 is supported → we don't refuse; we say
    # we can't compare to the unknown car and tell about RS3.
    query = "Compare RS3 vs BMW X5"
    refusal, sys_prompt, user_prompt, _steps = engine.prepare_generation(query)
    assert refusal is None, "Comparison with one supported: we answer with disclaimer + info on that model, not refuse"
    assert sys_prompt and user_prompt, "Must return system and user prompts for LLM"
    # Prompt should instruct to tell about the one model we know.
    assert "RS3" in user_prompt or "audi" in user_prompt.lower(), (
        "User prompt must mention the supported model (Audi RS3)"
    )
    print("✅ test_comparison_one_supported_refusal passed")
def test_car_name_normalization():
    """Normalize car names: RS3 → audi_rs3, קיה EV9 → kia_ev9, Link & Co 01 → link_co_01.

    Each case pairs an input text (English or Hebrew, including a full
    Hebrew sentence) with the canonical name that
    ``engine._normalize_car_name`` must return.
    """
    engine = _get_engine()
    cases = [
        ("Audi RS3", "audi_rs3"),
        ("RS3", "audi_rs3"),
        ("קיה EV9", "kia_ev9"),
        ("Citroen C3", "citroen_c3"),
        ("Kia EV9", "kia_ev9"),
        ("Link & Co 01", "link_co_01"),
        ("Link and Co 01", "link_co_01"),
        ("לינק אנד קו 01", "link_co_01"),
        # Hebrew sentence: "Tell about Link and Co 01".
        ("ספר על לינק אנד קו 01", "link_co_01"),
    ]
    for text, expected in cases:
        got = engine._normalize_car_name(text)
        assert got == expected, f"Normalize {text!r}: expected {expected}, got {got}"
    print("✅ test_car_name_normalization passed")
def test_link_co_01_no_refusal():
    """Link & Co 01 / לינק אנד קו 01 must be recognized and must NOT refuse.

    Runs ``prepare_generation`` on Hebrew and English spellings of the
    model; each must return no refusal and non-empty prompts (we have the
    document).
    """
    engine = _get_engine()
    queries = [
        "ספר על לינק אנד קו 01",  # Hebrew: "Tell about Link and Co 01"
        "Tell me about Link & Co 01",
        "Link and Co 01",
    ]
    for query in queries:
        refusal, sys_prompt, user_prompt, _steps = engine.prepare_generation(query)
        assert refusal is None, f"Link & Co 01 query {query!r} must not refuse (got refusal)"
        assert sys_prompt and user_prompt, f"Must return prompts for {query!r}"
    print("✅ test_link_co_01_no_refusal passed")
def test_rag_engine_initialization_and_chunks():
    """RAG engine must load chunks from scraped_data.json (knowledge base exists).

    Checks that ``engine.chunks`` is non-empty and that
    ``engine.chunk_metadata`` has the same number of entries.
    """
    engine = _get_engine()
    assert len(engine.chunks) > 0, "Knowledge base must have at least one chunk"
    assert len(engine.chunk_metadata) == len(engine.chunks), (
        f"Metadata/chunk count mismatch: {len(engine.chunk_metadata)} metadata "
        f"entries for {len(engine.chunks)} chunks"
    )
    print("✅ test_rag_engine_initialization_and_chunks passed")
def test_hybrid_search_returns_relevant_results():
    """Hybrid search must return results for a supported car query.

    The first result must expose ``"metadata"`` and ``"text"`` keys, and
    its metadata must contain a ``"title"``.
    """
    engine = _get_engine()
    results = engine._hybrid_search("Tell me about the Audi RS3", top_k=3)
    assert len(results) >= 1, "Search must return at least one result for supported car"
    top = results[0]
    assert "metadata" in top and "text" in top, "Search result must contain 'metadata' and 'text'"
    assert "title" in top["metadata"], "Search result metadata must contain 'title'"
    print("✅ test_hybrid_search_returns_relevant_results passed")
def test_chat_function_requires_gemini_key():
    """App chat must handle missing API key with clear error (no crash).

    The ``gemini_api`` environment variable is removed for the duration
    of the call and restored afterwards, even if an assertion fails.
    """
    from app import chat_function

    # Temporarily unset the key if it is set.
    old_key = os.environ.pop("gemini_api", None)
    try:
        out = list(chat_function("Tell me about Audi RS3", []))
        assert len(out) >= 1, "chat_function must yield at least one message"
        first = out[0]
        assert "gemini" in first.lower() or "API key" in first or "Configuration" in first, (
            f"Missing-key response must mention the Gemini key/configuration, got {first!r}"
        )
    finally:
        # Put the key back so later code in this process still sees it.
        if old_key is not None:
            os.environ["gemini_api"] = old_key
    print("✅ test_chat_function_requires_gemini_key passed")
def run_all():
    """Run all business-logic tests and report the outcome.

    Every test is run even if an earlier one fails; failures are printed
    as they happen and summarized at the end.

    Returns:
        0 if all tests pass, 1 otherwise (used as the process exit code).
    """
    tests = [
        test_supported_cars_list,
        test_car_name_normalization,
        test_link_co_01_no_refusal,
        test_rag_engine_initialization_and_chunks,
        test_unsupported_car_returns_refusal,
        test_supported_car_single_no_refusal,
        test_comparison_two_supported_no_refusal,
        test_comparison_hebrew_two_cars_identified,
        test_comparison_one_supported_refusal,
        test_hybrid_search_returns_relevant_results,
        test_chat_function_requires_gemini_key,
    ]
    failed = []
    for t in tests:
        try:
            t()
        except Exception as e:  # runner boundary: record the failure and keep going
            failed.append((t.__name__, e))
            # Include the exception type: a bare `assert` has an empty message,
            # which would otherwise print no reason at all.
            print(f"❌ {t.__name__} failed: {type(e).__name__}: {e}")
    if failed:
        print(f"\n❌ {len(failed)} test(s) failed: {[n for n, _ in failed]}")
        return 1
    print("\n✅ All business-logic tests passed.")
    return 0
if __name__ == "__main__":
    # Script entry point: the process exit status is run_all()'s result.
    raise SystemExit(run_all())