Spaces:

galbendavids
/

CarsRUS

Sleeping

App Files Files Community

galbendavids committed on Feb 2

Commit

b262f99

1 Parent(s): ea834ed

CarsRUS: Link & Co 01 normalization (and/לינק&קו), tests

Browse files

Files changed (3) hide show

rag_engine.py +9 -1
tests/test_business_logic.py +16 -1
tests/test_cloud.py +176 -0

rag_engine.py CHANGED Viewed

@@ -116,11 +116,15 @@ class RAGEngine:
             'gv80': 'genesis_gv80',
             "ג'נסיס gv80": 'genesis_gv80',
-            # Link & Co 01
             'link & co 01': 'link_co_01',
             'link co 01': 'link_co_01',
             "לינק אנד קו 01": 'link_co_01',
             "לינק אנד קו": 'link_co_01',
         }
     def _chunk_by_topic(self, text: str, title: str, url: str) -> List[Dict]:
@@ -220,7 +224,9 @@ class RAGEngine:
             r'\baion\s*ht\b': 'aion_ht',
             r'\bgenesis[\s\-]*gv\s*80\b': 'genesis_gv80',
             r'\bgv\s*80\b': 'genesis_gv80',
             r'\blink\s*&?\s*co\s*01\b': 'link_co_01',
             r'\brs\s*3\b': 'audi_rs3',
             r'\bcorolla\b': 'toyota_corolla',
         }
@@ -302,6 +308,8 @@ class RAGEngine:
             'אלנטרה': 'sedan',
             'Elantra': 'sedan',
             'HT': 'suv',
         }
         for key, type_val in types_map.items():
             if key in title:

             'gv80': 'genesis_gv80',
             "ג'נסיס gv80": 'genesis_gv80',
+            # Link & Co 01 (support "and", "&", Hebrew לינק אנד/& קו)
             'link & co 01': 'link_co_01',
             'link co 01': 'link_co_01',
+            'link and co 01': 'link_co_01',
+            'link and co': 'link_co_01',
             "לינק אנד קו 01": 'link_co_01',
             "לינק אנד קו": 'link_co_01',
+            "לינק & קו 01": 'link_co_01',
+            "לינק & קו": 'link_co_01',
         }
     def _chunk_by_topic(self, text: str, title: str, url: str) -> List[Dict]:
             r'\baion\s*ht\b': 'aion_ht',
             r'\bgenesis[\s\-]*gv\s*80\b': 'genesis_gv80',
             r'\bgv\s*80\b': 'genesis_gv80',
+            r'\blink\s*(?:&|and)\s*co\.?\s*01\b': 'link_co_01',
             r'\blink\s*&?\s*co\s*01\b': 'link_co_01',
+            r'לינק\s*(?:&|אנד)\s*קו\s*01?': 'link_co_01',  # Hebrew: לינק אנד/& קו 01
             r'\brs\s*3\b': 'audi_rs3',
             r'\bcorolla\b': 'toyota_corolla',
         }
             'אלנטרה': 'sedan',
             'Elantra': 'sedan',
             'HT': 'suv',
+            'לינק': 'compact',  # Link & Co 01
+            '01': 'compact',   # Link & Co 01 (title contains "01")
         }
         for key, type_val in types_map.items():
             if key in title:

tests/test_business_logic.py CHANGED Viewed

@@ -114,7 +114,7 @@ def test_comparison_one_supported_refusal():
 def test_car_name_normalization():
-    """Normalize car names: RS3 → audi_rs3, קיה EV9 → kia_ev9, Citroen C3 → citroen_c3."""
     engine = _get_engine()
     cases = [
         ("Audi RS3", "audi_rs3"),
@@ -122,6 +122,10 @@ def test_car_name_normalization():
         ("קיה EV9", "kia_ev9"),
         ("Citroen C3", "citroen_c3"),
         ("Kia EV9", "kia_ev9"),
     ]
     for text, expected in cases:
         got = engine._normalize_car_name(text)
@@ -129,6 +133,16 @@ def test_car_name_normalization():
     print("✅ test_car_name_normalization passed")
 def test_rag_engine_initialization_and_chunks():
     """RAG engine must load chunks from scraped_data.json (knowledge base exists)."""
     engine = _get_engine()
@@ -168,6 +182,7 @@ def run_all():
     tests = [
         test_supported_cars_list,
         test_car_name_normalization,
         test_rag_engine_initialization_and_chunks,
         test_unsupported_car_returns_refusal,
         test_supported_car_single_no_refusal,

 def test_car_name_normalization():
+    """Normalize car names: RS3 → audi_rs3, קיה EV9 → kia_ev9, Link & Co 01 → link_co_01."""
     engine = _get_engine()
     cases = [
         ("Audi RS3", "audi_rs3"),
         ("קיה EV9", "kia_ev9"),
         ("Citroen C3", "citroen_c3"),
         ("Kia EV9", "kia_ev9"),
+        ("Link & Co 01", "link_co_01"),
+        ("Link and Co 01", "link_co_01"),
+        ("לינק אנד קו 01", "link_co_01"),
+        ("ספר על לינק אנד קו 01", "link_co_01"),
     ]
     for text, expected in cases:
         got = engine._normalize_car_name(text)
     print("✅ test_car_name_normalization passed")
+def test_link_co_01_no_refusal():
+    """Queries naming Link & Co 01 (Hebrew or English) must yield prompts and no refusal."""
+    engine = _get_engine()
+    queries = ("ספר על לינק אנד קו 01", "Tell me about Link & Co 01", "Link and Co 01")
+    for query in queries:
+        # prepare_generation is unpacked into four values; the last one is not checked here.
+        outcome = engine.prepare_generation(query)
+        refusal, sys_prompt, user_prompt, _steps = outcome
+        assert refusal is None, f"Link & Co 01 query {query!r} must not refuse (got refusal)"
+        assert sys_prompt and user_prompt, f"Must return prompts for {query!r}"
+    print("✅ test_link_co_01_no_refusal passed")
 def test_rag_engine_initialization_and_chunks():
     """RAG engine must load chunks from scraped_data.json (knowledge base exists)."""
     engine = _get_engine()
     tests = [
         test_supported_cars_list,
         test_car_name_normalization,
+        test_link_co_01_no_refusal,
         test_rag_engine_initialization_and_chunks,
         test_unsupported_car_returns_refusal,
         test_supported_car_single_no_refusal,

tests/test_cloud.py ADDED Viewed

	@@ -0,0 +1,176 @@

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Cloud tests for CarsRUS Space on Hugging Face.
+Runs against: https://huggingface.co/spaces/galbendavids/CarsRUS
+Requires: pip install gradio_client
+Run: PYTHONPATH=. python tests/test_cloud.py
+     or: bash tests/run_cloud_tests.sh
+"""
+import os
+import sys
+# Make the project importable when this file is run directly as a script:
+# prepend the parent of this file's directory to sys.path if it is not already there.
+_tests_dir = os.path.dirname(os.path.abspath(__file__))
+_project_root = os.path.dirname(_tests_dir)
+if _project_root not in sys.path:
+    sys.path.insert(0, _project_root)
+# Space URL (public Space – no token needed for read)
+# Can be overridden through the CARSRUS_SPACE_URL environment variable.
+SPACE_URL = os.environ.get("CARSRUS_SPACE_URL", "galbendavids/CarsRUS")
+def get_client():
+    """Build a gradio_client ``Client`` for ``SPACE_URL``.
+
+    The token comes from ``HF_TOKEN`` or, failing that, ``HUGGING_FACE_HUB_TOKEN``;
+    ``None`` is passed when neither holds a non-empty value. If gradio_client
+    cannot be imported, an install hint is printed and the process exits with status 1.
+    """
+    try:
+        from gradio_client import Client
+    except ImportError:
+        print("❌ gradio_client not installed. Run: pip install gradio_client")
+        sys.exit(1)
+    token = None
+    # First non-empty variable wins; empty strings are treated as unset.
+    for var_name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
+        value = os.environ.get(var_name)
+        if value:
+            token = value
+            break
+    return Client(SPACE_URL, hf_token=token)
+def collect_chat_response(client, message: str):
+    """Send ``message`` to the Space's ``/chat`` endpoint and return what ``predict`` returns."""
+    # Space API: predict(message, api_name="/chat") -> response (str)
+    return client.predict(message, api_name="/chat")
+def test_space_reachable():
+    """Connect to the Space and, as a best-effort diagnostic, list its named endpoints.
+
+    Any exception raised by ``get_client()`` propagates to the caller. A failure
+    while listing endpoints is only printed and does not fail the test.
+
+    Returns:
+        The connected client, so the chat tests can reuse it.
+    """
+    print("\n[Cloud] test_space_reachable...")
+    client = get_client()
+    try:
+        # Request the dict form explicitly: with default arguments view_api()
+        # prints a summary and returns None, leaving nothing to list.
+        api_info = client.view_api(print_info=False, return_format="dict")
+        if isinstance(api_info, dict):
+            endpoints = list(api_info.get("named_endpoints", {}))
+        else:
+            endpoints = []
+        print(f"   Endpoints: {endpoints}")
+    except Exception as e:
+        # Diagnostic only: the endpoint listing must never fail this test.
+        print(f"   view_api: {e}")
+    print("✅ test_space_reachable passed")
+    return client
+def test_chat_supported_car(client):
+    """Query a supported car (Audi RS3) and require a substantive, error-free reply.
+
+    If the call fails, the exception is printed, the API description is dumped
+    when it can be obtained, and the original exception is re-raised.
+    """
+    print("\n[Cloud] test_chat_supported_car...")
+    question = "Tell me about the Audi RS3"
+    try:
+        reply = collect_chat_response(client, question)
+    except Exception as e:
+        print(f"   Call failed: {e}")
+        try:
+            print(f"   API: {client.view_api()}")
+        except Exception:
+            # Best-effort diagnostics only; the original failure is re-raised below.
+            pass
+        raise
+    if isinstance(reply, str):
+        text = reply
+    else:
+        text = str(reply)
+    # Should not be config/init error
+    error_markers = ("Configuration Error", "Initialization Error")
+    assert not any(marker in text for marker in error_markers), (
+        f"Expected normal answer, got: {text[:200]}"
+    )
+    # Should mention something about the car or content
+    stripped_length = len(text.strip())
+    assert stripped_length > 50, f"Response too short: {text[:200]}"
+    print(f"   Response length: {len(text)} chars")
+    print("✅ test_chat_supported_car passed")
+def test_chat_unsupported_car(client):
+    """Ask about an unsupported car (BMW X5) and require a refusal or the supported list.
+
+    The reply must contain at least one refusal / supported-list marker
+    (English or Hebrew). A failed call is printed and re-raised.
+    """
+    print("\n[Cloud] test_chat_unsupported_car...")
+    message = "What do you think about BMW X5?"
+    try:
+        result = collect_chat_response(client, message)
+    except Exception as e:
+        print(f"   Call failed: {e}")
+        raise
+    text = result if isinstance(result, str) else str(result)
+    assert "Configuration Error" not in text and "Initialization Error" not in text
+    # Lowercase once so the English markers match regardless of capitalization.
+    lowered = text.lower()
+    # Refusal or supported list
+    has_refusal = (
+        "not in my knowledge" in lowered
+        or "לא נמצא" in text
+        or "supported" in lowered
+        or "נתמכים" in text
+        or "Citroen" in text
+        or "Audi RS3" in text
+    )
+    # Length alone is not accepted as evidence: a long, ordinary answer about
+    # the unsupported car must fail this test.
+    assert has_refusal, f"Expected refusal/supported list, got: {text[:300]}"
+    print("✅ test_chat_unsupported_car passed")
+def test_chat_comparison(client):
+    """Request a comparison of two supported cars and require a non-trivial, error-free reply."""
+    print("\n[Cloud] test_chat_comparison...")
+    prompt = "Compare Audi RS3 vs Hyundai Elantra N"
+    try:
+        reply = collect_chat_response(client, prompt)
+    except Exception as e:
+        print(f"   Call failed: {e}")
+        raise
+    if isinstance(reply, str):
+        text = reply
+    else:
+        text = str(reply)
+    # Neither application error banner may appear in the reply.
+    assert not ("Configuration Error" in text or "Initialization Error" in text)
+    body = text.strip()
+    assert len(body) > 30, f"Response too short: {text[:200]}"
+    print("✅ test_chat_comparison passed")
+def test_chat_hebrew(client):
+    """Send a Hebrew query and require a non-trivial, error-free reply in any language."""
+    print("\n[Cloud] test_chat_hebrew...")
+    hebrew_query = "ספר לי על אודי RS3"
+    try:
+        answer = collect_chat_response(client, hebrew_query)
+    except Exception as e:
+        print(f"   Call failed: {e}")
+        raise
+    text = answer if isinstance(answer, str) else str(answer)
+    # Check each application error banner separately-named, same combined assertion.
+    config_error_absent = "Configuration Error" not in text
+    assert config_error_absent and "Initialization Error" not in text
+    visible_length = len(text.strip())
+    assert visible_length > 20, f"Response too short: {text[:200]}"
+    print("✅ test_chat_hebrew passed")
+def test_chat_link_co_01(client):
+    """Link & Co 01 is in the knowledge base – the Space must answer, not refuse.
+
+    Runs one Hebrew and one English query. Each reply must be free of the
+    application error banners, must not contain a refusal phrase, and must be
+    longer than 50 characters after stripping. A failed call is printed and re-raised.
+    """
+    print("\n[Cloud] test_chat_link_co_01...")
+    for message in ["ספר על לינק אנד קו 01", "Tell me about Link and Co 01"]:
+        try:
+            result = collect_chat_response(client, message)
+        except Exception as e:
+            print(f"   Call failed for {message!r}: {e}")
+            raise
+        text = result if isinstance(result, str) else str(result)
+        assert "Configuration Error" not in text and "Initialization Error" not in text
+        # Match the English refusal phrase case-insensitively so a capitalized
+        # refusal ("Not in my knowledge ...") is still detected.
+        assert "not in my knowledge" not in text.lower() and "לא נמצא בבסיס הידע" not in text, (
+            f"Link & Co 01 is in scraped_data (link-and-co-01-2026); got refusal: {text[:300]}"
+        )
+        assert len(text.strip()) > 50, f"Response too short for {message!r}: {text[:200]}"
+    print("✅ test_chat_link_co_01 passed")
+def run_all():
+    """Run the cloud tests in order; return 0 when all pass, 1 on the first failure."""
+    banner = "=" * 60
+    print(banner)
+    print("CarsRUS – Cloud tests")
+    print(f"Space: {SPACE_URL}")
+    print(banner)
+    try:
+        # The reachability test hands back the client that every chat test reuses.
+        client = test_space_reachable()
+        chat_tests = (
+            test_chat_supported_car,
+            test_chat_unsupported_car,
+            test_chat_comparison,
+            test_chat_hebrew,
+            test_chat_link_co_01,
+        )
+        for chat_test in chat_tests:
+            chat_test(client)
+    except Exception as e:
+        print(f"\n❌ Cloud test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return 1
+    print("\n✅ All cloud tests passed.")
+    return 0
+# Script entry point: the process exit status is run_all()'s return value.
+if __name__ == "__main__":
+    sys.exit(run_all())