galbendavids committed on
Commit
b262f99
·
1 Parent(s): ea834ed

CarsRUS: Link & Co 01 normalization (and/לינק&קו), tests

Browse files
rag_engine.py CHANGED
@@ -116,11 +116,15 @@ class RAGEngine:
116
  'gv80': 'genesis_gv80',
117
  "ג'נסיס gv80": 'genesis_gv80',
118
 
119
- # Link & Co 01
120
  'link & co 01': 'link_co_01',
121
  'link co 01': 'link_co_01',
 
 
122
  "לינק אנד קו 01": 'link_co_01',
123
  "לינק אנד קו": 'link_co_01',
 
 
124
  }
125
 
126
  def _chunk_by_topic(self, text: str, title: str, url: str) -> List[Dict]:
@@ -220,7 +224,9 @@ class RAGEngine:
220
  r'\baion\s*ht\b': 'aion_ht',
221
  r'\bgenesis[\s\-]*gv\s*80\b': 'genesis_gv80',
222
  r'\bgv\s*80\b': 'genesis_gv80',
 
223
  r'\blink\s*&?\s*co\s*01\b': 'link_co_01',
 
224
  r'\brs\s*3\b': 'audi_rs3',
225
  r'\bcorolla\b': 'toyota_corolla',
226
  }
@@ -302,6 +308,8 @@ class RAGEngine:
302
  'אלנטרה': 'sedan',
303
  'Elantra': 'sedan',
304
  'HT': 'suv',
 
 
305
  }
306
  for key, type_val in types_map.items():
307
  if key in title:
 
116
  'gv80': 'genesis_gv80',
117
  "ג'נסיס gv80": 'genesis_gv80',
118
 
119
+ # Link & Co 01 (support "and", "&", Hebrew לינק אנד/& קו)
120
  'link & co 01': 'link_co_01',
121
  'link co 01': 'link_co_01',
122
+ 'link and co 01': 'link_co_01',
123
+ 'link and co': 'link_co_01',
124
  "לינק אנד קו 01": 'link_co_01',
125
  "לינק אנד קו": 'link_co_01',
126
+ "לינק & קו 01": 'link_co_01',
127
+ "לינק & קו": 'link_co_01',
128
  }
129
 
130
  def _chunk_by_topic(self, text: str, title: str, url: str) -> List[Dict]:
 
224
  r'\baion\s*ht\b': 'aion_ht',
225
  r'\bgenesis[\s\-]*gv\s*80\b': 'genesis_gv80',
226
  r'\bgv\s*80\b': 'genesis_gv80',
227
+ r'\blink\s*(?:&|and)\s*co\.?\s*01\b': 'link_co_01',
228
  r'\blink\s*&?\s*co\s*01\b': 'link_co_01',
229
+ r'לינק\s*(?:&|אנד)\s*קו\s*01?': 'link_co_01', # Hebrew: לינק אנד/& קו 01
230
  r'\brs\s*3\b': 'audi_rs3',
231
  r'\bcorolla\b': 'toyota_corolla',
232
  }
 
308
  'אלנטרה': 'sedan',
309
  'Elantra': 'sedan',
310
  'HT': 'suv',
311
+ 'לינק': 'compact', # Link & Co 01
312
+ '01': 'compact', # Link & Co 01 (title contains "01")
313
  }
314
  for key, type_val in types_map.items():
315
  if key in title:
tests/test_business_logic.py CHANGED
@@ -114,7 +114,7 @@ def test_comparison_one_supported_refusal():
114
 
115
 
116
  def test_car_name_normalization():
117
- """Normalize car names: RS3 → audi_rs3, קיה EV9 → kia_ev9, Citroen C3 → citroen_c3."""
118
  engine = _get_engine()
119
  cases = [
120
  ("Audi RS3", "audi_rs3"),
@@ -122,6 +122,10 @@ def test_car_name_normalization():
122
  ("קיה EV9", "kia_ev9"),
123
  ("Citroen C3", "citroen_c3"),
124
  ("Kia EV9", "kia_ev9"),
 
 
 
 
125
  ]
126
  for text, expected in cases:
127
  got = engine._normalize_car_name(text)
@@ -129,6 +133,16 @@ def test_car_name_normalization():
129
  print("✅ test_car_name_normalization passed")
130
 
131
 
 
 
 
 
 
 
 
 
 
 
132
  def test_rag_engine_initialization_and_chunks():
133
  """RAG engine must load chunks from scraped_data.json (knowledge base exists)."""
134
  engine = _get_engine()
@@ -168,6 +182,7 @@ def run_all():
168
  tests = [
169
  test_supported_cars_list,
170
  test_car_name_normalization,
 
171
  test_rag_engine_initialization_and_chunks,
172
  test_unsupported_car_returns_refusal,
173
  test_supported_car_single_no_refusal,
 
114
 
115
 
116
  def test_car_name_normalization():
117
+ """Normalize car names: RS3 → audi_rs3, קיה EV9 → kia_ev9, Link & Co 01 → link_co_01."""
118
  engine = _get_engine()
119
  cases = [
120
  ("Audi RS3", "audi_rs3"),
 
122
  ("קיה EV9", "kia_ev9"),
123
  ("Citroen C3", "citroen_c3"),
124
  ("Kia EV9", "kia_ev9"),
125
+ ("Link & Co 01", "link_co_01"),
126
+ ("Link and Co 01", "link_co_01"),
127
+ ("לינק אנד קו 01", "link_co_01"),
128
+ ("ספר על לינק אנד קו 01", "link_co_01"),
129
  ]
130
  for text, expected in cases:
131
  got = engine._normalize_car_name(text)
 
133
  print("✅ test_car_name_normalization passed")
134
 
135
 
136
def test_link_co_01_no_refusal():
    """Queries naming Link & Co 01 (English or Hebrew) must yield prompts, not a refusal.

    For each query, ``engine.prepare_generation`` is expected to return a
    4-tuple whose first item (the refusal) is ``None`` and whose system and
    user prompts are both truthy.
    """
    engine = _get_engine()
    queries = (
        "ספר על לינק אנד קו 01",
        "Tell me about Link & Co 01",
        "Link and Co 01",
    )
    for query in queries:
        outcome = engine.prepare_generation(query)
        # Fourth element (the steps) is unpacked only to enforce the tuple arity.
        refusal, sys_prompt, user_prompt, _steps = outcome
        assert refusal is None, f"Link & Co 01 query {query!r} must not refuse (got refusal)"
        assert sys_prompt and user_prompt, f"Must return prompts for {query!r}"
    print("✅ test_link_co_01_no_refusal passed")
144
+
145
+
146
  def test_rag_engine_initialization_and_chunks():
147
  """RAG engine must load chunks from scraped_data.json (knowledge base exists)."""
148
  engine = _get_engine()
 
182
  tests = [
183
  test_supported_cars_list,
184
  test_car_name_normalization,
185
+ test_link_co_01_no_refusal,
186
  test_rag_engine_initialization_and_chunks,
187
  test_unsupported_car_returns_refusal,
188
  test_supported_car_single_no_refusal,
tests/test_cloud.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Cloud tests for CarsRUS Space on Hugging Face.
5
+ Runs against: https://huggingface.co/spaces/galbendavids/CarsRUS
6
+
7
+ Requires: pip install gradio_client
8
+ Run: PYTHONPATH=. python tests/test_cloud.py
9
+ or: bash tests/run_cloud_tests.sh
10
+ """
11
+
12
+ import os
13
+ import sys
14
+
15
+ _tests_dir = os.path.dirname(os.path.abspath(__file__))
16
+ _project_root = os.path.dirname(_tests_dir)
17
+ if _project_root not in sys.path:
18
+ sys.path.insert(0, _project_root)
19
+
20
+ # Space URL (public Space – no token needed for read)
21
+ SPACE_URL = os.environ.get("CARSRUS_SPACE_URL", "galbendavids/CarsRUS")
22
+
23
+
24
def get_client():
    """Build a ``gradio_client.Client`` for ``SPACE_URL``.

    The token is taken from the ``HF_TOKEN`` environment variable, falling
    back to ``HUGGING_FACE_HUB_TOKEN``; an unset or empty value is passed to
    the client as ``None``. If ``gradio_client`` cannot be imported, an
    install hint is printed and the process exits with status 1.
    """
    try:
        from gradio_client import Client
    except ImportError:
        print("❌ gradio_client not installed. Run: pip install gradio_client")
        sys.exit(1)

    # First non-empty variable wins; empty strings are treated like unset.
    token = None
    for env_name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        candidate = os.environ.get(env_name)
        if candidate:
            token = candidate
            break
    return Client(SPACE_URL, hf_token=token)
33
+
34
+
35
def collect_chat_response(client, message: str):
    """Send *message* to the Space's ``/chat`` endpoint and return the result.

    This is a thin wrapper around ``client.predict(message, api_name="/chat")``;
    whatever ``predict`` returns is handed back unchanged.
    """
    return client.predict(message, api_name="/chat")
40
+
41
+
42
def test_space_reachable():
    """Create a client for the Space and return it for the remaining tests.

    The client is built by ``get_client()``. Listing the API endpoints
    afterwards is best-effort diagnostics only: a failure there is printed
    and does not fail the test.

    Returns:
        The client object created by ``get_client()``.
    """
    print("\n[Cloud] test_space_reachable...")
    client = get_client()
    try:
        # Ask for the structured description and silence the client's own
        # printing; with default arguments view_api() prints the API and
        # returns nothing to inspect here.
        api_info = client.view_api(print_info=False, return_format="dict")
    except Exception as e:
        print(f" view_api: {e}")
    else:
        if isinstance(api_info, dict):
            endpoints = list(api_info.get("named_endpoints", api_info))
        else:
            # Unexpected return shape: report success without listing names.
            endpoints = "ok"
        print(f" Endpoints: {endpoints}")
    print("✅ test_space_reachable passed")
    return client
54
+
55
+
56
def test_chat_supported_car(client):
    """A question about a supported car (Audi RS3) must get a substantive answer.

    Fails if the reply contains a configuration/initialization error marker
    or if its stripped length is 50 characters or fewer. When the call itself
    raises, the error (and, best-effort, the API description) is printed
    before the exception is re-raised.
    """
    print("\n[Cloud] test_chat_supported_car...")
    question = "Tell me about the Audi RS3"
    try:
        reply = collect_chat_response(client, question)
    except Exception as err:
        print(f" Call failed: {err}")
        # Best-effort diagnostics; never let this mask the original failure.
        try:
            print(f" API: {client.view_api()}")
        except Exception:
            pass
        raise

    answer = reply if isinstance(reply, str) else str(reply)
    # The reply must not be one of the app's error banners.
    for marker in ("Configuration Error", "Initialization Error"):
        assert marker not in answer, f"Expected normal answer, got: {answer[:200]}"
    # The reply must carry real content about the car.
    stripped_length = len(answer.strip())
    assert stripped_length > 50, f"Response too short: {answer[:200]}"
    print(f" Response length: {len(answer)} chars")
    print("✅ test_chat_supported_car passed")
79
+
80
+
81
def test_chat_unsupported_car(client):
    """A question about an unsupported car must produce a refusal or the supported list.

    The reply must not contain a configuration/initialization error marker
    and must contain at least one refusal/supported-list marker. Markers are
    matched case-insensitively. A long reply without any marker no longer
    passes: length alone does not show that the app refused.
    """
    print("\n[Cloud] test_chat_unsupported_car...")
    message = "What do you think about BMW X5?"
    try:
        result = collect_chat_response(client, message)
    except Exception as e:
        print(f" Call failed: {e}")
        raise
    text = result if isinstance(result, str) else str(result)
    assert "Configuration Error" not in text and "Initialization Error" not in text, (
        f"Expected refusal/supported list, got error: {text[:200]}"
    )
    # Refusal wording or a listing of supported cars (English and Hebrew).
    lowered = text.lower()
    refusal_markers = (
        "not in my knowledge",
        "לא נמצא",
        "supported",
        "נתמכים",
        "citroen",
        "audi rs3",
    )
    has_refusal = any(marker in lowered for marker in refusal_markers)
    assert has_refusal, f"Expected refusal/supported list, got: {text[:300]}"
    print("✅ test_chat_unsupported_car passed")
103
+
104
+
105
def test_chat_comparison(client):
    """A comparison request for two supported cars must return real content.

    Fails if the reply contains a configuration/initialization error marker
    or if its stripped length is 30 characters or fewer.
    """
    print("\n[Cloud] test_chat_comparison...")
    question = "Compare Audi RS3 vs Hyundai Elantra N"
    try:
        reply = collect_chat_response(client, question)
    except Exception as err:
        print(f" Call failed: {err}")
        raise

    answer = reply if isinstance(reply, str) else str(reply)
    for marker in ("Configuration Error", "Initialization Error"):
        assert marker not in answer
    stripped_length = len(answer.strip())
    assert stripped_length > 30, f"Response too short: {answer[:200]}"
    print("✅ test_chat_comparison passed")
118
+
119
+
120
def test_chat_hebrew(client):
    """A Hebrew query must get a response (in Hebrew or English).

    Fails if the reply contains a configuration/initialization error marker
    or if its stripped length is 20 characters or fewer.
    """
    print("\n[Cloud] test_chat_hebrew...")
    question = "ספר לי על אודי RS3"
    try:
        reply = collect_chat_response(client, question)
    except Exception as err:
        print(f" Call failed: {err}")
        raise

    answer = reply if isinstance(reply, str) else str(reply)
    for marker in ("Configuration Error", "Initialization Error"):
        assert marker not in answer
    stripped_length = len(answer.strip())
    assert stripped_length > 20, f"Response too short: {answer[:200]}"
    print("✅ test_chat_hebrew passed")
133
+
134
+
135
def test_chat_link_co_01(client):
    """Link & Co 01 queries (Hebrew and English) must be answered, not refused.

    For each query the reply must not contain a configuration/initialization
    error marker, must not contain the refusal wording, and must have a
    stripped length above 50 characters. The English refusal phrase is
    matched case-insensitively so a capitalized refusal is still caught.
    """
    print("\n[Cloud] test_chat_link_co_01...")
    for message in ["ספר על לינק אנד קו 01", "Tell me about Link and Co 01"]:
        try:
            result = collect_chat_response(client, message)
        except Exception as e:
            print(f" Call failed for {message!r}: {e}")
            raise
        text = result if isinstance(result, str) else str(result)
        assert "Configuration Error" not in text and "Initialization Error" not in text, (
            f"Expected normal answer for {message!r}, got: {text[:200]}"
        )
        # Lowercasing only affects the English phrase; the Hebrew one is
        # checked against the original text.
        lowered = text.lower()
        assert "not in my knowledge" not in lowered and "לא נמצא בבסיס הידע" not in text, (
            f"Link & Co 01 is in scraped_data (link-and-co-01-2026); got refusal: {text[:300]}"
        )
        assert len(text.strip()) > 50, f"Response too short for {message!r}: {text[:200]}"
    print("✅ test_chat_link_co_01 passed")
151
+
152
+
153
def run_all():
    """Run every cloud test in order and report the outcome.

    Returns:
        0 when all tests pass; 1 as soon as any test raises an ``Exception``
        (the error and its traceback are printed first).
    """
    banner = "=" * 60
    print(banner)
    print("CarsRUS – Cloud tests")
    print(f"Space: {SPACE_URL}")
    print(banner)
    try:
        # The reachability test hands back the client the chat tests share.
        client = test_space_reachable()
        chat_checks = (
            test_chat_supported_car,
            test_chat_unsupported_car,
            test_chat_comparison,
            test_chat_hebrew,
            test_chat_link_co_01,
        )
        for check in chat_checks:
            check(client)
    except Exception as e:
        print(f"\n❌ Cloud test failed: {e}")
        import traceback
        traceback.print_exc()
        return 1
    print("\n✅ All cloud tests passed.")
    return 0
173
+
174
+
175
+ if __name__ == "__main__":
176
+ sys.exit(run_all())