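"""RAG engine for a car-review assistant built on articles scraped from auto.co.il.

High-level flow (summarizing the class below): articles are split into topic-based
chunks with rich metadata, retrieval is a hybrid of sentence-transformer vectors and
a keyword index, car names and comparison questions are detected with rules/regex,
and a single LLM call (OpenRouter when configured, otherwise Gemini) answers strictly
from the retrieved context.
"""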
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
import os
import re
import requests
from collections import defaultdict
from typing import List, Dict, Tuple, Optional
import time
import hashlib
import random
import threading
import logging
# Pipeline logging: visible in HF Spaces logs and helps debug RAG flow
PIPELINE_LOG = logging.getLogger("pipeline")
if not PIPELINE_LOG.handlers:
h = logging.StreamHandler()
h.setFormatter(logging.Formatter("[PIPELINE] %(message)s"))
PIPELINE_LOG.addHandler(h)
PIPELINE_LOG.setLevel(logging.INFO)
class RAGEngine:
def __init__(self, data_path=None):
print("Initializing RAG Engine with Advanced Features...")
if data_path is None:
base_dir = os.path.dirname(__file__)
self.data_path = os.path.join(base_dir, "data_ingestion", "scraped_data.json")
else:
self.data_path = data_path
print(f"Using data path: {self.data_path}")
# Encoder is lazy-loaded: at startup only if embeddings must be built, otherwise on the first search.
self.encoder = None
self._encoder_model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
# Initialize advanced features
self.chunks = []
self.chunk_metadata = []  # Tip 2: rich per-chunk metadata
self.embeddings = None
self.keyword_index = {}  # Tip 3: keyword index for hybrid search
self.car_normalization = self._build_car_normalization()  # Tip 4: car-name normalization
# Regex patterns for robust car-name detection
self._build_regex_patterns()
self.conversation_history = []  # Tip 10: conversation history
# Rate limiting and caching
self.response_cache = {} # Cache for identical queries
self.last_request_time = 0 # Track last API request time
# Minimum delay between requests (seconds); kept small because the rate-limit backoff handles 429s. Tunable via GEMINI_REQUEST_DELAY.
self.request_delay = float(os.environ.get("GEMINI_REQUEST_DELAY", "0.5"))
# Thread-safe throttling + shared cooldown across concurrent requests.
self._rate_limit_lock = threading.Lock()
self._rate_limited_until = 0.0
# Speed/size knobs (reduce prompt size + generation time)
self.max_chunks_general = int(os.environ.get("RAG_TOP_K_GENERAL", "4"))
self.max_chunks_comparison = int(os.environ.get("RAG_TOP_K_COMPARISON", "6"))
self.max_context_chars_per_chunk = int(os.environ.get("RAG_CONTEXT_CHARS_PER_CHUNK", "280"))
self.max_output_tokens = int(os.environ.get("GEMINI_MAX_OUTPUT_TOKENS", "600"))
self._api_timeout_seconds = int(os.environ.get("GEMINI_API_TIMEOUT", "45"))
self._generation_config = {
"max_output_tokens": self.max_output_tokens,
"temperature": 0.4,
}
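# Environment knobs read above, with their built-in defaults (listed here for reference;
# none of these variables are required):
#   GEMINI_REQUEST_DELAY=0.5   RAG_TOP_K_GENERAL=4   RAG_TOP_K_COMPARISON=6
#   RAG_CONTEXT_CHARS_PER_CHUNK=280   GEMINI_MAX_OUTPUT_TOKENS=600   GEMINI_API_TIMEOUT=45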
self._load_and_process_data()
# Build or load chunk embeddings. The encoder is loaded here only if no cached embeddings exist; otherwise it is lazy-loaded on the first search.
self._build_index()
print("RAG Engine Initialized (encoder + all embeddings ready at startup).")
def _build_car_normalization(self) -> Dict[str, str]:
"""ืขืฆื 4: ืืืืื ื ืจืืื ืฉืืืช ืจืืืื (ืขืืจืืช + ืื ืืืืช)"""
return {
# Toyota Corolla
'ืงืืจืืื': 'toyota_corolla',
'toyota corolla': 'toyota_corolla',
'ืืืืืื ืงืืจืืื': 'toyota_corolla',
'corolla': 'toyota_corolla',
# Citroen C3
'c3': 'citroen_c3',
'citroen c3': 'citroen_c3',
'ืกืืืจืืื c3': 'citroen_c3',
'c3 ืืืืฉื': 'citroen_c3',
# Audi RS3
'rs3': 'audi_rs3',
'audi rs3': 'audi_rs3',
'ืืืื rs3': 'audi_rs3',
# Kia EV9
'ev9': 'kia_ev9',
'kia ev9': 'kia_ev9',
'ืงืื ev9': 'kia_ev9',
# MG S6
's6': 'mg_s6',
'mg s6': 'mg_s6',
'mg-s6': 'mg_s6',
# Hyundai Elantra N
'elantra n': 'hyundai_elantra_n',
'ืืื ืืจื n': 'hyundai_elantra_n',
'elantra': 'hyundai_elantra_n',
# Aion HT
'aion ht': 'aion_ht',
'ht': 'aion_ht',
'ืืืื ht': 'aion_ht',
# Genesis GV80
'genesis gv80': 'genesis_gv80',
'gv80': 'genesis_gv80',
"ื'ื ืกืืก gv80": 'genesis_gv80',
# Link & Co 01 (support "and", "&", Hebrew ืืื ืง ืื ื/& ืงื)
'link & co 01': 'link_co_01',
'link co 01': 'link_co_01',
'link and co 01': 'link_co_01',
'link and co': 'link_co_01',
"ืืื ืง ืื ื ืงื 01": 'link_co_01',
"ืืื ืง ืื ื ืงื": 'link_co_01',
"ืืื ืง & ืงื 01": 'link_co_01',
"ืืื ืง & ืงื": 'link_co_01',
}
def _chunk_by_topic(self, text: str, title: str, url: str) -> List[Dict]:
"""ืขืฆื 1: ืืืืงืช ืืืืข ืืืชืืืืช ืืคื ื ืืฉืืื"""
chunks_list = []
# Main topics detected in the text
topics = {
'ืืคืจื ืืื ื|ืืืฆืืขืื|ืื ืืข|ืชืืื|ืื ืขื': 'technical_specs',
'ืืืืืืช|ืืขืจืืืช ืืืืืืช|ืืืืื': 'safety',
'ืืืืจ|ืืืืจื': 'price',
'ืขืืฆืื|ืืจืื|ืืืฆืื ื': 'design',
'ื ืืืืช|ืืชื ืืืืช|ืืฉืืืฉืื': 'comfort',
'ืฆืจืืื|ืืืื|ืืขืื ื': 'efficiency',
'ืืื ืืืงื|ืืื|ืืืฆืืขืื ืืื ืืืื': 'dynamic',
}
# Basic split into paragraphs
paragraphs = text.split('\n')
current_chunk = []
current_topic = 'general'
for para in paragraphs:
if len(para.strip()) < 20:
continue
# Detect the paragraph's topic
for pattern, topic in topics.items():
if re.search(pattern, para, re.IGNORECASE):
if current_chunk and current_topic != topic:
chunk_text = '\n'.join(current_chunk)
if len(chunk_text) > 50:
chunks_list.append({
'text': chunk_text,
'topic': current_topic,
'title': title,
'url': url
})
current_chunk = []
current_topic = topic
break
current_chunk.append(para)
# Append the last chunk
if current_chunk:
chunk_text = '\n'.join(current_chunk)
if len(chunk_text) > 50:
chunks_list.append({
'text': chunk_text,
'topic': current_topic,
'title': title,
'url': url
})
return chunks_list
def _normalize_car_name(self, text: str) -> str:
"""ืขืฆื 4: ื ืจืืื ืฉืืืช ืจืืืื ืืืงืกื
Returns canonical id (e.g. 'audi_rs3') if matched, else returns None.
Uses regex_patterns first, then falls back to simple variation map.
"""
if not text:
return None
txt = text.lower()
# Try regex patterns (robust, multilingual, handles spaces/hyphens)
for pattern, canonical in getattr(self, 'regex_patterns', {}).items():
try:
if re.search(pattern, txt):
return canonical
except re.error:
# Skip invalid patterns (shouldn't happen)
continue
# Fallback: match known variants as whole words
for variant, canonical in self.car_normalization.items():
if re.search(rf"\b{re.escape(variant.lower())}\b", txt):
return canonical
return None
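# Illustrative behaviour of _normalize_car_name (the example queries are made up):
#   _normalize_car_name("how fast is the audi rs3?")  -> "audi_rs3"  (regex pattern)
#   _normalize_car_name("ev9 trunk space")            -> "kia_ev9"   (regex pattern)
#   _normalize_car_name("tell me about the bmw x5")   -> None        (unsupported model)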
def _build_regex_patterns(self):
"""ืื ื ืชืื ืืืช ืจืืงืก ืืืืืช ืืืืืื ืฉืืืช ืจืืืื (ืืืื ืขืืจืืช โ ืืื \\b ืขื ืขืืจืืช)"""
# Patterns: English use \\b; Hebrew has no word boundary (ืืื ืืจื can appear as "ืืืื ืืจื")
self.regex_patterns = {
r'\baudi[\s\-]*rs\s*3\b': 'audi_rs3',
r'\bcitroen[\s\-]*c\s*3\b': 'citroen_c3',
r'\bc\s*3\b': 'citroen_c3',
r'\bkia[\s\-]*ev\s*9\b': 'kia_ev9',
r'\bev\s*9\b': 'kia_ev9',
r'\bhyundai[\s\-]*elantra\s*n\b': 'hyundai_elantra_n',
r'\belantra\s*n\b': 'hyundai_elantra_n',
r'ืืื ืืจื\s*[nN]?': 'hyundai_elantra_n', # Hebrew: "ืืื ืืจื" or "ืืื ืืจื N" (no \\b โ "ืืืื ืืจื" ok)
r'\baion\s*ht\b': 'aion_ht',
r'\bgenesis[\s\-]*gv\s*80\b': 'genesis_gv80',
r'\bgv\s*80\b': 'genesis_gv80',
r'\blink\s*(?:&|and)\s*co\.?\s*01\b': 'link_co_01',
r'\blink\s*&?\s*co\s*01\b': 'link_co_01',
r'ืืื ืง\s*(?:&|ืื ื)\s*ืงื\s*01?': 'link_co_01', # Hebrew: ืืื ืง ืื ื/& ืงื 01
r'\brs\s*3\b': 'audi_rs3',
r'\bcorolla\b': 'toyota_corolla',
}
def _extract_keywords(self, text: str) -> List[str]:
"""ืขืฆื 3: ืืืืืฅ ืืืืืช ืืคืชื"""
# Extract meaningful terms (Hebrew and English)
keywords = []
# Patterns for important keywords
important_words = [
r'\b\d+\s*ื"ืก\b', # ืงืื
r'\b\d+\s*ืงื"ืฉ\b', # ืงื"ืฉ
r'\b\d+\.?\d*\s*ืฉื ืืืช?\b', # ืืื ืืืฆื
r'\b\d+\s*ืืืืจ\b', # ื ืคื
r'ืืืจืื|ืืืืจืืื|ืืฉืืื|ืื-ืืฆืืืืช|ืืื ืืช', # ืกืืื ืื ืขื
r'ืื ืืข|ืืืืืืช|ื ืืืืช|ืขืืฆืื|ืืืืจ', # ืงืืืืจืืืช
]
for pattern in important_words:
matches = re.findall(pattern, text, re.IGNORECASE)
keywords.extend(matches)
return list(set(keywords))
def _load_and_process_data(self):
"""ืขืฆืืช 1+2: ืืขืื ื ืืืืืงื ืืืื ืขื ืืื-ืืืื"""
with open(self.data_path, 'r', encoding='utf-8') as f:
raw_data = json.load(f)
self.chunks = []
self.chunk_metadata = []
for article in raw_data:
text = article['content']
url = article['url']
title = article['title']
# Regex-based normalization of car names found in the article text
normalized_car = self._normalize_car_name(text)
# Smart topic-based chunking before indexing, using the full article text
topic_chunks = self._chunk_by_topic(text, title, url)
for chunk_data in topic_chunks:
chunk_text = chunk_data['text']
# Tip 2: rich metadata for every chunk
keywords = self._extract_keywords(chunk_text)
metadata = {
"title": chunk_data['title'],
"url": chunk_data['url'],
"topic": chunk_data['topic'],
"keywords": keywords,
"publish_date": "2024-2025", # ืืืืข ืคืจืกืื
"car_type": self._extract_car_type(chunk_data['title']),
"length": len(chunk_text)
}
self.chunks.append(chunk_text)
self.chunk_metadata.append(metadata)
# Tip 3: incrementally build the keyword index
for keyword in keywords:
if keyword not in self.keyword_index:
self.keyword_index[keyword] = []
self.keyword_index[keyword].append(len(self.chunks) - 1)
print(f"Created {len(self.chunks)} smart chunks from {len(raw_data)} articles with rich metadata.")
def _extract_car_type(self, title: str) -> str:
"""ืืืืื ืกืื ืืจืื"""
types_map = {
'C3': 'supermini',
'RS3': 'compact',
'EV9': 'suv',
'S6': 'suv',
'ืืื ืืจื': 'sedan',
'Elantra': 'sedan',
'HT': 'suv',
'ืืื ืง': 'compact', # Link & Co 01
'01': 'compact', # Link & Co 01 (title contains "01")
}
for key, type_val in types_map.items():
if key in title:
return type_val
return 'unknown'
def _get_encoder(self):
"""Load encoder once (called at startup before _build_index)."""
if self.encoder is None:
print("Loading embedding model...")
self.encoder = SentenceTransformer(self._encoder_model_name)
print("Embedding model loaded.")
return self.encoder
def _embeddings_path(self) -> Tuple[str, str]:
"""Path to saved embeddings and meta (same dir as scraped_data.json)."""
data_dir = os.path.dirname(self.data_path)
return (
os.path.join(data_dir, "chunk_embeddings.npy"),
os.path.join(data_dir, "chunk_embeddings_meta.json"),
)
def _build_index(self):
"""Build or load chunk vectors. Saves to disk so next startup loads from file (no encoder over chunks)."""
if self.embeddings is not None:
return
emb_path, meta_path = self._embeddings_path()
n_chunks = len(self.chunks)
# Try load existing vectors (only if chunk count matches)
if os.path.isfile(emb_path) and os.path.isfile(meta_path):
try:
with open(meta_path, "r", encoding="utf-8") as f:
meta = json.load(f)
if meta.get("n_chunks") == n_chunks and meta.get("model") == self._encoder_model_name:
self.embeddings = np.load(emb_path)
if self.embeddings.shape[0] == n_chunks:
print(f"Loaded {n_chunks} embeddings from {emb_path}")
return
except Exception as e:
print(f"Could not load saved embeddings: {e}. Rebuilding...")
# Build and save
print("Building chunk embeddings...")
encoder = self._get_encoder()
self.embeddings = encoder.encode(self.chunks, batch_size=32)
norm = np.linalg.norm(self.embeddings, axis=1, keepdims=True)
self.embeddings = self.embeddings / norm
os.makedirs(os.path.dirname(emb_path) or ".", exist_ok=True)
np.save(emb_path, self.embeddings)
with open(meta_path, "w", encoding="utf-8") as f:
json.dump({"n_chunks": n_chunks, "model": self._encoder_model_name}, f, indent=0)
print(f"Saved {n_chunks} embeddings to {emb_path}")
def _hybrid_search(self, query: str, top_k: int = 5) -> List[Dict]:
"""Hybrid search: prebuilt chunk vectors + keyword index. Only the query is embedded at runtime."""
# Embeddings are built at startup; this is a no-op if already built.
self._build_index()
# Normalize the query
normalized_query = self._normalize_car_name(query)
# If no canonical id was found, fall back to the original query
if normalized_query is None:
normalized_query = query
# Vector search
# Ensure we pass a string to the encoder
query_text_for_embedding = normalized_query if isinstance(normalized_query, str) else str(normalized_query)
encoder = self._get_encoder()
query_embedding = encoder.encode([query_text_for_embedding])
query_embedding = query_embedding / np.linalg.norm(query_embedding)
scores = np.dot(self.embeddings, query_embedding.T).flatten()
# Keyword search
keywords = self._extract_keywords(normalized_query)
keyword_matches = set()
for keyword in keywords:
if keyword in self.keyword_index:
keyword_matches.update(self.keyword_index[keyword])
# Combine the results
combined_scores = scores.copy()
for idx in keyword_matches:
combined_scores[idx] += 0.3  # bonus weight for keyword matches
# Select the top-k results
top_indices = np.argsort(combined_scores)[-top_k:][::-1]
results = []
for idx in top_indices:
results.append({
"text": self.chunks[idx],
"metadata": self.chunk_metadata[idx],
"score": float(combined_scores[idx])
})
return results
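# Scoring sketch for the hybrid search above: for every chunk,
#   score = cosine(query_vec, chunk_vec) + (0.3 if the chunk matched any extracted keyword else 0.0)
# and the top_k highest-scoring chunks are returned together with their metadata.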
def retrieve(self, query: str, top_k: int = 5):
"""ืขืฆื 3: ืืืคืืฉ ืืืืจืืื ืืืงืื ืจืง ืืงืืืจื"""
return self._hybrid_search(query, top_k)
def _extract_comparison_data(self, car1: str, car2: str) -> Dict:
"""ืขืฆื 5: ืืืืืฅ ื ืชืื ืื ืืืื ืื ืืืฉืืืื"""
specs_map = {
'power': r'(\d+)\s*ื"ืก',
'torque': r'(\d+\.?\d*)\s*ืงื"ื',
'acceleration': r'(\d+\.?\d*)\s*ืฉื ืืืช?\s*ื-?100',
'top_speed': r'(\d+)\s*ืงื"ืฉ',
'consumption': r'(\d+\.?\d*)\s*ืง"ื/l',
}
# Search for data for each car
car1_data = {}
car2_data = {}
results1 = self._hybrid_search(car1, top_k=10)
results2 = self._hybrid_search(car2, top_k=10)
for result in results1:
for spec, pattern in specs_map.items():
match = re.search(pattern, result['text'])
if match and spec not in car1_data:
car1_data[spec] = match.group(1)
for result in results2:
for spec, pattern in specs_map.items():
match = re.search(pattern, result['text'])
if match and spec not in car2_data:
car2_data[spec] = match.group(1)
return {
"car1": {"name": car1, "specs": car1_data},
"car2": {"name": car2, "specs": car2_data}
}
def _is_comparison_question(self, query: str) -> bool:
"""Rule-based only (regex/keywords). No LLM. Detects comparison vs single-model questions."""
if not query:
return False
q = query.lower()
comparison_keywords = [
'ืื ืืืชืจ ืืื', 'ืืฉืืืื', 'ืืขืืืช', 'vs', 'versus', 'compare', 'better than',
'ืืื ', ' ืืฉืืืื', 'ืืืฉืืืช', 'compare between',
]
if any(k in q for k in comparison_keywords):
return True
# "X vs Y" or "X ื-Y" / "X and Y" with two model-like tokens
if re.search(r'\bvs\b|\bversus\b| ื | and | versus ', q, re.IGNORECASE):
return True
return False
def _maintain_conversation_history(self, query: str, response: str, max_turns: int = 5):
"""ืขืฆื 10: ื ืืืื ืืืกืืืจืืืช ืฉืืื ืืืื"""
self.conversation_history.append({
"query": query,
"response": response
})
# Keep only the last max_turns turns
if len(self.conversation_history) > max_turns:
self.conversation_history = self.conversation_history[-max_turns:]
def _get_context_from_history(self) -> str:
"""ืืืืืฅ ืืงืฉืจ ืืืืกืืืจืืืช ืืฉืืื โ Q ื-A ืืื ืฉืืืืื ืืืื ืืืืข ืืฉืืื ืืืืฉื"""
if not self.conversation_history:
return ""
context_lines = []
for turn in self.conversation_history[-3:]:  # last 3 turns
q = (turn.get("query") or "")[:200]
a = (turn.get("response") or "")[:300]
context_lines.append(f"Q: {q}\nA: {a}")
return "\n\n".join(context_lines)
def _get_mentioned_cars_in_conversation(self, max_turns: int = 5) -> set:
"""ืืืืื ืฉืืืคืืขืื ืืืืกืืืจืืืช ืืฉืืื ืื ืืืืืช (ืฉืืืืช + ืชืฉืืืืช) โ ืืื ืืืืืง ืืื ืึพfollow-up."""
mentioned = set()
if not self.conversation_history:
return mentioned
for turn in self.conversation_history[-max_turns:]:
for key in ("query", "response"):
text = (turn.get(key) or "")[:1500]
mentioned.update(self._find_supported_canonicals_in_text(text))
return mentioned
def _get_cache_key(self, query: str) -> str:
"""Generate cache key for query"""
return hashlib.md5(query.lower().encode()).hexdigest()
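# Cache-key behaviour (hypothetical queries): keys are md5 hex digests of the lowercased
# query, so "How is the Kia EV9?" and "how is the kia ev9?" share one cache entry.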
@staticmethod
def _extract_retry_after_seconds(error_text: str) -> Optional[int]:
"""Best-effort parse of Retry-After seconds from an error message."""
if not error_text:
return None
# Common patterns: "Retry-After: 60", "retry_after: 60", "Retry after 60s"
m = re.search(r"retry[-_\s]*after[:\s]+(\d+)", error_text, re.IGNORECASE)
if m:
try:
return int(m.group(1))
except Exception:
return None
m = re.search(r"retry\s+after\s+(\d+)\s*s", error_text, re.IGNORECASE)
if m:
try:
return int(m.group(1))
except Exception:
return None
return None
@staticmethod
def _is_hebrew(text: str) -> bool:
return bool(re.search(r"[\u0590-\u05FF]", text or ""))
# Canonical id -> display name for supported models only (allowed to recommend/compare).
CANONICAL_TO_DISPLAY = {
"citroen_c3": "Citroen C3",
"audi_rs3": "Audi RS3",
"kia_ev9": "Kia EV9",
"mg_s6": "MG S6",
"hyundai_elantra_n": "Hyundai Elantra N",
"aion_ht": "Aion HT",
"genesis_gv80": "Genesis GV80",
"link_co_01": "Link & Co 01",
}
@classmethod
def _supported_cars_display(cls) -> List[str]:
"""
The only car models this app is allowed to recommend/compare.
Must correspond to articles that exist in `data_ingestion/scraped_data.json`.
"""
return list(cls.CANONICAL_TO_DISPLAY.values())
def _find_supported_canonicals_in_text(self, text: str) -> set:
"""Find all supported canonical car ids mentioned in the text (only canonicals we allow)."""
found = set()
if not text:
return found
txt = text.lower()
allowed = set(self.CANONICAL_TO_DISPLAY.keys())
# Regex patterns
for pattern, canonical in getattr(self, "regex_patterns", {}).items():
try:
if canonical in allowed and re.search(pattern, txt):
found.add(canonical)
except re.error:
continue
# Variant map
for variant, canonical in self.car_normalization.items():
if canonical in allowed and re.search(rf"\b{re.escape(variant.lower())}\b", txt):
found.add(canonical)
return found
def _get_ordered_supported_canonicals_in_text(self, text: str) -> List[str]:
"""Return supported canonicals mentioned in text, in order of first appearance."""
if not text:
return []
txt = text.lower()
allowed = set(self.CANONICAL_TO_DISPLAY.keys())
# canonical -> earliest start position
positions: Dict[str, int] = {}
for pattern, canonical in getattr(self, "regex_patterns", {}).items():
if canonical not in allowed:
continue
try:
m = re.search(pattern, txt)
if m:
pos = m.start()
if canonical not in positions or pos < positions[canonical]:
positions[canonical] = pos
except re.error:
continue
for variant, canonical in self.car_normalization.items():
if canonical not in allowed:
continue
m = re.search(rf"\b{re.escape(variant.lower())}\b", txt)
if m:
pos = m.start()
if canonical not in positions or pos < positions[canonical]:
positions[canonical] = pos
return [c for c in sorted(positions.keys(), key=lambda c: positions[c])]
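# Illustrative call (made-up query): "compare the kia ev9 with the audi rs3"
#   -> ["kia_ev9", "audi_rs3"]  (ordered by first mention; the comparison flow uses
#      this order to pick car1 and car2)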
@staticmethod
def _looks_like_specific_car_question(text: str) -> bool:
"""
Heuristic: decide if user likely asks about a specific car model,
not a general concept question.
"""
if not text:
return False
t = text.lower()
# comparison markers
if re.search(r"\b(vs|versus|compare)\b", t) or any(k in t for k in ["ืืฉืืืื", "ืืขืืืช", "ืื ืืืชืจ ืืื", "ืืื"]):
return True
# common โtell me about modelโ phrasing
if any(k in t for k in ["tell me about", "what do you think", "review", "ืืืื", "ืืขื ืขื", "ืื ืืขืชื", "ืกืคืจ ืื ืขื", "ืชืกืคืจ ืื ืขื"]):
return True
# model-like token patterns (letters+digits, e.g. rs3, ev9, x5)
if re.search(r"\b[a-z]{1,}\s*\d{1,}\b", t) or re.search(r"\b\d{1,}\s*[a-z]{1,}\b", t) or re.search(r"\b[a-z]{2,}\d{1,}\b", t):
return True
return False
def _unsupported_car_refusal(self, query: str, is_comparison: bool) -> str:
supported = ", ".join(self._supported_cars_display())
if self._is_hebrew(query):
if is_comparison:
return (
"โ ืื ื ืืืื ืืืฉืืืช/ืืืืืืฅ **ืจืง ืขื ืืกืืก ืืืืข ืฉืงืืื ืืฆืื** ืืชืื ืืชืืืช ืึพ`auto.co.il`.\n"
"ื ืจืื ืฉืืคืืืช ืืื ืืืืืืื ืฉืืืงืฉืช **ืื ื ืืฆื ืืืกืืก ืืืืข ืฉืื**, ืืืื ืืกืืจ ืื ืืืืืืฅ ืขืืื ืื ืืืฉืืืช ืืืชื.\n\n"
f"โ
ืืืืื ื ืชืืืื ืืจืืข: {supported}\n"
"ืื ืชืจืฆื, ืืชืื ืืฉืืืื ืืื ืฉื ื ืืืืื ืืืจืฉืืื."
)
return (
"โ ืื ื ืืืื ืืืืืืฅ **ืจืง ืขื ืืกืืก ืืืืข ืฉืงืืื ืืฆืื** ืืชืื ืืชืืืช ืึพ`auto.co.il`.\n"
"ืืืื ืฉืืืงืฉืช **ืื ื ืืฆื ืืืกืืก ืืืืข ืฉืื**, ืืืื ืืกืืจ ืื ืืืืืืฅ ืขืืื.\n\n"
f"โ
ืืืืื ื ืชืืืื ืืจืืข: {supported}\n"
"ืื ืชืืชืื ืืื ืืืืืืื ืืืจืฉืืื โ ืืฉืื ืืขืืืจ."
)
else:
if is_comparison:
return (
"โ I can compare/recommend **only using information I have** from articles scraped from `auto.co.il`.\n"
"At least one of the models you mentioned is **not in my knowledge base**, so Iโm not allowed to recommend or compare it.\n\n"
f"โ
Currently supported models: {supported}\n"
"If you want, ask for a comparison between two models from this list."
)
return (
"โ I can recommend **only using information I have** from articles scraped from `auto.co.il`.\n"
"The model you asked about is **not in my knowledge base**, so Iโm not allowed to recommend it.\n\n"
f"โ
Currently supported models: {supported}\n"
"Ask about one of these models and Iโll help."
)
def _wait_for_rate_limit(self):
"""Enforce minimum delay between API requests to avoid rate limiting"""
# Thread-safe: Gradio can execute requests concurrently.
with self._rate_limit_lock:
now = time.time()
# Honor global cooldown after a 429.
if now < self._rate_limited_until:
time.sleep(self._rate_limited_until - now)
now = time.time()
elapsed = now - self.last_request_time
if elapsed < self.request_delay:
time.sleep(self.request_delay - elapsed)
self.last_request_time = time.time()
def _get_openrouter_key(self) -> Optional[str]:
"""OpenRouter API key from env. HF Spaces: add Secret OPENROUTER_API_KEY. Local: .env openRouter_API_KEY."""
for name in ("OPENROUTER_API_KEY", "openRouter_API_KEY", "OPENROUTER_APIKEY", "OPENROUTER_KEY"):
v = os.environ.get(name)
if v and str(v).strip():
return str(v).strip()
return None
def _call_openrouter(self, system_prompt: str, prompt: str, timeout_seconds: int = 28) -> Optional[str]:
"""Call OpenRouter API for a fast response. Returns text or None on failure."""
key = self._get_openrouter_key()
if not key or not key.strip():
PIPELINE_LOG.info("OpenRouter key not set - add Secret OPENROUTER_API_KEY in HF Space settings. Using Gemini.")
return None
url = "https://openrouter.ai/api/v1/chat/completions"
# Prefer fast Gemini on OpenRouter (gemini-2.0-flash-exp:free was deprecated/404; use gemini-3-flash-preview)
model = os.environ.get("OPENROUTER_MODEL", "google/gemini-3-flash-preview")
payload = {
"model": model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
],
"max_tokens": self.max_output_tokens,
"temperature": 0.4,
}
headers = {"Authorization": f"Bearer {key.strip()}", "Content-Type": "application/json"}
try:
self._wait_for_rate_limit()
PIPELINE_LOG.info("Calling OpenRouter model=%s timeout=%ds", model, timeout_seconds)
r = requests.post(url, json=payload, headers=headers, timeout=timeout_seconds)
r.raise_for_status()
data = r.json()
choices = data.get("choices") or []
if not choices:
return None
content = (choices[0].get("message") or {}).get("content") or ""
if not content or not str(content).strip():
return None
out = str(content).strip()
PIPELINE_LOG.info("OpenRouter response OK len=%d", len(out))
return out
except Exception as e:
PIPELINE_LOG.warning("OpenRouter failed: %s", str(e)[:120])
return None
def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
"""When OPENROUTER_API_KEY is set: use only OpenRouter (no Gemini). Else: use Gemini with backoff."""
PIPELINE_LOG.info("_call_api_with_backoff START models=%s prompt_len=%d", models, len(prompt))
openrouter_key = self._get_openrouter_key()
if openrouter_key:
# Generation: use only OpenRouter when key is set (avoid Gemini rate limit)
PIPELINE_LOG.info("OpenRouter key present - using OpenRouter only for generation (no Gemini)")
for attempt in range(2):
result = self._call_openrouter(system_prompt, prompt, timeout_seconds=35)
if result:
return result
PIPELINE_LOG.warning("OpenRouter attempt %d failed, retrying...", attempt + 1)
return (
"โ OpenRouter request failed after retries. Check OPENROUTER_API_KEY and OPENROUTER_MODEL in Space secrets. "
"See logs for details."
)
# No OpenRouter key: use Gemini
PIPELINE_LOG.info("OpenRouter key not set - using Gemini for generation")
max_attempts_per_model = 8
max_rate_limit_wait_s = 180 # wait up to 3 minutes per attempt before retry
# Try each model
for model_idx, model in enumerate(models):
for attempt in range(max_attempts_per_model):
try:
# Wait before API call to respect rate limits
self._wait_for_rate_limit()
PIPELINE_LOG.info("Calling LLM model=%s attempt=%d", model, attempt + 1)
try:
model_obj = genai.GenerativeModel(model, system_instruction=system_prompt)
contents = prompt
except TypeError:
model_obj = genai.GenerativeModel(model)
contents = system_prompt + "\n\n" + prompt
response = model_obj.generate_content(
contents,
generation_config=self._generation_config,
)
text = getattr(response, "text", None) if response else None
if not (text and str(text).strip()):
PIPELINE_LOG.warning("LLM returned empty or no text")
return "โ The model returned no text (possibly blocked). Please try rephrasing."
out = str(text).strip()
PIPELINE_LOG.info("LLM response OK len=%d preview=%s", len(out), (out[:200] + "..." if len(out) > 200 else out))
return out
except Exception as e:
error_text = str(e)
error_msg = error_text.lower()
# Handle rate limit errors - wait longer and retry more
if "429" in error_msg or "rate" in error_msg or "quota" in error_msg or "too many" in error_msg:
retry_after = self._extract_retry_after_seconds(error_text) or 0
backoff = min(max_rate_limit_wait_s, 10 * (2 ** min(attempt, 5)))
jitter = random.uniform(0.0, 1.0)
wait_time = min(max_rate_limit_wait_s, max(retry_after, backoff) + jitter)
# Global cooldown so concurrent calls don't stampede.
with self._rate_limit_lock:
self._rate_limited_until = max(self._rate_limited_until, time.time() + wait_time)
print(f"โ ๏ธ Rate limited on {model}. Waiting {wait_time:.1f}s before retry ({attempt + 1}/{max_attempts_per_model})...")
time.sleep(wait_time)
# Retry same model unless attempts exhausted.
if attempt < max_attempts_per_model - 1:
continue
if model_idx < len(models) - 1:
print("โ ๏ธ Rate limit persists. Trying next model...")
break
msg = "โ ๏ธ API Rate Limit: ืืืชืื ืึพ2โ3 ืืงืืช ืื ืกื ืฉืื. / Please wait 2โ3 minutes and try again."
PIPELINE_LOG.warning("_call_api_with_backoff returning rate-limit message")
return msg
# Handle 404 errors - model not available, try next one
elif "404" in error_msg or "not found" in error_msg or "not supported" in error_msg:
if model_idx < len(models) - 1:
print(f"โ ๏ธ Model {model} not available. Trying next model...")
time.sleep(2)
break # Move to next model
else:
PIPELINE_LOG.warning("_call_api_with_backoff no available models")
return f"โ No available models. Please try again later."
# Other errors - retry with same model once
else:
if attempt < 2:
sleep_s = 1.5 * (attempt + 1)
print(f"โ ๏ธ Error: {str(e)[:80]}. Retrying in {sleep_s:.1f}s...")
time.sleep(sleep_s)
continue
# If retry also failed, try next model
if model_idx < len(models) - 1:
print("โ ๏ธ Trying next model...")
break
PIPELINE_LOG.warning("_call_api_with_backoff error: %s", str(e)[:150])
return f"โ Error: {str(e)[:100]}"
PIPELINE_LOG.warning("_call_api_with_backoff exhausted all models, returning failure")
return "โ Failed to get response from API"
def _call_api_with_backoff_stream(self, system_prompt: str, prompt: str, models: List[str]):
"""
Streaming version: yields incremental text while generating.
The last yielded value is the final (or error) text; callers just iterate over the generator.
"""
max_attempts_per_model = 8
max_rate_limit_wait_s = 180
for model_idx, model in enumerate(models):
for attempt in range(max_attempts_per_model):
try:
self._wait_for_rate_limit()
try:
model_obj = genai.GenerativeModel(model, system_instruction=system_prompt)
contents = prompt
except TypeError:
model_obj = genai.GenerativeModel(model)
contents = system_prompt + "\n\n" + prompt
stream = model_obj.generate_content(
contents,
generation_config=self._generation_config,
stream=True,
)
acc = ""
start = time.time()
timeout = self._api_timeout_seconds
for chunk in stream:
if time.time() - start > timeout:
yield (acc + "\n\nโฑ๏ธ Request timed out. Partial response above. Try again or shorten the query.") if acc else "โฑ๏ธ Request timed out. Please try again."
return
piece = getattr(chunk, "text", "") or ""
if not piece:
continue
acc += piece
yield acc
return
except Exception as e:
error_text = str(e)
error_msg = error_text.lower()
if "429" in error_msg or "rate" in error_msg or "quota" in error_msg or "too many" in error_msg:
retry_after = self._extract_retry_after_seconds(error_text) or 0
backoff = min(max_rate_limit_wait_s, 10 * (2 ** min(attempt, 5)))
jitter = random.uniform(0.0, 1.0)
wait_time = min(max_rate_limit_wait_s, max(retry_after, backoff) + jitter)
with self._rate_limit_lock:
self._rate_limited_until = max(self._rate_limited_until, time.time() + wait_time)
time.sleep(wait_time)
if attempt < max_attempts_per_model - 1:
continue
if model_idx < len(models) - 1:
break
yield "โ ๏ธ API Rate Limit: ืืืชืื ืึพ2โ3 ืืงืืช ืื ืกื ืฉืื. / Please wait 2โ3 minutes and try again."
return
if "404" in error_msg or "not found" in error_msg or "not supported" in error_msg:
if model_idx < len(models) - 1:
time.sleep(1.0)
break
yield "โ No available models. Please try again later."
return
if attempt < 2:
time.sleep(1.0 + attempt)
continue
if model_idx < len(models) - 1:
break
yield f"โ Error: {error_text[:120]}"
return
yield "โ Failed to get response from API"
def configure_api(self, api_key: str) -> None:
"""Configure Gemini API key (for use by external agent)."""
genai.configure(api_key=api_key)
def prepare_generation(self, query: str) -> Tuple[Optional[str], Optional[str], Optional[str], List[str]]:
"""
Run RAG pipeline up to (but not including) the LLM call.
Returns (refusal_message, system_prompt, user_prompt, steps_log).
If refusal_message is set, the other three are None / empty; otherwise use prompts for generation.
"""
PIPELINE_LOG.info("prepare_generation START query=%r", query[:80] if query else "")
steps_log: List[str] = []
steps_log.append("๐ Normalizing car names...")
canonical = self._normalize_car_name(query)
ordered_supported = self._get_ordered_supported_canonicals_in_text(query)
current_query_cars = set(ordered_supported) if ordered_supported else set()
if canonical:
current_query_cars.add(canonical)
search_query = canonical
else:
search_query = query
# Models the user already mentioned in this chat; a follow-up is restricted to them
mentioned_in_session = self._get_mentioned_cars_in_conversation(max_turns=5)
is_follow_up = (
len(mentioned_in_session) > 0
and (not current_query_cars or current_query_cars <= mentioned_in_session)
)
if is_follow_up:
steps_log.append(f"๐ Follow-up: ืืืงืื ืืืืื ืืฉืืื โ {', '.join(self.CANONICAL_TO_DISPLAY.get(c, c) for c in sorted(mentioned_in_session))}")
is_comparison = self._is_comparison_question(query)
if is_comparison:
steps_log.append("๐ Detected: comparison question (rule-based)")
else:
steps_log.append("๐ Detected: single-model question (rule-based)")
ordered_supported = self._get_ordered_supported_canonicals_in_text(query)
# Show user which cars were identified (for comparison: both; for single: one)
if is_comparison:
if len(ordered_supported) >= 2:
names = ", ".join(self.CANONICAL_TO_DISPLAY.get(c, c) for c in ordered_supported[:2])
steps_log.append(f"โ
ืืืืื ืืืืื ืืืฉืืืื: {names}")
elif len(ordered_supported) == 1:
one_display = self.CANONICAL_TO_DISPLAY.get(ordered_supported[0], ordered_supported[0])
steps_log.append(f"โ
ืืืืื ืืื ืืื (ืืฉื ื ืื ืืจืฉืืื): {one_display}")
else:
if canonical:
steps_log.append(f"โ
Recognized canonical id: {canonical}")
elif not ordered_supported:
steps_log.append("โน๏ธ No canonical car found; using full query for search")
if is_comparison:
if len(ordered_supported) == 0:
refusal = self._unsupported_car_refusal(query, is_comparison=True)
PIPELINE_LOG.info("prepare_generation END refusal=True (comparison, no supported) steps=%d", len(steps_log))
return (refusal, None, None, steps_log)
else:
if not canonical and not ordered_supported and self._looks_like_specific_car_question(query):
refusal = self._unsupported_car_refusal(query, is_comparison=False)
PIPELINE_LOG.info("prepare_generation END refusal=True (single, unsupported) steps=%d", len(steps_log))
return (refusal, None, None, steps_log)
steps_log.append("๐ Searching knowledge base (vectors + keywords)...")
comparison_prompt = ""
context_results = []
if is_comparison:
if len(ordered_supported) >= 2:
car1_can, car2_can = ordered_supported[0], ordered_supported[1]
car1_display = self.CANONICAL_TO_DISPLAY.get(car1_can, car1_can)
car2_display = self.CANONICAL_TO_DISPLAY.get(car2_can, car2_can)
steps_log.append("๐ Extracting structured comparison data (regex)...")
comparison_data = self._extract_comparison_data(car1_can, car2_can)
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_comparison)
steps_log.append(f"โ
Retrieved {len(context_results)} chunks for comparison")
comparison_prompt = f"""
Based on the car reviews, create a structured comparison between {car1_display} and {car2_display}:
Format your response as:
**ืืชืจืื ืืช {car1_display}:**
- [list advantages]
**ืืชืจืื ืืช {car2_display}:**
- [list advantages]
**ืืืืฆื ืืคื ืคืจืืคืื ืืฉืชืืฉ:**
- [personalized recommendation]
Structured Data:
{json.dumps(comparison_data, ensure_ascii=False, indent=2)}
Context from reviews:
"""
elif len(ordered_supported) == 1:
# One model in list, one or more not: can't compare but can tell about the one we know
one_can = ordered_supported[0]
one_display = self.CANONICAL_TO_DISPLAY.get(one_can, one_can)
steps_log.append(f"๐ One supported model ({one_display}); providing info only for it")
context_results = self._hybrid_search(one_can, top_k=self.max_chunks_general)
steps_log.append(f"โ
Retrieved {len(context_results)} chunks")
if self._is_hebrew(query):
comparison_prompt = f"""
ืืืฉืชืืฉ ืืืงืฉ ืืฉืืืื. ืืื ืืืืืื (ืื ืืืชืจ) ืฉืืื ืฆืืื **ืื ื ืืฆื ืืืกืืก ืืืืข ืฉืื** โ ืื ืืคืฉืจ ืืืฉืืืช ืืืืืื ืฉืื ืืืืชื ืขืืืื.
ืืชืฉืืืชื: ืฆืืื ืืงืฆืจื ืฉืืื ื ืืืื ืืืฉืืืช ืืืืืื ืฉืื ืืืืช ืขืืืื, ืืื ืกืคืง ืืืืข ืืื ืจืง ืขื ืืืื ืฉืื ื ืืฆื ืืจืฉืืื: **{one_display}**, ืืืชืืกืก ืขื ืืืงืฉืจ ืืืื.
Context from reviews:
"""
else:
comparison_prompt = f"""
The user asked for a comparison. One or more models they mentioned are **not in my knowledge base** โ I cannot compare to models I haven't learned about.
In your response: briefly state that you cannot compare to models you haven't learned about, then provide full information only about the model that is in my list: **{one_display}**, based on the context below.
Context from reviews:
"""
else:
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
comparison_prompt = "Answer in the same language as the user's question. "
steps_log.append(f"โ
Retrieved {len(context_results)} chunks")
else:
# For a follow-up, retrieve only in the context of models already discussed
retrieval_query = search_query
if is_follow_up and mentioned_in_session:
retrieval_query = " ".join(
self.CANONICAL_TO_DISPLAY.get(c, c) for c in sorted(mentioned_in_session)[:4]
)
context_results = self._hybrid_search(retrieval_query, top_k=self.max_chunks_general)
steps_log.append(f"โ
Retrieved {len(context_results)} relevant chunks")
context_text = ""
for r in context_results:
meta = r['metadata']
context_text += f"""
Source: {meta['title']}
Topic: {meta['topic']}
Content: {r['text'][:self.max_context_chars_per_chunk]}...
"""
conversation_context = self._get_context_from_history()
session_models_instruction = ""
if is_follow_up and mentioned_in_session:
session_names = ", ".join(self.CANONICAL_TO_DISPLAY.get(c, c) for c in sorted(mentioned_in_session))
session_models_instruction = f"""
5. **Session context:** In this chat the user has been discussing only these models: {session_names}. For this follow-up question, answer ONLY in the context of these models. Do not introduce or recommend other models unless (a) the user explicitly asks to compare with another model, or (b) you are giving a brief tip like "ืื ืชืจืฆื ืืฉืืืื ืืืื ืืืจ, ืืคืฉืจ ืืฉืืื ืขื..." / "If you want to compare with another model, you can ask about...". Stick to the session models for the main answer.
"""
system_prompt = """You are an expert automotive assistant. Your reply in the chat MUST be a single, concrete verbal answer that the user will see directly.
Your task:
1. Use ONLY the "Context from car reviews" provided in the user message.
2. Combine everything you understood from the user's question with everything relevant you retrieved from the sources into ONE coherent, verbal answerโas a car expert would say to a friend in the chat. Do not output raw snippets, bullet-only lists, or separate fragments; write a unified answer (paragraphs and/or structured sections) that directly answers the question.
3. Respond in the same language as the user (Hebrew or English). For comparison questions, provide a structured analysis with clear advantages for each vehicle, still in one cohesive reply.
4. If the context is empty or irrelevant, say clearly that you have no information from your knowledge base for this question.
""" + session_models_instruction + """
The user expects to see this single aggregated answer in the chatโmake it complete and concrete."""
user_prompt = f"""Context from car reviews:
{context_text if context_text.strip() else "(No matching chunks found.)"}
Previous conversation context (last turns):
{conversation_context}
User question: {query}
{comparison_prompt}
Synthesize the context above into one clear, verbal answer that aggregates all relevant information and directly answers the user's question. Your entire response will be shown to the user as the chat reply:"""
PIPELINE_LOG.info("prepare_generation END refusal=%s has_system_prompt=%s has_user_prompt=%s steps=%d",
False, bool(system_prompt), bool(user_prompt), len(steps_log))
return (None, system_prompt, user_prompt, steps_log)
def generate_response(self, query: str, history, api_key: str):
"""ืืฆืืจืช ืชืฉืืื ืืืื ืขื ืื 10 ืืขืฆืืช"""
if not api_key:
return "Error: Gemini API Key is missing."
# Prepare processing log for UX transparency
processing_steps = []
# Check cache for identical queries
cache_key = self._get_cache_key(query)
if cache_key in self.response_cache:
# Return cached response but include note about cache
cached = self.response_cache[cache_key]
return f"๐ Returned cached result\n\n{cached}"
genai.configure(api_key=api_key)
# Step 1 - Normalization
processing_steps.append("๐ Normalizing car names...")
canonical = self._normalize_car_name(query)
if canonical:
search_query = canonical
else:
search_query = query
# Tip 7: detect comparison questions
is_comparison = self._is_comparison_question(query)
# Policy guard: do not recommend models without auto.co.il articles in our KB
ordered_supported = self._get_ordered_supported_canonicals_in_text(query)
# Show user which cars were identified (for comparison: both; for single: one)
if is_comparison:
if len(ordered_supported) >= 2:
names = ", ".join(self.CANONICAL_TO_DISPLAY.get(c, c) for c in ordered_supported[:2])
processing_steps.append(f"โ
ืืืืื ืืืืื ืืืฉืืืื: {names}")
elif len(ordered_supported) == 1:
one_display = self.CANONICAL_TO_DISPLAY.get(ordered_supported[0], ordered_supported[0])
processing_steps.append(f"โ
ืืืืื ืืื ืืื (ืืฉื ื ืื ืืจืฉืืื): {one_display}")
else:
if canonical:
processing_steps.append(f"โ
Recognized canonical id: {canonical}")
else:
processing_steps.append("โน๏ธ No canonical car found; using full query for search")
if is_comparison:
if len(ordered_supported) == 0:
return self._unsupported_car_refusal(query, is_comparison=True)
else:
if not canonical and not ordered_supported and self._looks_like_specific_car_question(query):
return self._unsupported_car_refusal(query, is_comparison=False)
if is_comparison:
if len(ordered_supported) >= 2:
car1_can, car2_can = ordered_supported[0], ordered_supported[1]
car1_display = self.CANONICAL_TO_DISPLAY.get(car1_can, car1_can)
car2_display = self.CANONICAL_TO_DISPLAY.get(car2_can, car2_can)
processing_steps.append("๐ Extracting structured comparison data...")
comparison_data = self._extract_comparison_data(car1_can, car2_can)
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_comparison)
comparison_prompt = f"""
Based on the car reviews, create a structured comparison between {car1_display} and {car2_display}:
Format your response as:
**ืืชืจืื ืืช {car1_display}:**
- [list advantages]
**ืืชืจืื ืืช {car2_display}:**
- [list advantages]
**ืืืืฆื ืืคื ืคืจืืคืื ืืฉืชืืฉ:**
- [personalized recommendation]
Structured Data:
{json.dumps(comparison_data, ensure_ascii=False, indent=2)}
Context from reviews:
"""
elif len(ordered_supported) == 1:
one_can = ordered_supported[0]
one_display = self.CANONICAL_TO_DISPLAY.get(one_can, one_can)
processing_steps.append(f"๐ One supported model ({one_display}); providing info only for it")
context_results = self._hybrid_search(one_can, top_k=self.max_chunks_general)
if self._is_hebrew(query):
comparison_prompt = f"""
ืืืฉืชืืฉ ืืืงืฉ ืืฉืืืื. ืืื ืืืืืื (ืื ืืืชืจ) ืฉืืื ืฆืืื **ืื ื ืืฆื ืืืกืืก ืืืืข ืฉืื** โ ืื ืืคืฉืจ ืืืฉืืืช ืืืืืื ืฉืื ืืืืชื ืขืืืื.
ืืชืฉืืืชื: ืฆืืื ืืงืฆืจื ืฉืืื ื ืืืื ืืืฉืืืช ืืืืืื ืฉืื ืืืืช ืขืืืื, ืืื ืกืคืง ืืืืข ืืื ืจืง ืขื ืืืื ืฉืื ื ืืฆื ืืจืฉืืื: **{one_display}**, ืืืชืืกืก ืขื ืืืงืฉืจ ืืืื.
Context from reviews:
"""
else:
comparison_prompt = f"""
The user asked for a comparison. One or more models they mentioned are **not in my knowledge base** โ I cannot compare to models I haven't learned about.
In your response: briefly state that you cannot compare to models you haven't learned about, then provide full information only about the model that is in my list: **{one_display}**, based on the context below.
Context from reviews:
"""
else:
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
comparison_prompt = "Answer in the same language as the user's question. "
else:
processing_steps.append("๐ Searching knowledge base (hybrid vectors + keywords)...")
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
comparison_prompt = ""
# Build the context text with metadata
context_text = ""
for r in context_results:
meta = r['metadata']
context_text += f"""
Source: {meta['title']}
Topic: {meta['topic']}
Content: {r['text'][:self.max_context_chars_per_chunk]}...
"""
# Tip 10: context from the conversation history
conversation_context = self._get_context_from_history()
# Tip 9: tailored system prompt
system_prompt = """You are an expert automotive assistant. Your answer MUST be based only on the "Context from car reviews" provided in the user message.
Your task: aggregate and summarize the information from that context and give a detailed, verbal answer as a car expert would to a friend. Always output a full paragraph (or more) that directly answers the user's questionโnever leave the answer empty or vague.
Respond in the same language as the user (Hebrew or English). For comparison questions, provide a structured analysis with clear advantages for each vehicle.
If the context is empty or irrelevant, say you have no information from your knowledge base for this question."""
prompt = f"""Context from car reviews:
{context_text if context_text.strip() else "(No matching chunks found.)"}
Previous conversation context (last turns):
{conversation_context}
User question: {query}
{comparison_prompt}
Based on the context above, provide a clear answer that aggregates the information and answers the user's question:"""
# Prepare generation step
processing_steps.append("๐ญ Generating response with Gemini...")
# Use new rate-limited API call with backoff and caching
# Prefer fast Flash models for latency; fall back only within Flash tier.
models_to_try = ['gemini-2.0-flash', 'gemini-1.5-flash']
response_text = self._call_api_with_backoff(system_prompt, prompt, models_to_try)
# Cache only successful responses (avoid caching transient rate limit/errors)
if not (response_text.startswith("โ ๏ธ") or response_text.startswith("โ")):
self.response_cache[cache_key] = response_text
# Tip 10: save the exchange to the conversation history
self._maintain_conversation_history(query, response_text)
# Prepend processing steps for UX transparency
processing_header = "\n".join(processing_steps)
full_response = f"{processing_header}\n\n{response_text}"
return full_response
def generate_response_stream(self, query: str, history, api_key: str):
"""
Stream progress: show each pipeline step as it completes (no generic placeholders).
Only one LLM call at the end; normalization, comparison detection, and search are offline/rule-based.
"""
if not api_key:
yield "Error: Gemini API Key is missing."
return
def steps_text() -> str:
return "\n".join(processing_steps)
processing_steps: List[str] = []
cache_key = self._get_cache_key(query)
if cache_key in self.response_cache:
yield f"๐ Returned cached result\n\n{self.response_cache[cache_key]}"
return
genai.configure(api_key=api_key)
# --- Step 1: Normalization (rule-based, no LLM) ---
processing_steps.append("๐ Normalizing car names...")
yield steps_text()
canonical = self._normalize_car_name(query)
if canonical:
search_query = canonical
else:
search_query = query
# --- Step 2: Question type (rule-based regex/keywords, no LLM) ---
is_comparison = self._is_comparison_question(query)
if is_comparison:
processing_steps.append("๐ Detected: comparison question (rule-based)")
else:
processing_steps.append("๐ Detected: single-model question (rule-based)")
yield steps_text()
ordered_supported = self._get_ordered_supported_canonicals_in_text(query)
# Show user which cars were identified (for comparison: both; for single: one)
if is_comparison:
if len(ordered_supported) >= 2:
names = ", ".join(self.CANONICAL_TO_DISPLAY.get(c, c) for c in ordered_supported[:2])
processing_steps.append(f"โ
ืืืืื ืืืืื ืืืฉืืืื: {names}")
elif len(ordered_supported) == 1:
one_display = self.CANONICAL_TO_DISPLAY.get(ordered_supported[0], ordered_supported[0])
processing_steps.append(f"โ
ืืืืื ืืื ืืื (ืืฉื ื ืื ืืจืฉืืื): {one_display}")
else:
if canonical:
processing_steps.append(f"โ
Recognized canonical id: {canonical}")
else:
processing_steps.append("โน๏ธ No canonical car found; using full query for search")
yield steps_text()
if is_comparison:
if len(ordered_supported) == 0:
yield self._unsupported_car_refusal(query, is_comparison=True)
return
else:
if not canonical and not ordered_supported and self._looks_like_specific_car_question(query):
yield self._unsupported_car_refusal(query, is_comparison=False)
return
# --- Step 3: Search (index built offline; only query embedding at runtime) ---
processing_steps.append("๐ Searching knowledge base (vectors + keywords)...")
yield steps_text()
comparison_prompt = ""
context_results = []
if is_comparison:
if len(ordered_supported) >= 2:
car1_can, car2_can = ordered_supported[0], ordered_supported[1]
car1_display = self.CANONICAL_TO_DISPLAY.get(car1_can, car1_can)
car2_display = self.CANONICAL_TO_DISPLAY.get(car2_can, car2_can)
processing_steps.append("๐ Extracting structured comparison data (regex)...")
yield steps_text()
comparison_data = self._extract_comparison_data(car1_can, car2_can)
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_comparison)
processing_steps.append(f"โ
Retrieved {len(context_results)} chunks for comparison")
yield steps_text()
comparison_prompt = f"""
Based on the car reviews, create a structured comparison between {car1_display} and {car2_display}:
Format your response as:
**ืืชืจืื ืืช {car1_display}:**
- [list advantages]
**ืืชืจืื ืืช {car2_display}:**
- [list advantages]
**ืืืืฆื ืืคื ืคืจืืคืื ืืฉืชืืฉ:**
- [personalized recommendation]
Structured Data:
{json.dumps(comparison_data, ensure_ascii=False, indent=2)}
Context from reviews:
"""
elif len(ordered_supported) == 1:
one_can = ordered_supported[0]
one_display = self.CANONICAL_TO_DISPLAY.get(one_can, one_can)
processing_steps.append(f"๐ One supported model ({one_display}); providing info only for it")
context_results = self._hybrid_search(one_can, top_k=self.max_chunks_general)
processing_steps.append(f"โ
Retrieved {len(context_results)} chunks")
yield steps_text()
if self._is_hebrew(query):
comparison_prompt = f"""
ืืืฉืชืืฉ ืืืงืฉ ืืฉืืืื. ืืื ืืืืืื (ืื ืืืชืจ) ืฉืืื ืฆืืื **ืื ื ืืฆื ืืืกืืก ืืืืข ืฉืื** โ ืื ืืคืฉืจ ืืืฉืืืช ืืืืืื ืฉืื ืืืืชื ืขืืืื.
ืืชืฉืืืชื: ืฆืืื ืืงืฆืจื ืฉืืื ื ืืืื ืืืฉืืืช ืืืืืื ืฉืื ืืืืช ืขืืืื, ืืื ืกืคืง ืืืืข ืืื ืจืง ืขื ืืืื ืฉืื ื ืืฆื ืืจืฉืืื: **{one_display}**, ืืืชืืกืก ืขื ืืืงืฉืจ ืืืื.
Context from reviews:
"""
else:
comparison_prompt = f"""
The user asked for a comparison. One or more models they mentioned are **not in my knowledge base** โ I cannot compare to models I haven't learned about.
In your response: briefly state that you cannot compare to models you haven't learned about, then provide full information only about the model that is in my list: **{one_display}**, based on the context below.
Context from reviews:
"""
else:
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
comparison_prompt = "Answer in the same language as the user's question. "
processing_steps.append(f"โ
Retrieved {len(context_results)} chunks")
yield steps_text()
else:
context_results = self._hybrid_search(search_query, top_k=self.max_chunks_general)
processing_steps.append(f"โ
Retrieved {len(context_results)} relevant chunks")
yield steps_text()
# --- Build prompt (no LLM) ---
context_text = ""
for r in context_results:
meta = r['metadata']
context_text += f"""
Source: {meta['title']}
Topic: {meta['topic']}
Content: {r['text'][:self.max_context_chars_per_chunk]}...
"""
conversation_context = self._get_context_from_history()
system_prompt = """You are an expert automotive assistant. Your answer MUST be based only on the "Context from car reviews" provided in the user message.
Your task: aggregate and summarize the information from that context and give a detailed, verbal answer as a car expert would to a friend. Always output a full paragraph (or more) that directly answers the user's questionโnever leave the answer empty or vague.
Respond in the same language as the user (Hebrew or English). For comparison questions, provide a structured analysis with clear advantages for each vehicle.
If the context is empty or irrelevant, say you have no information from your knowledge base for this question."""
prompt = f"""Context from car reviews:
{context_text if context_text.strip() else "(No matching chunks found.)"}
Previous conversation context (last turns):
{conversation_context}
User question: {query}
{comparison_prompt}
Based on the context above, provide a clear answer that aggregates the information and answers the user's question:"""
# --- Step 4: Single LLM call (streamed) ---
processing_steps.append("๐ญ Generating response with Gemini...")
yield steps_text()
models_to_try = ['gemini-2.0-flash', 'gemini-1.5-flash']
response_text = ""
for partial in self._call_api_with_backoff_stream(system_prompt, prompt, models_to_try):
response_text = partial
yield f"{steps_text()}\n\n{response_text}"
# Don't cache errors or timeouts
if not any(response_text.startswith(p) for p in ("โ ๏ธ", "โ", "โฑ๏ธ")):
self.response_cache[cache_key] = response_text
self._maintain_conversation_history(query, response_text)
processing_steps.append("โ
Done")
yield f"{steps_text()}\n\n{response_text}"
# Simple test block
if __name__ == "__main__":
# Run a quick retrieval test only if scraped_data.json is present in the working directory
# (note: the engine itself defaults to data_ingestion/scraped_data.json).
if not os.path.exists("scraped_data.json"):
print("No data found, skipping test.")
else:
engine = RAGEngine()
res = engine.retrieve("How is the Kia EV9?")
print(f"Top result: {res[0]['text'][:100]}...")