Spaces:
Running
Running
SamiKoen committed on
Commit ·
245c27f
1
Parent(s): e9857a9
Parse XML once into hash index; build in background thread (no event loop blocking)
Browse files
app.py
CHANGED
|
@@ -26,6 +26,19 @@ app = FastAPI(title="BF-Realtime")
|
|
| 26 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 27 |
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
@app.get("/")
|
| 30 |
async def root():
|
| 31 |
return FileResponse("static/index.html")
|
|
@@ -271,129 +284,208 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
|
|
| 271 |
return f"Hata: {e}"
|
| 272 |
|
| 273 |
|
| 274 |
-
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
import re
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
-
# Galeri: picture1Path .. picture8Path
|
| 284 |
images = []
|
| 285 |
for i in range(1, 9):
|
| 286 |
-
m =
|
| 287 |
if m and m.group(1).strip():
|
| 288 |
images.append(m.group(1).strip())
|
| 289 |
|
| 290 |
-
# Varyant ise label'dan color/size parse et: "XS - YEŞİL", "M - SİYAH 56", "RED 56" vb.
|
| 291 |
color = None
|
| 292 |
size = None
|
| 293 |
-
is_variant
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
# Boy: XS/S/M/L/XL/XXL veya 50/52/54/56/58 cm benzeri sayılar
|
| 299 |
-
size_pat = re.compile(r'^(?:XX?S|XS|S|M|L|XL|XXL|XXXL|\d{2}(?:\.\d)?(?:\s*CM)?)$', re.I)
|
| 300 |
for p in parts:
|
| 301 |
if size_pat.match(p) and not size:
|
| 302 |
size = p.upper()
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
|
|
|
|
|
|
| 307 |
|
| 308 |
return {
|
| 309 |
-
'name':
|
| 310 |
'image': images[0] if images else None,
|
| 311 |
'images': images,
|
| 312 |
-
'link':
|
| 313 |
-
'sku':
|
| 314 |
'color': color,
|
| 315 |
'size': size,
|
| 316 |
'is_variant': is_variant,
|
|
|
|
|
|
|
|
|
|
| 317 |
}
|
| 318 |
|
| 319 |
|
| 320 |
-
def
|
| 321 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
try:
|
| 323 |
from smart_warehouse_with_price import get_cached_trek_xml
|
| 324 |
-
import re
|
| 325 |
-
|
| 326 |
xml = get_cached_trek_xml()
|
| 327 |
if not xml:
|
| 328 |
return None
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
# Varyant mi? -> ana urune cik
|
| 342 |
-
is_variant_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', matched)
|
| 343 |
-
if is_variant_m and is_variant_m.group(1) == '1':
|
| 344 |
-
root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', matched)
|
| 345 |
-
if root_m and root_m.group(1).strip() and root_m.group(1).strip() != '0':
|
| 346 |
-
root_code = root_m.group(1).strip()
|
| 347 |
-
for it in items:
|
| 348 |
-
sc_m = re.search(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>', it)
|
| 349 |
-
iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
|
| 350 |
-
if (sc_m and sc_m.group(1).strip() == root_code
|
| 351 |
-
and iv_m and iv_m.group(1) == '0'):
|
| 352 |
-
return _item_to_product(it)
|
| 353 |
-
|
| 354 |
-
return _item_to_product(matched)
|
| 355 |
except Exception:
|
| 356 |
-
logger.exception('
|
| 357 |
return None
|
| 358 |
|
| 359 |
|
| 360 |
-
def
|
| 361 |
-
"""
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', it)
|
| 390 |
-
if (iv_m and iv_m.group(1) == '1'
|
| 391 |
-
and root_m and root_m.group(1).strip() == main_code):
|
| 392 |
-
variants.append(_item_to_product(it))
|
| 393 |
-
return variants
|
| 394 |
-
except Exception:
|
| 395 |
-
logger.exception('find_variants_of_product hatasi')
|
| 396 |
return []
|
|
|
|
| 397 |
|
| 398 |
|
| 399 |
def extract_product_link_from_result(result: str) -> str | None:
|
|
@@ -405,16 +497,6 @@ def extract_product_link_from_result(result: str) -> str | None:
|
|
| 405 |
return m.group(0) if m else None
|
| 406 |
|
| 407 |
|
| 408 |
-
_TR_MAP = {'İ': 'i', 'I': 'i', 'ı': 'i', 'Ğ': 'g', 'ğ': 'g',
|
| 409 |
-
'Ü': 'u', 'ü': 'u', 'Ş': 's', 'ş': 's',
|
| 410 |
-
'Ö': 'o', 'ö': 'o', 'Ç': 'c', 'ç': 'c'}
|
| 411 |
-
|
| 412 |
-
def _norm(s: str) -> str:
|
| 413 |
-
for tr, en in _TR_MAP.items():
|
| 414 |
-
s = s.replace(tr, en)
|
| 415 |
-
return s.lower()
|
| 416 |
-
|
| 417 |
-
|
| 418 |
# Renk eşanlam haritası (TR söylüyorlar, XML'de TR/EN karışık olabilir)
|
| 419 |
_COLOR_SYNONYMS = {
|
| 420 |
'siyah': ['siyah', 'black', 'noir', 'negro'],
|
|
@@ -459,47 +541,27 @@ def _detect_sizes_in_text(text_norm: str) -> set[str]:
|
|
| 459 |
|
| 460 |
|
| 461 |
def find_product_in_text(text: str, current_main_link: str | None = None) -> dict | None:
|
| 462 |
-
"""Transkriptte gecen urunu/varyanti bul.
|
| 463 |
-
- Default: ana urunu tercih eder
|
| 464 |
-
-
|
| 465 |
-
- current_main_link
|
| 466 |
try:
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
xml = get_cached_trek_xml()
|
| 471 |
-
if not xml:
|
| 472 |
return None
|
| 473 |
-
xml_text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
|
| 474 |
text_norm = _norm(text)
|
| 475 |
-
items = re.findall(r'<item>(.*?)</item>', xml_text, re.DOTALL)
|
| 476 |
|
| 477 |
-
#
|
| 478 |
-
scoped_items = items
|
| 479 |
if current_main_link:
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
break
|
| 490 |
-
if main_code:
|
| 491 |
-
scoped = []
|
| 492 |
-
for it in items:
|
| 493 |
-
link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
|
| 494 |
-
root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', it)
|
| 495 |
-
is_self = link_m and link_m.group(1).strip() == current_main_link.strip()
|
| 496 |
-
is_variant_of = root_m and root_m.group(1).strip() == main_code
|
| 497 |
-
if is_self or is_variant_of:
|
| 498 |
-
scoped.append(it)
|
| 499 |
-
if scoped:
|
| 500 |
-
scoped_items = scoped
|
| 501 |
-
|
| 502 |
-
# Metinde gecen renk/beden hint'leri
|
| 503 |
text_colors = _detect_colors_in_text(text_norm)
|
| 504 |
text_sizes = _detect_sizes_in_text(text_norm)
|
| 505 |
has_variant_hint = bool(text_colors or text_sizes)
|
|
@@ -507,29 +569,19 @@ def find_product_in_text(text: str, current_main_link: str | None = None) -> dic
|
|
| 507 |
best = None
|
| 508 |
best_score = 0
|
| 509 |
best_is_main = False
|
| 510 |
-
for
|
| 511 |
-
|
| 512 |
-
if not label_m:
|
| 513 |
-
continue
|
| 514 |
-
label = label_m.group(1)
|
| 515 |
-
label_norm = _norm(label)
|
| 516 |
-
tokens = [t for t in re.findall(r'[a-z0-9+]+', label_norm) if len(t) >= 2]
|
| 517 |
if not tokens:
|
| 518 |
continue
|
| 519 |
match = sum(1 for t in tokens if t in text_norm)
|
| 520 |
threshold = max(2, int(len(tokens) * 0.5))
|
| 521 |
if match < threshold:
|
| 522 |
continue
|
| 523 |
-
|
| 524 |
-
is_main = not (iv_m and iv_m.group(1) == '1')
|
| 525 |
-
|
| 526 |
-
# Renk/beden bonus: variant ise label'inden parse edip metinle eslestir
|
| 527 |
bonus = 0
|
| 528 |
if not is_main and has_variant_hint:
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
v_size = (product.get('size') or '').upper()
|
| 532 |
-
# Renk eslesmesi: variant rengi metindeki rekleri sinonim'le karsilastir
|
| 533 |
if v_color:
|
| 534 |
for key, syns in _COLOR_SYNONYMS.items():
|
| 535 |
if key in text_colors and any(_norm(s) in _norm(v_color) for s in syns):
|
|
@@ -537,18 +589,13 @@ def find_product_in_text(text: str, current_main_link: str | None = None) -> dic
|
|
| 537 |
break
|
| 538 |
if v_size and v_size in text_sizes:
|
| 539 |
bonus += 3
|
| 540 |
-
|
| 541 |
score = match + bonus
|
| 542 |
-
# Daha yuksek skor kazanir; esitlikte ana urun tercih edilir
|
| 543 |
if (score > best_score
|
| 544 |
or (score == best_score and is_main and not best_is_main)):
|
| 545 |
best_score = score
|
| 546 |
-
best =
|
| 547 |
best_is_main = is_main
|
| 548 |
-
|
| 549 |
-
if not best:
|
| 550 |
-
return None
|
| 551 |
-
return _item_to_product(best)
|
| 552 |
except Exception:
|
| 553 |
logger.exception('find_product_in_text hatasi')
|
| 554 |
return None
|
|
|
|
| 26 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 27 |
|
| 28 |
|
| 29 |
+
@app.on_event("startup")
async def _startup_warmup():
    """Start the periodic XML-cache / product-index refresh at app startup.

    Each refresh runs ``_ensure_index`` in a worker thread via
    ``asyncio.to_thread`` so the event loop is not blocked while it runs.
    The refresh repeats every 30 minutes; a failed refresh is logged and
    retried on the next cycle.
    """
    async def _refresh_forever():
        while True:
            try:
                await asyncio.to_thread(_ensure_index)
            except Exception:
                # Swallow-and-log on purpose: one bad refresh must not end the loop.
                logger.exception('xml/index refresh hatasi')
            await asyncio.sleep(1800)  # 30 minutes

    # The event loop keeps only weak references to tasks. Store a strong
    # reference on the app so the refresher cannot be garbage-collected
    # while it is still running.
    app.state.index_refresh_task = asyncio.create_task(_refresh_forever())
|
| 40 |
+
|
| 41 |
+
|
| 42 |
@app.get("/")
async def root():
    """Serve the front-end entry page ``static/index.html``."""
    index_page = FileResponse("static/index.html")
    return index_page
|
|
|
|
| 284 |
return f"Hata: {e}"
|
| 285 |
|
| 286 |
|
| 287 |
+
_TR_MAP = {'İ': 'i', 'I': 'i', 'ı': 'i', 'Ğ': 'g', 'ğ': 'g',
|
| 288 |
+
'Ü': 'u', 'ü': 'u', 'Ş': 's', 'ş': 's',
|
| 289 |
+
'Ö': 'o', 'ö': 'o', 'Ç': 'c', 'ç': 'c'}
|
| 290 |
+
|
| 291 |
+
def _norm(s: str) -> str:
|
| 292 |
+
if not s:
|
| 293 |
+
return ''
|
| 294 |
+
for tr, en in _TR_MAP.items():
|
| 295 |
+
s = s.replace(tr, en)
|
| 296 |
+
return s.lower()
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# Parse-once index — XML her degistiginde yeniden build edilir.
|
| 300 |
+
# Realtime aramalarda 7MB regex re-parse yapilmaz, hash lookup'lar kullanilir.
|
| 301 |
+
_INDEX = {
|
| 302 |
+
"xml_id": None, # ham XML'in id'si — degisirse yeniden parse
|
| 303 |
+
"products": [], # list of parsed product dicts
|
| 304 |
+
"by_link": {}, # productLink -> product
|
| 305 |
+
"by_sku": {}, # stockCode -> product
|
| 306 |
+
"main_skus": set(), # ana urunlerin stockCode set'i
|
| 307 |
+
"variants_by_root": {}, # root_sku -> [variant products]
|
| 308 |
+
"main_links": [], # sadece ana urunlerin linkleri (hizli iterasyon)
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
_VARIANT_LABEL_PARTS_RE = None
|
| 312 |
+
_ITEM_RE = None
|
| 313 |
+
_FIELD_RES = None
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def _compile_regexes():
|
| 317 |
+
"""Modul ilk yuklemede regex'leri compile et."""
|
| 318 |
+
global _VARIANT_LABEL_PARTS_RE, _ITEM_RE, _FIELD_RES
|
| 319 |
+
if _ITEM_RE is not None:
|
| 320 |
+
return
|
| 321 |
import re
|
| 322 |
+
_ITEM_RE = re.compile(r'<item>(.*?)</item>', re.DOTALL)
|
| 323 |
+
_VARIANT_LABEL_PARTS_RE = re.compile(r'\s*[-/]\s*')
|
| 324 |
+
_FIELD_RES = {
|
| 325 |
+
'rootlabel': re.compile(r'<rootlabel><!\[CDATA\[(.*?)\]\]></rootlabel>'),
|
| 326 |
+
'label': re.compile(r'<label><!\[CDATA\[(.*?)\]\]></label>'),
|
| 327 |
+
'productLink': re.compile(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>'),
|
| 328 |
+
'stockCode': re.compile(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>'),
|
| 329 |
+
'isOptionOfAProduct': re.compile(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>'),
|
| 330 |
+
'rootProductStockCode': re.compile(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>'),
|
| 331 |
+
}
|
| 332 |
+
# picture1Path - picture8Path
|
| 333 |
+
for i in range(1, 9):
|
| 334 |
+
_FIELD_RES[f'picture{i}'] = re.compile(
|
| 335 |
+
rf'<picture{i}Path><!\[CDATA\[(.*?)\]\]></picture{i}Path>'
|
| 336 |
+
)
|
| 337 |
+
|
| 338 |
+
_compile_regexes()
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
import re as _re

# A variant-label part that denotes a size: a letter size (XS..XXXL) or a
# two-digit number with optional one-digit decimal and optional "CM" suffix.
_SIZE_PART_RE = _re.compile(
    r'^(?:XX?S|XS|S|M|L|XL|XXL|XXXL|\d{2}(?:\.\d)?(?:\s*CM)?)$', _re.I
)
# Search tokens: runs of lowercase ASCII letters, digits and '+'.
_SEARCH_TOKEN_RE = _re.compile(r'[a-z0-9+]+')


def _parse_item(it: str) -> dict:
    """Convert the inner text of one ``<item>`` element into a product dict.

    Args:
        it: Raw text found between ``<item>`` and ``</item>``.

    Returns:
        A dict with the keys ``name``, ``image``, ``images``, ``link``,
        ``sku``, ``color``, ``size``, ``is_variant`` and ``root_sku``, plus
        the search helpers ``_tokens`` and ``_label_norm`` (precomputed from
        the root label). Missing text fields come back as ``''``;
        ``color``/``size`` are only filled for variants; ``root_sku`` is
        ``None`` when the field is absent or ``'0'``.
    """
    fields = _FIELD_RES

    def grab(name):
        # First captured value of the field, stripped; '' when not present.
        found = fields[name].search(it)
        return found.group(1).strip() if found else ''

    rootlabel = grab('rootlabel')
    var_label = grab('label')
    link = grab('productLink')
    sku = grab('stockCode')
    is_variant = grab('isOptionOfAProduct') == '1'
    root_sku = grab('rootProductStockCode')

    # Gallery: picture1Path .. picture8Path, skipping empty slots.
    images = [url for url in (grab(f'picture{i}') for i in range(1, 9)) if url]

    color = None
    size = None
    if is_variant and var_label:
        # Variant labels look like "XS - YEŞİL", "M - SİYAH 56" or "RED 56".
        # The first size-shaped part becomes the size; the first remaining
        # part becomes the color.
        for part in _VARIANT_LABEL_PARTS_RE.split(var_label):
            part = part.strip()
            if not part:
                continue
            if _SIZE_PART_RE.match(part) and not size:
                size = part.upper()
            elif not color:
                color = part.upper()

    # Tokens are prepared once here so text search does not re-tokenise.
    label_norm = _norm(rootlabel)
    tokens = [t for t in _SEARCH_TOKEN_RE.findall(label_norm) if len(t) >= 2]

    return {
        'name': rootlabel,
        'image': images[0] if images else None,
        'images': images,
        'link': link,
        'sku': sku,
        'color': color,
        'size': size,
        'is_variant': is_variant,
        'root_sku': root_sku if root_sku and root_sku != '0' else None,
        '_tokens': tokens,
        '_label_norm': label_norm,
    }
|
| 394 |
|
| 395 |
|
| 396 |
+
def _build_index(xml_text: str) -> dict:
    """Parse the feed text once and return the lookup tables built from it.

    Args:
        xml_text: The decoded XML feed.

    Returns:
        A dict with ``products`` (all parsed items), ``by_link`` and
        ``by_sku`` (last item wins on duplicate keys), ``main_skus`` and
        ``main_links`` (non-variant items only) and ``variants_by_root``
        (variants grouped under their ``root_sku``).
    """
    products = []
    by_link, by_sku, variants_by_root = {}, {}, {}
    main_skus = set()
    main_links = []

    for raw in _ITEM_RE.findall(xml_text):
        prod = _parse_item(raw)
        products.append(prod)
        link, sku = prod['link'], prod['sku']
        if link:
            by_link[link] = prod
        if sku:
            by_sku[sku] = prod

        if prod['is_variant']:
            # Variants are only grouped when they name a root product.
            root = prod['root_sku']
            if root:
                variants_by_root.setdefault(root, []).append(prod)
            continue

        if sku:
            main_skus.add(sku)
        if link:
            main_links.append(link)

    variant_total = sum(len(group) for group in variants_by_root.values())
    logger.info(f"[index] parsed {len(products)} items, {len(main_skus)} main, "
                f"{variant_total} variants")
    return dict(
        products=products,
        by_link=by_link,
        by_sku=by_sku,
        main_skus=main_skus,
        variants_by_root=variants_by_root,
        main_links=main_links,
    )
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
import threading

# Serialises index rebuilds: _ensure_index is run both in a worker thread
# (startup refresher) and directly by the lookup functions.
_INDEX_LOCK = threading.Lock()


def _ensure_index() -> dict | None:
    """Return the product index, rebuilding it when the cached XML changed.

    Change detection is by object identity of the value returned by
    ``get_cached_trek_xml()``.

    Returns:
        The module-level ``_INDEX`` dict, or ``None`` when no XML is cached
        or the rebuild raised (the failure is logged).
    """
    try:
        from smart_warehouse_with_price import get_cached_trek_xml
        xml = get_cached_trek_xml()
        if not xml:
            return None
        # Fast path: the index was already built from this exact object.
        if _INDEX.get("xml_ref") is xml:
            return _INDEX
        with _INDEX_LOCK:
            # Re-check under the lock: another thread may have finished the
            # rebuild while this one was waiting.
            if _INDEX.get("xml_ref") is not xml:
                text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
                built = _build_index(text)
                _INDEX.update(built)
                # Keep the object itself, not only id(xml): an id can be
                # reused once the old object is freed, which would make a
                # refreshed feed look unchanged.
                _INDEX["xml_ref"] = xml
                _INDEX["xml_id"] = id(xml)
        return _INDEX
    except Exception:
        logger.exception('_ensure_index hatasi')
        return None
|
| 456 |
|
| 457 |
|
| 458 |
+
def _public_view(p: dict | None) -> dict | None:
|
| 459 |
+
"""Index'teki internal field'lari (_tokens, _label_norm) cikar — client'a gonderilebilir."""
|
| 460 |
+
if not p:
|
| 461 |
+
return None
|
| 462 |
+
return {k: v for k, v in p.items() if not k.startswith('_')}
|
| 463 |
|
| 464 |
+
|
| 465 |
+
def find_product_by_link(product_link: str) -> dict | None:
|
| 466 |
+
"""O(1) lookup. Varyant ise ana urune cik."""
|
| 467 |
+
idx = _ensure_index()
|
| 468 |
+
if not idx:
|
| 469 |
+
return None
|
| 470 |
+
p = idx["by_link"].get(product_link.strip())
|
| 471 |
+
if not p:
|
| 472 |
+
return None
|
| 473 |
+
if p['is_variant'] and p['root_sku']:
|
| 474 |
+
main = idx["by_sku"].get(p['root_sku'])
|
| 475 |
+
if main and not main['is_variant']:
|
| 476 |
+
return _public_view(main)
|
| 477 |
+
return _public_view(p)
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
def find_variants_of_product(main_link: str) -> list[dict]:
    """Return every variant of the main product identified by *main_link*.

    Args:
        main_link: URL of a main (non-variant) product; surrounding
            whitespace is ignored. ``None`` or a blank string yields ``[]``.

    Returns:
        Client-safe views of the variants grouped under the main product's
        stock code. ``[]`` when the link is empty or unknown, when it points
        to a variant or to a product without a stock code, or when no index
        is available.
    """
    # Guard first: a missing link must not raise on .strip().
    link = (main_link or '').strip()
    if not link:
        return []
    idx = _ensure_index()
    if not idx:
        return []
    main = idx["by_link"].get(link)
    if not main or main['is_variant'] or not main['sku']:
        return []
    return [_public_view(v) for v in idx["variants_by_root"].get(main['sku'], [])]
|
| 489 |
|
| 490 |
|
| 491 |
def extract_product_link_from_result(result: str) -> str | None:
|
|
|
|
| 497 |
return m.group(0) if m else None
|
| 498 |
|
| 499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
# Renk eşanlam haritası (TR söylüyorlar, XML'de TR/EN karışık olabilir)
|
| 501 |
_COLOR_SYNONYMS = {
|
| 502 |
'siyah': ['siyah', 'black', 'noir', 'negro'],
|
|
|
|
| 541 |
|
| 542 |
|
| 543 |
def find_product_in_text(text: str, current_main_link: str | None = None) -> dict | None:
|
| 544 |
+
"""Transkriptte gecen urunu/varyanti index uzerinden bul (regex re-parse YOK).
|
| 545 |
+
- Default: ana urunu tercih eder.
|
| 546 |
+
- Variant token bonus'u sadece renk/boy hint'i varsa devreye girer.
|
| 547 |
+
- current_main_link: o ana urun + varyantlari ile scope'u kisitla."""
|
| 548 |
try:
|
| 549 |
+
idx = _ensure_index()
|
| 550 |
+
if not idx:
|
|
|
|
|
|
|
|
|
|
| 551 |
return None
|
|
|
|
| 552 |
text_norm = _norm(text)
|
|
|
|
| 553 |
|
| 554 |
+
# Scope: ya tum ana urunler + varyantlari, ya da sadece context'tekiler
|
|
|
|
| 555 |
if current_main_link:
|
| 556 |
+
main = idx["by_link"].get(current_main_link.strip())
|
| 557 |
+
if main and not main['is_variant'] and main['sku']:
|
| 558 |
+
scoped = [main] + idx["variants_by_root"].get(main['sku'], [])
|
| 559 |
+
else:
|
| 560 |
+
scoped = idx["products"]
|
| 561 |
+
else:
|
| 562 |
+
scoped = idx["products"]
|
| 563 |
+
|
| 564 |
+
# Renk/beden hint'leri
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
text_colors = _detect_colors_in_text(text_norm)
|
| 566 |
text_sizes = _detect_sizes_in_text(text_norm)
|
| 567 |
has_variant_hint = bool(text_colors or text_sizes)
|
|
|
|
| 569 |
best = None
|
| 570 |
best_score = 0
|
| 571 |
best_is_main = False
|
| 572 |
+
for p in scoped:
|
| 573 |
+
tokens = p.get('_tokens') or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
if not tokens:
|
| 575 |
continue
|
| 576 |
match = sum(1 for t in tokens if t in text_norm)
|
| 577 |
threshold = max(2, int(len(tokens) * 0.5))
|
| 578 |
if match < threshold:
|
| 579 |
continue
|
| 580 |
+
is_main = not p['is_variant']
|
|
|
|
|
|
|
|
|
|
| 581 |
bonus = 0
|
| 582 |
if not is_main and has_variant_hint:
|
| 583 |
+
v_color = (p.get('color') or '').lower()
|
| 584 |
+
v_size = (p.get('size') or '').upper()
|
|
|
|
|
|
|
| 585 |
if v_color:
|
| 586 |
for key, syns in _COLOR_SYNONYMS.items():
|
| 587 |
if key in text_colors and any(_norm(s) in _norm(v_color) for s in syns):
|
|
|
|
| 589 |
break
|
| 590 |
if v_size and v_size in text_sizes:
|
| 591 |
bonus += 3
|
|
|
|
| 592 |
score = match + bonus
|
|
|
|
| 593 |
if (score > best_score
|
| 594 |
or (score == best_score and is_main and not best_is_main)):
|
| 595 |
best_score = score
|
| 596 |
+
best = p
|
| 597 |
best_is_main = is_main
|
| 598 |
+
return _public_view(best)
|
|
|
|
|
|
|
|
|
|
| 599 |
except Exception:
|
| 600 |
logger.exception('find_product_in_text hatasi')
|
| 601 |
return None
|