SamiKoen committed on
Commit
245c27f
·
1 Parent(s): e9857a9

Parse XML once into hash index; build in background thread (no event loop blocking)

Browse files
Files changed (1) hide show
  1. app.py +205 -158
app.py CHANGED
@@ -26,6 +26,19 @@ app = FastAPI(title="BF-Realtime")
26
  app.mount("/static", StaticFiles(directory="static"), name="static")
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  @app.get("/")
30
  async def root():
31
  return FileResponse("static/index.html")
@@ -271,129 +284,208 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
271
  return f"Hata: {e}"
272
 
273
 
274
- def _item_to_product(it: str) -> dict:
275
- """XML item bloku -> {name, image, images[], link, color, size, sku} dict."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  import re
277
- label_m = re.search(r'<rootlabel><!\[CDATA\[(.*?)\]\]></rootlabel>', it)
278
- var_label_m = re.search(r'<label><!\[CDATA\[(.*?)\]\]></label>', it)
279
- link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
280
- sku_m = re.search(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>', it)
281
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
- # Galeri: picture1Path .. picture8Path
284
  images = []
285
  for i in range(1, 9):
286
- m = re.search(rf'<picture{i}Path><!\[CDATA\[(.*?)\]\]></picture{i}Path>', it)
287
  if m and m.group(1).strip():
288
  images.append(m.group(1).strip())
289
 
290
- # Varyant ise label'dan color/size parse et: "XS - YEŞİL", "M - SİYAH 56", "RED 56" vb.
291
  color = None
292
  size = None
293
- is_variant = bool(iv_m and iv_m.group(1) == '1')
294
- if is_variant and var_label_m:
295
- vlabel = var_label_m.group(1).strip()
296
- # Format: "BOY - RENK" veya "RENK - BOY" veya tek alan
297
- parts = [p.strip() for p in re.split(r'\s*[-/]\s*', vlabel) if p.strip()]
298
- # Boy: XS/S/M/L/XL/XXL veya 50/52/54/56/58 cm benzeri sayılar
299
- size_pat = re.compile(r'^(?:XX?S|XS|S|M|L|XL|XXL|XXXL|\d{2}(?:\.\d)?(?:\s*CM)?)$', re.I)
300
  for p in parts:
301
  if size_pat.match(p) and not size:
302
  size = p.upper()
303
- else:
304
- # color olabilir
305
- if not color:
306
- color = p.upper()
 
 
307
 
308
  return {
309
- 'name': label_m.group(1) if label_m else '',
310
  'image': images[0] if images else None,
311
  'images': images,
312
- 'link': link_m.group(1) if link_m else None,
313
- 'sku': sku_m.group(1) if sku_m else None,
314
  'color': color,
315
  'size': size,
316
  'is_variant': is_variant,
 
 
 
317
  }
318
 
319
 
320
- def find_product_by_link(product_link: str) -> dict | None:
321
- """productLink ile eslesen urunu bul. Varyant ise ana urune cik (rootProductStockCode)."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  try:
323
  from smart_warehouse_with_price import get_cached_trek_xml
324
- import re
325
-
326
  xml = get_cached_trek_xml()
327
  if not xml:
328
  return None
329
- text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
330
- items = re.findall(r'<item>(.*?)</item>', text, re.DOTALL)
331
-
332
- matched = None
333
- for it in items:
334
- link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
335
- if link_m and link_m.group(1).strip() == product_link.strip():
336
- matched = it
337
- break
338
- if matched is None:
339
- return None
340
-
341
- # Varyant mi? -> ana urune cik
342
- is_variant_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', matched)
343
- if is_variant_m and is_variant_m.group(1) == '1':
344
- root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', matched)
345
- if root_m and root_m.group(1).strip() and root_m.group(1).strip() != '0':
346
- root_code = root_m.group(1).strip()
347
- for it in items:
348
- sc_m = re.search(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>', it)
349
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
350
- if (sc_m and sc_m.group(1).strip() == root_code
351
- and iv_m and iv_m.group(1) == '0'):
352
- return _item_to_product(it)
353
-
354
- return _item_to_product(matched)
355
  except Exception:
356
- logger.exception('find_product_by_link hatasi')
357
  return None
358
 
359
 
360
- def find_variants_of_product(main_link: str) -> list[dict]:
361
- """Verilen ana urunun tum varyantlarini dondur (renk, boy vb. icin)."""
362
- try:
363
- from smart_warehouse_with_price import get_cached_trek_xml
364
- import re
365
 
366
- xml = get_cached_trek_xml()
367
- if not xml:
368
- return []
369
- text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
370
- items = re.findall(r'<item>(.*?)</item>', text, re.DOTALL)
371
-
372
- # Once main'i bul, stockCode'unu al
373
- main_code = None
374
- for it in items:
375
- link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
376
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
377
- if (link_m and link_m.group(1).strip() == main_link.strip()
378
- and iv_m and iv_m.group(1) == '0'):
379
- sc_m = re.search(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>', it)
380
- if sc_m:
381
- main_code = sc_m.group(1).strip()
382
- break
383
- if not main_code:
384
- return []
385
-
386
- variants = []
387
- for it in items:
388
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
389
- root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', it)
390
- if (iv_m and iv_m.group(1) == '1'
391
- and root_m and root_m.group(1).strip() == main_code):
392
- variants.append(_item_to_product(it))
393
- return variants
394
- except Exception:
395
- logger.exception('find_variants_of_product hatasi')
396
  return []
 
397
 
398
 
399
  def extract_product_link_from_result(result: str) -> str | None:
@@ -405,16 +497,6 @@ def extract_product_link_from_result(result: str) -> str | None:
405
  return m.group(0) if m else None
406
 
407
 
408
- _TR_MAP = {'İ': 'i', 'I': 'i', 'ı': 'i', 'Ğ': 'g', 'ğ': 'g',
409
- 'Ü': 'u', 'ü': 'u', 'Ş': 's', 'ş': 's',
410
- 'Ö': 'o', 'ö': 'o', 'Ç': 'c', 'ç': 'c'}
411
-
412
- def _norm(s: str) -> str:
413
- for tr, en in _TR_MAP.items():
414
- s = s.replace(tr, en)
415
- return s.lower()
416
-
417
-
418
  # Renk eşanlam haritası (TR söylüyorlar, XML'de TR/EN karışık olabilir)
419
  _COLOR_SYNONYMS = {
420
  'siyah': ['siyah', 'black', 'noir', 'negro'],
@@ -459,47 +541,27 @@ def _detect_sizes_in_text(text_norm: str) -> set[str]:
459
 
460
 
461
  def find_product_in_text(text: str, current_main_link: str | None = None) -> dict | None:
462
- """Transkriptte gecen urunu/varyanti bul.
463
- - Default: ana urunu tercih eder (skor esitse main).
464
- - Renk/boy gibi ek tokenlar varyantta eslesirse varyant skor olarak ana urunu gecer ve varyant gosterilir.
465
- - current_main_link verilmisse, sadece o ana urunun varyantlarina ve kendisine bakilir (context-aware)."""
466
  try:
467
- from smart_warehouse_with_price import get_cached_trek_xml
468
- import re
469
-
470
- xml = get_cached_trek_xml()
471
- if not xml:
472
  return None
473
- xml_text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
474
  text_norm = _norm(text)
475
- items = re.findall(r'<item>(.*?)</item>', xml_text, re.DOTALL)
476
 
477
- # Eger context (mevcut ana urun) varsa, scope'u o ana urun + varyantlariyla sinirla
478
- scoped_items = items
479
  if current_main_link:
480
- main_code = None
481
- for it in items:
482
- link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
483
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
484
- if (link_m and link_m.group(1).strip() == current_main_link.strip()
485
- and iv_m and iv_m.group(1) == '0'):
486
- sc_m = re.search(r'<stockCode><!\[CDATA\[(.*?)\]\]></stockCode>', it)
487
- if sc_m:
488
- main_code = sc_m.group(1).strip()
489
- break
490
- if main_code:
491
- scoped = []
492
- for it in items:
493
- link_m = re.search(r'<productLink><!\[CDATA\[(.*?)\]\]></productLink>', it)
494
- root_m = re.search(r'<rootProductStockCode><!\[CDATA\[(.*?)\]\]></rootProductStockCode>', it)
495
- is_self = link_m and link_m.group(1).strip() == current_main_link.strip()
496
- is_variant_of = root_m and root_m.group(1).strip() == main_code
497
- if is_self or is_variant_of:
498
- scoped.append(it)
499
- if scoped:
500
- scoped_items = scoped
501
-
502
- # Metinde gecen renk/beden hint'leri
503
  text_colors = _detect_colors_in_text(text_norm)
504
  text_sizes = _detect_sizes_in_text(text_norm)
505
  has_variant_hint = bool(text_colors or text_sizes)
@@ -507,29 +569,19 @@ def find_product_in_text(text: str, current_main_link: str | None = None) -> dic
507
  best = None
508
  best_score = 0
509
  best_is_main = False
510
- for it in scoped_items:
511
- label_m = re.search(r'<rootlabel><!\[CDATA\[(.*?)\]\]></rootlabel>', it)
512
- if not label_m:
513
- continue
514
- label = label_m.group(1)
515
- label_norm = _norm(label)
516
- tokens = [t for t in re.findall(r'[a-z0-9+]+', label_norm) if len(t) >= 2]
517
  if not tokens:
518
  continue
519
  match = sum(1 for t in tokens if t in text_norm)
520
  threshold = max(2, int(len(tokens) * 0.5))
521
  if match < threshold:
522
  continue
523
- iv_m = re.search(r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>', it)
524
- is_main = not (iv_m and iv_m.group(1) == '1')
525
-
526
- # Renk/beden bonus: variant ise label'inden parse edip metinle eslestir
527
  bonus = 0
528
  if not is_main and has_variant_hint:
529
- product = _item_to_product(it)
530
- v_color = (product.get('color') or '').lower()
531
- v_size = (product.get('size') or '').upper()
532
- # Renk eslesmesi: variant rengi metindeki rekleri sinonim'le karsilastir
533
  if v_color:
534
  for key, syns in _COLOR_SYNONYMS.items():
535
  if key in text_colors and any(_norm(s) in _norm(v_color) for s in syns):
@@ -537,18 +589,13 @@ def find_product_in_text(text: str, current_main_link: str | None = None) -> dic
537
  break
538
  if v_size and v_size in text_sizes:
539
  bonus += 3
540
-
541
  score = match + bonus
542
- # Daha yuksek skor kazanir; esitlikte ana urun tercih edilir
543
  if (score > best_score
544
  or (score == best_score and is_main and not best_is_main)):
545
  best_score = score
546
- best = it
547
  best_is_main = is_main
548
-
549
- if not best:
550
- return None
551
- return _item_to_product(best)
552
  except Exception:
553
  logger.exception('find_product_in_text hatasi')
554
  return None
 
26
  app.mount("/static", StaticFiles(directory="static"), name="static")
27
 
28
 
29
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_BACKGROUND_TASKS: set[asyncio.Task] = set()

# Seconds between two index refresh attempts (30 minutes).
_INDEX_REFRESH_INTERVAL_S = 1800


@app.on_event("startup")
async def _startup_warmup():
    """Start the periodic XML-cache/index refresh when the app starts.

    The refresh itself (``_ensure_index``) runs in a worker thread via
    ``asyncio.to_thread`` so that parsing does not block the event loop.
    The loop never exits on its own; a failed refresh is logged and retried
    after the next sleep.
    """
    async def refresh_forever():
        while True:
            try:
                await asyncio.to_thread(_ensure_index)
            except Exception:
                logger.exception('xml/index refresh hatasi')
            await asyncio.sleep(_INDEX_REFRESH_INTERVAL_S)

    task = asyncio.create_task(refresh_forever())
    # Keep the task alive for the lifetime of the process and drop the
    # reference automatically if it ever finishes or is cancelled.
    _BACKGROUND_TASKS.add(task)
    task.add_done_callback(_BACKGROUND_TASKS.discard)
40
+
41
+
42
  @app.get("/")
43
  async def root():
44
  return FileResponse("static/index.html")
 
284
  return f"Hata: {e}"
285
 
286
 
287
_TR_MAP = {
    'İ': 'i', 'I': 'i', 'ı': 'i',
    'Ğ': 'g', 'ğ': 'g',
    'Ü': 'u', 'ü': 'u',
    'Ş': 's', 'ş': 's',
    'Ö': 'o', 'ö': 'o',
    'Ç': 'c', 'ç': 'c',
}

# Single-pass translation table; equivalent to applying every _TR_MAP
# replacement in turn because no replacement output is itself a key.
_TR_TABLE = str.maketrans(_TR_MAP)


def _norm(s: str) -> str:
    """Fold Turkish letters to their ASCII look-alikes and lower-case the text.

    Returns an empty string for any falsy input (``None``, ``''``).
    """
    return s.translate(_TR_TABLE).lower() if s else ''
297
+
298
+
299
# Parse-once index, rebuilt whenever the cached XML changes. Realtime
# lookups read these hash tables instead of re-running regexes over the
# ~7 MB feed.
_INDEX = dict(
    xml_id=None,            # id() of the raw XML the index was built from
    products=[],            # every parsed product dict
    by_link={},             # productLink -> product
    by_sku={},              # stockCode -> product
    main_skus=set(),        # stockCodes of main (non-variant) products
    variants_by_root={},    # root stockCode -> [variant products]
    main_links=[],          # links of main products only (fast iteration)
)
310
+
311
# Filled in by _compile_regexes(); None until it has run.
_VARIANT_LABEL_PARTS_RE = None
_ITEM_RE = None
_FIELD_RES = None


def _compile_regexes():
    """Compile the feed-parsing regexes once and publish them as module globals.

    Calling it again is a no-op once ``_ITEM_RE`` has been set.
    """
    global _VARIANT_LABEL_PARTS_RE, _ITEM_RE, _FIELD_RES
    if _ITEM_RE is not None:
        return
    import re

    def cdata(tag):
        # Matches <tag><![CDATA[...]]></tag> and captures the CDATA payload.
        return re.compile(rf'<{tag}><!\[CDATA\[(.*?)\]\]></{tag}>')

    fields = {}
    for tag in ('rootlabel', 'label', 'productLink', 'stockCode'):
        fields[tag] = cdata(tag)
    # The variant flag is a bare number, not a CDATA section.
    fields['isOptionOfAProduct'] = re.compile(
        r'<isOptionOfAProduct>(\d+)</isOptionOfAProduct>'
    )
    fields['rootProductStockCode'] = cdata('rootProductStockCode')
    # Gallery images: picture1Path .. picture8Path, keyed as picture1 .. picture8.
    fields.update((f'picture{n}', cdata(f'picture{n}Path')) for n in range(1, 9))

    _ITEM_RE = re.compile(r'<item>(.*?)</item>', re.DOTALL)
    _VARIANT_LABEL_PARTS_RE = re.compile(r'\s*[-/]\s*')
    _FIELD_RES = fields


_compile_regexes()
339
+
340
+
341
def _parse_item(it: str) -> dict:
    """Turn the body of one XML ``<item>`` into a product dict.

    Besides the public fields (name, image(s), link, sku, color, size,
    is_variant, root_sku) the dict carries two search helpers that are
    computed once here: ``_label_norm`` (normalised root label) and
    ``_tokens`` (its alphanumeric tokens of length >= 2).
    """
    import re as _re

    patterns = _FIELD_RES

    def text_of(field):
        # Stripped capture of the field, or '' when the tag is absent.
        found = patterns[field].search(it)
        if found is None:
            return ''
        return found.group(1).strip()

    name = text_of('rootlabel')
    variant_label = text_of('label')
    option_flag = patterns['isOptionOfAProduct'].search(it)
    is_variant = option_flag is not None and option_flag.group(1) == '1'
    root_sku = text_of('rootProductStockCode')

    # Gallery: keep only the picture slots that are present and non-blank.
    images = [url for url in (text_of(f'picture{n}') for n in range(1, 9)) if url]

    # For variants the label looks like "XS - YEŞİL" / "M - SİYAH 56":
    # the first piece that looks like a size becomes the size, the first
    # other piece becomes the color.
    color = size = None
    if is_variant and variant_label:
        size_re = _re.compile(
            r'^(?:XX?S|XS|S|M|L|XL|XXL|XXXL|\d{2}(?:\.\d)?(?:\s*CM)?)$', _re.I
        )
        for piece in _VARIANT_LABEL_PARTS_RE.split(variant_label):
            piece = piece.strip()
            if not piece:
                continue
            if size is None and size_re.match(piece):
                size = piece.upper()
            elif color is None:
                color = piece.upper()

    # Pre-computed search tokens so lookups never re-tokenise the label.
    label_norm = _norm(name)
    tokens = [tok for tok in _re.findall(r'[a-z0-9+]+', label_norm) if len(tok) >= 2]

    return {
        'name': name,
        'image': images[0] if images else None,
        'images': images,
        'link': text_of('productLink'),
        'sku': text_of('stockCode'),
        'color': color,
        'size': size,
        'is_variant': is_variant,
        # An empty or "0" root code means "no parent product".
        'root_sku': None if root_sku in ('', '0') else root_sku,
        '_tokens': tokens,
        '_label_norm': label_norm,
    }
394
 
395
 
396
def _build_index(xml_text: str) -> dict:
    """Parse the whole feed once and build the lookup tables used at query time.

    Args:
        xml_text: Decoded XML feed. Every ``<item>...</item>`` body found in
            it is parsed with ``_parse_item``.

    Returns:
        A dict with the keys ``products``, ``by_link``, ``by_sku``,
        ``main_skus``, ``variants_by_root`` and ``main_links``.

    When several items share a ``productLink`` or ``stockCode``, the first
    one seen is kept, except that a main product always replaces a variant.
    Without this rule a variant listed after its parent with the same link
    would shadow the parent in ``by_link`` and variant lookups for that link
    would come back empty.
    """
    products = []
    by_link = {}
    by_sku = {}
    main_skus = set()
    variants_by_root = {}
    main_links = []

    def remember(table, key, product):
        # First entry wins; a main product may still displace a variant.
        current = table.get(key)
        if current is None or (current['is_variant'] and not product['is_variant']):
            table[key] = product

    for raw in _ITEM_RE.findall(xml_text):
        p = _parse_item(raw)
        products.append(p)
        link, sku = p['link'], p['sku']
        if link:
            remember(by_link, link, p)
        if sku:
            remember(by_sku, sku, p)
        if p['is_variant']:
            # Variants are grouped under their parent's stock code.
            if p['root_sku']:
                variants_by_root.setdefault(p['root_sku'], []).append(p)
            continue
        if sku:
            main_skus.add(sku)
        if link:
            main_links.append(link)

    logger.info(
        "[index] parsed %d items, %d main, %d variants",
        len(products),
        len(main_skus),
        sum(len(v) for v in variants_by_root.values()),
    )
    return {
        "products": products,
        "by_link": by_link,
        "by_sku": by_sku,
        "main_skus": main_skus,
        "variants_by_root": variants_by_root,
        "main_links": main_links,
    }
432
+
433
+
434
import threading

# Serialises index rebuilds: the startup refresher calls _ensure_index() from
# a worker thread while request handlers may call it at the same time.
_INDEX_LOCK = threading.Lock()


def _ensure_index() -> dict | None:
    """Return the shared index, rebuilding it if the cached XML object changed.

    Change detection compares the cached XML object by identity against the
    object the index was last built from. A reference to that object is kept
    in ``_INDEX["xml_ref"]``; comparing bare ``id()`` values without holding
    the object is unreliable because an id can be reused once the old object
    has been garbage-collected. ``_INDEX["xml_id"]`` is still maintained.

    Returns:
        The module-level ``_INDEX`` dict, or ``None`` when no XML is cached
        or when anything fails (the error is logged).
    """
    try:
        from smart_warehouse_with_price import get_cached_trek_xml

        xml = get_cached_trek_xml()
        if not xml:
            return None
        # Fast path without locking: index already matches this XML object.
        if _INDEX.get("xml_ref") is xml:
            return _INDEX
        with _INDEX_LOCK:
            # Re-check under the lock: another thread may have finished the
            # same rebuild while this one was waiting.
            if _INDEX.get("xml_ref") is not xml:
                text = xml.decode('utf-8', errors='replace') if isinstance(xml, bytes) else str(xml)
                built = _build_index(text)
                # Publish all tables in one call rather than key by key so
                # readers are not handed a mix of old and new tables.
                _INDEX.update(built, xml_id=id(xml), xml_ref=xml)
        return _INDEX
    except Exception:
        logger.exception('_ensure_index hatasi')
        return None
456
 
457
 
458
+ def _public_view(p: dict | None) -> dict | None:
459
+ """Index'teki internal field'lari (_tokens, _label_norm) cikar client'a gonderilebilir."""
460
+ if not p:
461
+ return None
462
+ return {k: v for k, v in p.items() if not k.startswith('_')}
463
 
464
+
465
+ def find_product_by_link(product_link: str) -> dict | None:
466
+ """O(1) lookup. Varyant ise ana urune cik."""
467
+ idx = _ensure_index()
468
+ if not idx:
469
+ return None
470
+ p = idx["by_link"].get(product_link.strip())
471
+ if not p:
472
+ return None
473
+ if p['is_variant'] and p['root_sku']:
474
+ main = idx["by_sku"].get(p['root_sku'])
475
+ if main and not main['is_variant']:
476
+ return _public_view(main)
477
+ return _public_view(p)
478
+
479
+
480
def find_variants_of_product(main_link: str) -> list[dict]:
    """Return every variant of the main product identified by ``main_link``.

    Args:
        main_link: ``productLink`` of a main (non-variant) product;
            surrounding whitespace is ignored.

    Returns:
        Public views of the variants grouped under the main product's stock
        code. An empty list when the link is empty/unknown, points at a
        variant, the product has no stock code, the index is unavailable, or
        an unexpected error occurs (the error is logged, never raised).
    """
    try:
        link = (main_link or '').strip()
        if not link:
            return []
        idx = _ensure_index()
        if not idx:
            return []
        main = idx["by_link"].get(link)
        if not main or main['is_variant'] or not main['sku']:
            return []
        return [_public_view(v) for v in idx["variants_by_root"].get(main['sku'], [])]
    except Exception:
        logger.exception('find_variants_of_product hatasi')
        return []
489
 
490
 
491
  def extract_product_link_from_result(result: str) -> str | None:
 
497
  return m.group(0) if m else None
498
 
499
 
 
 
 
 
 
 
 
 
 
 
500
  # Renk eşanlam haritası (TR söylüyorlar, XML'de TR/EN karışık olabilir)
501
  _COLOR_SYNONYMS = {
502
  'siyah': ['siyah', 'black', 'noir', 'negro'],
 
541
 
542
 
543
  def find_product_in_text(text: str, current_main_link: str | None = None) -> dict | None:
544
+ """Transkriptte gecen urunu/varyanti index uzerinden bul (regex re-parse YOK).
545
+ - Default: ana urunu tercih eder.
546
+ - Variant token bonus'u sadece renk/boy hint'i varsa devreye girer.
547
+ - current_main_link: o ana urun + varyantlari ile scope'u kisitla."""
548
  try:
549
+ idx = _ensure_index()
550
+ if not idx:
 
 
 
551
  return None
 
552
  text_norm = _norm(text)
 
553
 
554
+ # Scope: ya tum ana urunler + varyantlari, ya da sadece context'tekiler
 
555
  if current_main_link:
556
+ main = idx["by_link"].get(current_main_link.strip())
557
+ if main and not main['is_variant'] and main['sku']:
558
+ scoped = [main] + idx["variants_by_root"].get(main['sku'], [])
559
+ else:
560
+ scoped = idx["products"]
561
+ else:
562
+ scoped = idx["products"]
563
+
564
+ # Renk/beden hint'leri
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  text_colors = _detect_colors_in_text(text_norm)
566
  text_sizes = _detect_sizes_in_text(text_norm)
567
  has_variant_hint = bool(text_colors or text_sizes)
 
569
  best = None
570
  best_score = 0
571
  best_is_main = False
572
+ for p in scoped:
573
+ tokens = p.get('_tokens') or []
 
 
 
 
 
574
  if not tokens:
575
  continue
576
  match = sum(1 for t in tokens if t in text_norm)
577
  threshold = max(2, int(len(tokens) * 0.5))
578
  if match < threshold:
579
  continue
580
+ is_main = not p['is_variant']
 
 
 
581
  bonus = 0
582
  if not is_main and has_variant_hint:
583
+ v_color = (p.get('color') or '').lower()
584
+ v_size = (p.get('size') or '').upper()
 
 
585
  if v_color:
586
  for key, syns in _COLOR_SYNONYMS.items():
587
  if key in text_colors and any(_norm(s) in _norm(v_color) for s in syns):
 
589
  break
590
  if v_size and v_size in text_sizes:
591
  bonus += 3
 
592
  score = match + bonus
 
593
  if (score > best_score
594
  or (score == best_score and is_main and not best_is_main)):
595
  best_score = score
596
+ best = p
597
  best_is_main = is_main
598
+ return _public_view(best)
 
 
 
599
  except Exception:
600
  logger.exception('find_product_in_text hatasi')
601
  return None