| |
| """Fix missing images across all cached cities using parallel enrichment.""" |
| import sys, os, json, time |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) |
| from dotenv import load_dotenv |
| load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True) |
|
|
| from services.recommender import ( |
| _LLM_CACHE, _IMAGE_CACHE, _save_image_cache, |
| _enrich_with_images, |
| ) |
|
|
| |
# Cities whose cached recommendation lists are scanned for items without images.
CITIES = [
    "Paris",
    "London",
    "Rome",
    "Barcelona",
    "New York",
    "Tokyo",
    "Bangkok",
    "Sydney",
    "Cape Town",
    "Rio de Janeiro",
    "Istanbul",
    "Dubai",
    "Seoul",
    "Bali",
    "Prague",
    "San Francisco",
    "Marrakech",
    "Kyoto",
]

# Category names; cat_hash() emits one boolean flag per entry of this list.
CATS = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]
|
|
def cat_hash(name):
    """Return a JSON string flagging which category in CATS equals *name*.

    Every entry of CATS becomes a key whose value is True only when the
    entry equals ``name``; the mapping is serialized with sorted keys so the
    resulting string is stable. The script uses it as the second element of
    the ``_LLM_CACHE`` lookup key (presumably mirroring the key format used
    by services.recommender -- confirm there).
    """
    selection = {}
    for category in CATS:
        selection[category] = category == name
    return json.dumps(selection, sort_keys=True)
|
|
| |
# Gather, per city, the cached items that have no (truthy) "image_url".
by_city = {}
total_missing = 0
for city in CITIES:
    # One cache lookup per single-category key; absent or empty entries are skipped.
    cached_lists = (_LLM_CACHE.get((city, cat_hash(cat)), []) for cat in CATS)
    lacking = [
        entry
        for entries in cached_lists
        if entries
        for entry in entries
        if not entry.get("image_url")
    ]
    if not lacking:
        continue
    by_city[city] = lacking
    total_missing += len(lacking)
    print(f'{city}: {len(lacking)} items missing images')


print(f'\nTotal items missing images: {total_missing}')
|
|
| |
# Enrich each city's image-less items on a small thread pool, one task per city.
import concurrent.futures

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # Map every submitted future back to its city so results can be reported by name.
    city_for_future = {
        pool.submit(_enrich_with_images, pending, city=name): name
        for name, pending in by_city.items()
    }

    # Report in completion order; a failure for one city is printed and the
    # remaining cities are still processed.
    for done in concurrent.futures.as_completed(city_for_future):
        name = city_for_future[done]
        try:
            enriched = done.result()
            with_image = len([entry for entry in enriched if entry.get("image_url")])
            print(f' {name}: fixed {with_image}/{len(by_city[name])} remaining')
        except Exception as err:
            print(f' {name}: error - {err}')


# Presumably persists _IMAGE_CACHE -- confirm in services.recommender.
_save_image_cache()
|
|
| |
# Final summary. Recount over the same (city, category) cache keys the initial
# scan used so the "after" figure is directly comparable with total_missing.
# Counting every value in _LLM_CACHE would also include keys the scan never
# looked at, which could make "still missing" exceed the "from" total.
still_missing = sum(
    1
    for city in CITIES
    for cat in CATS
    # A missing, None or empty cache entry contributes no items.
    for item in (_LLM_CACHE.get((city, cat_hash(cat))) or ())
    if not item.get("image_url")
)
print(f'\nStill missing after fix: {still_missing} (from {total_missing})')
print(f'Image cache entries: {len(_IMAGE_CACHE)}')
|
|