File size: 2,362 Bytes
83adb51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
"""Fix missing images across all cached cities using parallel enrichment."""
import sys, os, json, time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    _LLM_CACHE, _IMAGE_CACHE, _save_image_cache,
    _enrich_with_images,
)

# Cities whose cached recommendations are scanned for items lacking an image_url.
CITIES = [
    "Paris", "London", "Rome", "Barcelona", "New York", "Tokyo",
    "Bangkok", "Sydney", "Cape Town", "Rio de Janeiro", "Istanbul", "Dubai",
    "Seoul", "Bali", "Prague", "San Francisco", "Marrakech", "Kyoto",
]
# Category names; each one is looked up on its own (see cat_hash).
CATS = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]

def cat_hash(name):
    """Return the JSON string used as the category part of an LLM cache key.

    Every entry of CATS is mapped to a boolean that is True only when it
    equals *name*; the mapping is serialized with sorted keys so the result
    is stable regardless of the order of CATS.
    """
    flags = {}
    for cat in CATS:
        flags[cat] = cat == name
    return json.dumps(flags, sort_keys=True)

# Bucket every cached item that has no image_url under its city, so each
# city's batch can later be enriched as one unit. Cities with nothing
# missing get no entry in by_city.
by_city = {}
for city in CITIES:
    missing = [
        entry
        for cat in CATS
        # A falsy cache value (absent key, None, empty list) contributes nothing.
        for entry in (_LLM_CACHE.get((city, cat_hash(cat)), []) or [])
        if not entry.get("image_url")
    ]
    if not missing:
        continue
    by_city[city] = missing
    print(f'{city}: {len(missing)} items missing images')

# Overall count of items that still need an image before enrichment starts.
total_missing = sum(len(batch) for batch in by_city.values())

print(f'\nTotal items missing images: {total_missing}')

# Submit one enrichment job per city to a thread pool capped at 4 workers,
# then report on each city as soon as its job finishes.
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # Map each future back to the city it was submitted for.
    futures = {
        pool.submit(_enrich_with_images, batch, city=name): name
        for name, batch in by_city.items()
    }

    for done in concurrent.futures.as_completed(futures):
        city = futures[done]
        try:
            enriched = done.result()
            # Count the returned items that now carry an image_url.
            fixed = len([it for it in enriched if it.get("image_url")])
            print(f'  {city}: fixed {fixed}/{len(by_city[city])} remaining')
        except Exception as e:
            # One failing city must not abort the others; report and move on.
            print(f'  {city}: error - {e}')

# Persist the image cache once all jobs have completed.
_save_image_cache()

# Final tally
# Re-count only the items gathered in by_city, i.e. the same population that
# total_missing was computed from, so the "X (from Y)" line compares like with
# like. Counting over all of _LLM_CACHE here would also include entries stored
# under keys the scan above never looked at.
# NOTE(review): like the original tally, this presumes _enrich_with_images
# sets image_url on the item dicts in place -- confirm against its implementation.
still_missing = sum(
    1 for batch in by_city.values() for it in batch if not it.get("image_url")
)
print(f'\nStill missing after fix: {still_missing} (from {total_missing})')

# Cache-wide figure, reported separately because it may cover more entries
# than the ones this script scanned and tried to fix.
cache_wide_missing = sum(
    1 for v in _LLM_CACHE.values() if v for it in v if not it.get("image_url")
)
print(f'Missing across entire LLM cache: {cache_wide_missing}')
print(f'Image cache entries: {len(_IMAGE_CACHE)}')