File size: 3,966 Bytes
4668bf6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
"""
Pre-warm caches for popular travel cities — all categories.

Iterates top cities × all 7 category presets, populating all 3 disk caches:
    - .llm_cache.json   → instant repeat lookups (city + category)
    - .image_cache.json → instant image loads
    - .geocode_cache.json → instant geocoding

Cache keys are (city, categories_hash) so any num_attractions value hits the cache.

Usage:
    cd roamify && python scripts/prewarm_cache.py
"""

import os
import sys
import time
import json

# Put <this script's directory>/../src at the front of the import search path
# so the `services` package imported further down can be resolved
# (presumably it lives under src/ — confirm against the repo layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from dotenv import load_dotenv
# Load the .env file located one directory above this script. override=True
# lets values from that file replace variables already set in the environment.
# NOTE(review): this runs before the recommender import below, presumably
# because that module needs these settings at import time — confirm.
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _IMAGE_CACHE,
    _GEOCODE_CACHE,
    _save_llm_cache,
    _save_image_cache,
    _save_geocode_cache,
)

# Category presets to warm. These names must stay identical to the category
# names used in streamlit_app.py.
CATEGORY_NAMES = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]

# The five most-searched destinations; warming these keeps the HF demo smooth.
TOP_CITIES = ["Paris", "London", "Rome", "Barcelona", "New York"]


def prewarm():
    """Warm the LLM, image and geocode caches for every city/category combo.

    For each city in ``TOP_CITIES`` and each name in ``CATEGORY_NAMES`` this
    builds a categories dict with only that one category enabled, skips the
    combo when ``(city, <JSON of that dict>)`` is already a key of
    ``_LLM_CACHE``, and otherwise calls ``get_recommendations_cached``.
    One progress line is printed per combo, followed by a summary of how much
    each cache grew.

    The summary and the three ``_save_*_cache()`` calls run in a ``finally``
    block, so work done before a Ctrl-C (``KeyboardInterrupt`` is not caught
    by the per-combo ``except Exception``) or any other unexpected exit is
    still written to disk; the interrupting exception then propagates.

    Returns:
        None. Results are reported on stdout and through the saved caches.
    """
    total_combos = len(TOP_CITIES) * len(CATEGORY_NAMES)
    success = 0
    skipped = 0
    fail = 0

    # Snapshot cache sizes so the summary can report how many entries were added.
    llm_before = len(_LLM_CACHE)
    image_before = len(_IMAGE_CACHE)
    geo_before = len(_GEOCODE_CACHE)

    print(f"Pre-warming caches: {len(TOP_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
    print(f"  Existing: LLM={llm_before} | Images={image_before} | Geocode={geo_before}")
    print()

    # Same iteration order as nested loops: all categories of one city first.
    combos = ((city, cat_name) for city in TOP_CITIES for cat_name in CATEGORY_NAMES)
    finished = False
    try:
        for combo_idx, (city, cat_name) in enumerate(combos, start=1):
            # Build categories dict matching the UI format exactly
            categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
            # NOTE(review): assumes the recommender builds its cache key from
            # the same sorted-keys JSON dump — confirm in services.recommender.
            cat_hash = json.dumps(categories, sort_keys=True)

            # Check if already cached
            if (city, cat_hash) in _LLM_CACHE:
                print(f"  [{combo_idx:>2}/{total_combos}] ⏭️  {city} / {cat_name} — already cached")
                skipped += 1
                continue

            print(f"  [{combo_idx:>2}/{total_combos}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
            start = time.time()
            try:
                result = get_recommendations_cached(
                    city=city,
                    num_attractions=6,  # UI default; cache key ignores this
                    categories=categories,
                    temperature=0,  # deterministic, cached
                )
                elapsed = time.time() - start
                if result:
                    items = len(result)
                    print(f"✅ {items} items in {elapsed:.1f}s")
                    success += 1
                else:
                    print(f"❌ returned None in {elapsed:.1f}s")
                    fail += 1
            except Exception as e:
                # Best effort: one failing combo must not abort the whole run.
                elapsed = time.time() - start
                print(f"❌ error after {elapsed:.1f}s: {e}")
                fail += 1

            # Brief pause for Nominatim rate limit
            time.sleep(1.5)
        finished = True
    finally:
        # Runs on normal completion AND on interruption, so partial work is
        # reported and persisted instead of being lost.
        llm_new = len(_LLM_CACHE) - llm_before
        image_new = len(_IMAGE_CACHE) - image_before
        geo_new = len(_GEOCODE_CACHE) - geo_before

        print()
        print("═" * 55)
        if finished:
            print("Pre-warm complete!")
        else:
            print("Pre-warm interrupted — saving partial results")
        print(f"  Combos: {success} succeeded, {skipped} skipped, {fail} failed")
        print(f"  New cache entries: LLM={llm_new}, Images={image_new}, Geocode={geo_new}")
        print(f"  Total entries: LLM={len(_LLM_CACHE)}, Images={len(_IMAGE_CACHE)}, Geocode={len(_GEOCODE_CACHE)}")

        # Force save all caches
        _save_llm_cache()
        _save_image_cache()
        _save_geocode_cache()
        print()
        print("All caches saved to disk ✅")

# Run the pre-warm only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    prewarm()