#!/usr/bin/env python3
"""
Pre-warm caches for popular travel cities — all categories.

Iterates top cities × all 7 category presets, populating all 3 disk caches:
  - .llm_cache.json     → instant repeat lookups (city + category)
  - .image_cache.json   → instant image loads
  - .geocode_cache.json → instant geocoding

Cache keys are (city, categories_hash), so any num_attractions value hits the cache.

Usage:
    cd roamify && python scripts/prewarm_cache.py
"""
import os
import sys
import time
import json
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)
from services.recommender import (
get_recommendations_cached,
_LLM_CACHE,
_IMAGE_CACHE,
_GEOCODE_CACHE,
_save_llm_cache,
_save_image_cache,
_save_geocode_cache,
)
# These names have to stay identical to the category names used in streamlit_app.py.
CATEGORY_NAMES = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]

# The top 5 cities to pre-warm — the most-searched destinations, for a smooth HF demo.
TOP_CITIES = ["Paris", "London", "Rome", "Barcelona", "New York"]
def prewarm():
    """Pre-warm the recommender caches for every top city × category combo.

    For each city in ``TOP_CITIES`` and each name in ``CATEGORY_NAMES`` a
    one-hot categories dict is built (only the current category is ``True``).
    A combo whose ``(city, categories JSON)`` key is already present in
    ``_LLM_CACHE`` is skipped; otherwise ``get_recommendations_cached`` is
    called for it. Progress is printed per combo, followed by a summary of
    how many entries each cache gained, and finally all three caches are
    explicitly saved.

    Any ``Exception`` raised while fetching or reporting a combo is printed
    and counted as a failure; the run then continues with the next combo.

    Returns:
        None. Results are reported on stdout and through the cache-saving
        helpers imported from ``services.recommender``.
    """
    # Flatten the city × category grid once so the progress index comes from
    # enumerate() instead of a hand-maintained counter.
    combos = [(city, cat_name) for city in TOP_CITIES for cat_name in CATEGORY_NAMES]
    total_combos = len(combos)
    success = 0
    skipped = 0
    fail = 0

    # Snapshot cache sizes up front so the summary can report the growth.
    llm_before = len(_LLM_CACHE)
    image_before = len(_IMAGE_CACHE)
    geo_before = len(_GEOCODE_CACHE)

    print(f"Pre-warming caches: {len(TOP_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
    print(f" Existing: LLM={llm_before} | Images={image_before} | Geocode={geo_before}")
    print()

    for combo_idx, (city, cat_name) in enumerate(combos, start=1):
        # Build categories dict matching the UI format exactly:
        # every category present, only the current one enabled.
        categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
        # NOTE(review): assumes services.recommender derives its cache key the
        # same way (JSON dump with sorted keys) — confirm against that module.
        cat_hash = json.dumps(categories, sort_keys=True)

        # Already cached: nothing to fetch, and no rate-limit pause needed.
        if (city, cat_hash) in _LLM_CACHE:
            print(f" [{combo_idx:>2}/{total_combos}] ⏭️ {city} / {cat_name} — already cached")
            skipped += 1
            continue

        print(f" [{combo_idx:>2}/{total_combos}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
        # perf_counter() is monotonic, so the elapsed figure cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        try:
            result = get_recommendations_cached(
                city=city,
                num_attractions=6,  # UI default; cache key ignores this
                categories=categories,
                temperature=0,  # deterministic, cached
            )
            elapsed = time.perf_counter() - start
            if result:
                print(f"✅ {len(result)} items in {elapsed:.1f}s")
                success += 1
            elif result is None:
                print(f"❌ returned None in {elapsed:.1f}s")
                fail += 1
            else:
                # Falsy but not None (e.g. an empty list): still a failure,
                # but report it accurately instead of claiming None.
                print(f"❌ returned no items in {elapsed:.1f}s")
                fail += 1
        except Exception as e:
            # Best-effort run: report the failure and move on to the next combo.
            elapsed = time.perf_counter() - start
            print(f"❌ error after {elapsed:.1f}s: {e}")
            fail += 1

        # Brief pause for Nominatim rate limit
        time.sleep(1.5)

    # Summary
    llm_new = len(_LLM_CACHE) - llm_before
    image_new = len(_IMAGE_CACHE) - image_before
    geo_new = len(_GEOCODE_CACHE) - geo_before

    print()
    print("═" * 55)
    print("Pre-warm complete!")
    print(f" Combos: {success} succeeded, {skipped} skipped, {fail} failed")
    print(f" New cache entries: LLM={llm_new}, Images={image_new}, Geocode={geo_new}")
    print(f" Total entries: LLM={len(_LLM_CACHE)}, Images={len(_IMAGE_CACHE)}, Geocode={len(_GEOCODE_CACHE)}")

    # Force save all caches
    _save_llm_cache()
    _save_image_cache()
    _save_geocode_cache()

    print()
    print("All caches saved to disk ✅")
# Script entry point: run the pre-warm only when executed directly, not on import.
if __name__ == "__main__":
    prewarm()
|