#!/usr/bin/env python3
"""
Pre-warm caches for popular travel cities — all categories.

Iterates top cities × all 7 category presets, populating all 3 disk caches:
  - .llm_cache.json     → instant repeat lookups (city + category)
  - .image_cache.json   → instant image loads
  - .geocode_cache.json → instant geocoding

Cache keys are (city, categories_hash) so any num_attractions value hits the cache.

Usage:
    cd roamify && python scripts/prewarm_cache.py
"""

import itertools
import json
import os
import sys
import time

# Make the project's `src` directory importable before pulling in `services`.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from dotenv import load_dotenv

# Load the project .env before importing the recommender (order kept from the
# original script; presumably the recommender reads env vars at import time).
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _IMAGE_CACHE,
    _GEOCODE_CACHE,
    _save_llm_cache,
    _save_image_cache,
    _save_geocode_cache,
)

# Must match the category names in streamlit_app.py exactly
CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# Top 5 cities — covers the most-searched destinations for a smooth HF demo
TOP_CITIES = [
    "Paris",
    "London",
    "Rome",
    "Barcelona",
    "New York",
]


def _save_all_caches():
    """Invoke the recommender's save hook for each of the three caches."""
    _save_llm_cache()
    _save_image_cache()
    _save_geocode_cache()


def prewarm():
    """Request recommendations for every city × single-category combination.

    For each city in ``TOP_CITIES`` and each name in ``CATEGORY_NAMES``, builds
    a categories dict with only that one category enabled and calls
    ``get_recommendations_cached``. Combinations already present in
    ``_LLM_CACHE`` are skipped. Progress and a final summary are printed to
    stdout.

    A failure in one combination is reported and counted, never fatal. The
    caches are saved in a ``finally`` clause, so entries gathered so far are
    still saved if the run is interrupted (e.g. Ctrl-C).

    Returns:
        None.
    """
    total_combos = len(TOP_CITIES) * len(CATEGORY_NAMES)
    success = 0
    skipped = 0
    fail = 0

    # Snapshot cache sizes so the summary can report how many entries were added.
    llm_before = len(_LLM_CACHE)
    image_before = len(_IMAGE_CACHE)
    geo_before = len(_GEOCODE_CACHE)

    print(f"Pre-warming caches: {len(TOP_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
    print(f" Existing: LLM={llm_before} | Images={image_before} | Geocode={geo_before}")
    print()

    try:
        combos = itertools.product(TOP_CITIES, CATEGORY_NAMES)
        for combo_idx, (city, cat_name) in enumerate(combos, start=1):
            # Build categories dict matching the UI format exactly
            categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
            cat_hash = json.dumps(categories, sort_keys=True)

            # Check if already cached
            # NOTE(review): assumes _LLM_CACHE is keyed by (city, json-string)
            # tuples, as the module docstring states — confirm the in-memory
            # key shape in services.recommender.
            if (city, cat_hash) in _LLM_CACHE:
                print(f" [{combo_idx:>2}/{total_combos}] ⏭️ {city} / {cat_name} — already cached")
                skipped += 1
                continue

            print(f" [{combo_idx:>2}/{total_combos}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
            # perf_counter is monotonic, so elapsed time cannot go negative or
            # jump if the system clock is adjusted mid-run.
            start = time.perf_counter()

            # Deliberately broad: one failing combination must not abort the
            # remaining ones; it is reported and counted instead.
            try:
                result = get_recommendations_cached(
                    city=city,
                    num_attractions=6,  # UI default; cache key ignores this
                    categories=categories,
                    temperature=0,  # deterministic, cached
                )
                elapsed = time.perf_counter() - start
                if result:
                    items = len(result)
                    print(f"✅ {items} items in {elapsed:.1f}s")
                    success += 1
                else:
                    print(f"❌ returned None in {elapsed:.1f}s")
                    fail += 1
            except Exception as e:
                elapsed = time.perf_counter() - start
                print(f"❌ error after {elapsed:.1f}s: {e}")
                fail += 1

            # Brief pause for Nominatim rate limit
            time.sleep(1.5)

        # Summary
        llm_new = len(_LLM_CACHE) - llm_before
        image_new = len(_IMAGE_CACHE) - image_before
        geo_new = len(_GEOCODE_CACHE) - geo_before

        print()
        print("═" * 55)
        print("Pre-warm complete!")
        print(f" Combos: {success} succeeded, {skipped} skipped, {fail} failed")
        print(f" New cache entries: LLM={llm_new}, Images={image_new}, Geocode={geo_new}")
        print(f" Total entries: LLM={len(_LLM_CACHE)}, Images={len(_IMAGE_CACHE)}, Geocode={len(_GEOCODE_CACHE)}")
    finally:
        # Force save all caches — also on interruption, so completed lookups
        # are not thrown away.
        _save_all_caches()
        print()
        print("All caches saved to disk ✅")


if __name__ == "__main__":
    prewarm()