#!/usr/bin/env python3
"""Retry 17 missing/None cache combos single-threaded to avoid save races.

Usage:
    cd roamify && python scripts/prewarm_retry_missing.py
"""
import json
import os
import sys
import time

# Make the project's ``src`` directory importable before the service imports
# below; this script is run directly rather than as part of a package.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from dotenv import load_dotenv

# Load the project-level .env before importing the recommender module.
# ``override=True`` lets values from the file replace variables that are
# already set in the process environment.
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _save_llm_cache,
    _save_image_cache,
    _save_geocode_cache,
)

CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# 17 combos missing from cache — identified by cross-referencing cache keys
# against all 12 cities × 7 categories
MISSING = [
    ("Montreal", "Landmark"),
    ("Montreal", "Gems"),
    ("Moscow", "Photo"),
    ("Oslo", "Landmark"),
    ("Reykjavik", "Culture"),
    ("Reykjavik", "Photo"),
    ("Santiago", "Nature"),
    ("Stockholm", "Landmark"),
    ("Tel Aviv", "Nature"),
    ("Tel Aviv", "Photo"),
    ("Toronto", "Photo"),
    ("Vancouver", "Gems"),
    ("Venice", "Nature"),
    ("Warsaw", "Culture"),
    ("Washington", "Culture"),
    ("Washington", "Nature"),
    ("Washington", "Photo"),
]

# Run counters, mutated by process_one() and reported by main().
STATS = {"success": 0, "fail": 0, "skipped": 0, "total": len(MISSING)}


def _save_all_caches() -> None:
    """Persist the LLM, image and geocode caches via the recommender's savers."""
    _save_llm_cache()
    _save_image_cache()
    _save_geocode_cache()


def process_one(city: str, cat_name: str, idx: int) -> None:
    """Retry a single city/category combo and record the outcome in STATS.

    Args:
        city: City name passed through to ``get_recommendations_cached``.
        cat_name: The one entry of ``CATEGORY_NAMES`` to enable; every other
            category is passed as ``False``.
        idx: 1-based position of this combo, used only in progress output.

    The function never raises for a failed lookup: any ``Exception`` from the
    recommender is counted as a failure and printed so the remaining combos
    still run.
    """
    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)

    # Check if already cached (e.g. from an earlier retry or interleaved save).
    # NOTE(review): this key format presumably mirrors the one used inside
    # services.recommender — confirm against that module.
    cache_key = json.dumps([city, cat_hash])
    if _LLM_CACHE.get(cache_key) is not None:
        STATS["skipped"] += 1
        print(f" [{idx:>2}/{STATS['total']}] ⏭️ {city} / {cat_name} — already cached", flush=True)
        return

    print(f" [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during a slow request.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
    except Exception as e:
        # Deliberately broad: one failing combo must not abort the batch.
        elapsed = time.perf_counter() - start
        STATS["fail"] += 1
        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)
        return

    elapsed = time.perf_counter() - start
    if result:
        STATS["success"] += 1
        print(f"✅ {len(result)} items in {elapsed:.1f}s", flush=True)
        return

    # Any falsy result counts as a failure; report which kind it was instead
    # of always claiming None.
    STATS["fail"] += 1
    if result is None:
        print(f"❌ returned None in {elapsed:.1f}s", flush=True)
    else:
        print(f"❌ returned empty result in {elapsed:.1f}s", flush=True)


def main():
    """Process every combo in MISSING sequentially, saving caches as it goes."""
    llm_before = len(_LLM_CACHE)
    print(f"Retrying {STATS['total']} missing cache combos (single worker — no race conditions)")
    print(f" Existing LLM cache entries: {llm_before}")
    print()

    # Single-threaded — one at a time, no save races
    for idx, (city, cat) in enumerate(MISSING, 1):
        process_one(city, cat, idx)
        # Force save after each combo to persist progress
        _save_all_caches()

    llm_new = len(_LLM_CACHE) - llm_before
    print()
    print("═" * 55)
    print("Retry complete!")
    print(f" Results: {STATS['success']} succeeded, {STATS['skipped']} skipped, {STATS['fail']} failed")
    print(f" New LLM cache entries: {llm_new} (total: {len(_LLM_CACHE)})")

    # Final save; redundant after the per-combo saves but cheap and harmless.
    _save_all_caches()
    print("All caches saved to disk ✅")


if __name__ == "__main__":
    main()