| |
| """Retry 17 missing/None cache combos single-threaded to avoid save races. |
| |
| Usage: |
| cd roamify && python scripts/prewarm_retry_missing.py |
| """ |
|
|
| import json |
| import os |
| import sys |
| import time |
|
|
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) |
|
|
| from dotenv import load_dotenv |
| load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True) |
|
|
| from services.recommender import ( |
| get_recommendations_cached, |
| _LLM_CACHE, |
| _save_llm_cache, |
| _save_image_cache, |
| _save_geocode_cache, |
| ) |
|
|
# All category names used as keys of the ``categories`` mapping that
# process_one() passes to the recommender (one flag per name).
CATEGORY_NAMES = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]
|
|
| |
| |
# City -> categories whose cache entry is still missing (or None).  Grouped by
# city for readability, then flattened into the (city, category) pairs that
# main() iterates over, in exactly the order listed here.
_MISSING_BY_CITY = {
    "Montreal": ("Landmark", "Gems"),
    "Moscow": ("Photo",),
    "Oslo": ("Landmark",),
    "Reykjavik": ("Culture", "Photo"),
    "Santiago": ("Nature",),
    "Stockholm": ("Landmark",),
    "Tel Aviv": ("Nature", "Photo"),
    "Toronto": ("Photo",),
    "Vancouver": ("Gems",),
    "Venice": ("Nature",),
    "Warsaw": ("Culture",),
    "Washington": ("Culture", "Nature", "Photo"),
}

MISSING = [
    (city, category)
    for city, categories in _MISSING_BY_CITY.items()
    for category in categories
]
|
|
# Run-wide counters: process_one() increments them and main() reports them.
# "skipped" is initialised here alongside the other counters so it exists from
# the start instead of being created lazily on the first cache hit; the
# existing ``STATS.get("skipped", 0)`` call sites keep working unchanged.
STATS = {"success": 0, "fail": 0, "skipped": 0, "total": len(MISSING)}
|
|
|
|
def process_one(city: str, cat_name: str, idx: int) -> None:
    """Retry a single city/category combo and record the outcome in ``STATS``.

    Builds a ``categories`` mapping in which only ``cat_name`` is ``True``,
    skips the combo when ``_LLM_CACHE`` already holds a non-``None`` value
    under the derived key, and otherwise calls ``get_recommendations_cached``.
    One progress line is printed per combo.  Exceptions raised by the
    recommender call are caught, counted as a failure and printed rather than
    propagated.

    Args:
        city: City name passed to the recommender.
        cat_name: The single category to enable.  A value that is not in
            ``CATEGORY_NAMES`` leaves every category flag ``False``.
        idx: Position of this combo, shown in the ``[idx/total]`` prefix.
    """
    categories = {name: name == cat_name for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)

    # NOTE(review): this key layout is assumed to mirror the one the
    # recommender uses for _LLM_CACHE -- confirm against services.recommender.
    # If it does not match, the skip below simply never triggers.
    cache_key = json.dumps([city, cat_hash])
    if cache_key in _LLM_CACHE and _LLM_CACHE[cache_key] is not None:
        STATS["skipped"] = STATS.get("skipped", 0) + 1
        print(f" [{idx:>2}/{STATS['total']}] ⏭️ {city} / {cat_name} — already cached", flush=True)
        return

    # end=" " keeps the cursor on this line so the outcome is appended to it.
    print(f" [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
        elapsed = time.perf_counter() - start
        if result:
            items = len(result)
            STATS["success"] += 1
            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
        else:
            STATS["fail"] += 1
            # Show the actual falsy value: prints "None" for None exactly as
            # before, but no longer mislabels e.g. an empty list as None.
            print(f"❌ returned {result!r} in {elapsed:.1f}s", flush=True)
    except Exception as e:
        # Script-level boundary: one failing combo must not abort the run.
        elapsed = time.perf_counter() - start
        STATS["fail"] += 1
        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)
|
|
|
|
def _save_all_caches():
    """Call the three cache-save helpers in LLM, image, geocode order."""
    for save in (_save_llm_cache, _save_image_cache, _save_geocode_cache):
        save()


def main():
    """Retry every pair in ``MISSING`` sequentially and print a summary.

    Each pair is handed to ``process_one`` together with its 1-based position,
    and all three caches are saved right after each pair.  Once the loop is
    done the counters from ``STATS`` and the growth of ``_LLM_CACHE`` are
    printed, followed by one last save of all caches.
    """
    entries_at_start = len(_LLM_CACHE)

    print(f"Retrying {STATS['total']} missing cache combos (single worker — no race conditions)")
    print(f" Existing LLM cache entries: {entries_at_start}")
    print()

    for position, (city, cat) in enumerate(MISSING, start=1):
        process_one(city, cat, position)
        # Save after every combo so finished work is not left only in memory.
        _save_all_caches()

    entries_added = len(_LLM_CACHE) - entries_at_start
    separator = "═" * 55

    print()
    print(separator)
    print("Retry complete!")
    print(f" Results: {STATS['success']} succeeded, {STATS.get('skipped', 0)} skipped, {STATS['fail']} failed")
    print(f" New LLM cache entries: {entries_added} (total: {len(_LLM_CACHE)})")

    # Final save before reporting that everything is on disk.
    _save_all_caches()
    print("All caches saved to disk ✅")
|
|
|
|
# Run the retry pass only when this file is executed as a script, so that
# importing the module does not trigger any recommender calls.
if __name__ == "__main__":
    main()
|
|