roamify / scripts /prewarm_retry_missing.py
jofaichow's picture
v0.1.17 — Missing combo retry + post-push docs sync
c4ec807
#!/usr/bin/env python3
"""Retry 17 missing/None cache combos single-threaded to avoid save races.
Usage:
cd roamify && python scripts/prewarm_retry_missing.py
"""
import json
import os
import sys
import time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)
from services.recommender import (
get_recommendations_cached,
_LLM_CACHE,
_save_llm_cache,
_save_image_cache,
_save_geocode_cache,
)
# Every category toggle the recommender is called with; process_one() enables
# exactly one of these per request.
CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# 17 combos missing from cache — identified by cross-referencing cache keys
# against all 12 cities × 7 categories
MISSING = [
    ("Montreal", "Landmark"),
    ("Montreal", "Gems"),
    ("Moscow", "Photo"),
    ("Oslo", "Landmark"),
    ("Reykjavik", "Culture"),
    ("Reykjavik", "Photo"),
    ("Santiago", "Nature"),
    ("Stockholm", "Landmark"),
    ("Tel Aviv", "Nature"),
    ("Tel Aviv", "Photo"),
    ("Toronto", "Photo"),
    ("Vancouver", "Gems"),
    ("Venice", "Nature"),
    ("Warsaw", "Culture"),
    ("Washington", "Culture"),
    ("Washington", "Nature"),
    ("Washington", "Photo"),
]

# Run counters mutated by process_one() and reported by main().
# "skipped" is initialised up front so every counter exists from the start
# (it is placed last, matching the order it would otherwise be inserted in).
STATS = {"success": 0, "fail": 0, "total": len(MISSING), "skipped": 0}
def process_one(city: str, cat_name: str, idx: int) -> None:
    """Retry a single city/category combo and record the outcome in ``STATS``.

    Builds a category mapping in which only ``cat_name`` is enabled, skips the
    combo if the LLM cache already holds a non-``None`` value for it, and
    otherwise calls ``get_recommendations_cached``. Progress is printed to
    stdout. Any exception raised while fetching is reported and counted as a
    failure instead of propagating, so one bad combo does not abort the run.

    Args:
        city: City name passed to the recommender.
        cat_name: The single category name to enable for this request.
        idx: Position of this combo in the run; used only in progress output.
    """
    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)

    # Check if already cached (e.g. from an earlier retry or interleaved save).
    # NOTE(review): assumes the recommender keys _LLM_CACHE as
    # json.dumps([city, cat_hash]) — confirm against services.recommender.
    cache_key = json.dumps([city, cat_hash])
    if _LLM_CACHE.get(cache_key) is not None:
        # .get() fallback kept so this works even if STATS has no "skipped" key yet.
        STATS["skipped"] = STATS.get("skipped", 0) + 1
        print(f" [{idx:>2}/{STATS['total']}] ⏭️ {city} / {cat_name} — already cached", flush=True)
        return

    print(f" [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
    # perf_counter() is monotonic, so the elapsed figure cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
        elapsed = time.perf_counter() - start
        if result:
            items = len(result)
            STATS["success"] += 1
            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
        else:
            STATS["fail"] += 1
            # Report what actually came back: None and an empty result are
            # both failures, but they are different symptoms.
            outcome = "returned None" if result is None else "returned no items"
            print(f"❌ {outcome} in {elapsed:.1f}s", flush=True)
    except Exception as e:
        # Deliberate best-effort boundary: log and count, then move on.
        elapsed = time.perf_counter() - start
        STATS["fail"] += 1
        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)
def main():
    """Process every entry of ``MISSING`` one at a time and print a summary.

    The three cache savers are invoked after each combo so partial progress
    is kept, and once more after the summary has been printed.
    """

    def flush_caches():
        # Call the LLM, image and geocode cache savers, always in this order.
        _save_llm_cache()
        _save_image_cache()
        _save_geocode_cache()

    baseline = len(_LLM_CACHE)
    total = STATS["total"]
    print(f"Retrying {total} missing cache combos (single worker — no race conditions)")
    print(f" Existing LLM cache entries: {baseline}")
    print()

    # Strictly sequential: one combo, then a save, then the next combo.
    position = 0
    for city, cat in MISSING:
        position += 1
        process_one(city, cat, position)
        flush_caches()

    added = len(_LLM_CACHE) - baseline
    succeeded = STATS["success"]
    skipped = STATS.get("skipped", 0)
    failed = STATS["fail"]

    print()
    print("═" * 55)
    print("Retry complete!")
    print(f" Results: {succeeded} succeeded, {skipped} skipped, {failed} failed")
    print(f" New LLM cache entries: {added} (total: {len(_LLM_CACHE)})")

    flush_caches()
    print("All caches saved to disk ✅")


if __name__ == "__main__":
    main()