#!/usr/bin/env python3
"""Retry 17 missing/None cache combos single-threaded to avoid save races.
Usage:
cd roamify && python scripts/prewarm_retry_missing.py
"""
import json
import os
import sys
import time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)
from services.recommender import (
get_recommendations_cached,
_LLM_CACHE,
_save_llm_cache,
_save_image_cache,
_save_geocode_cache,
)
# Category flags sent to the recommender; each retry enables exactly one of
# them and sends the rest as False.
CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# The 17 (city, category) combos missing from the cache — identified by
# cross-referencing cache keys against all 12 cities × 7 categories.
MISSING = [
    ("Montreal", "Landmark"),
    ("Montreal", "Gems"),
    ("Moscow", "Photo"),
    ("Oslo", "Landmark"),
    ("Reykjavik", "Culture"),
    ("Reykjavik", "Photo"),
    ("Santiago", "Nature"),
    ("Stockholm", "Landmark"),
    ("Tel Aviv", "Nature"),
    ("Tel Aviv", "Photo"),
    ("Toronto", "Photo"),
    ("Vancouver", "Gems"),
    ("Venice", "Nature"),
    ("Warsaw", "Culture"),
    ("Washington", "Culture"),
    ("Washington", "Nature"),
    ("Washington", "Photo"),
]

# Run counters mutated while the combos are processed and reported in the
# final summary. "skipped" is initialised here alongside the other counters
# so that every reported key exists from the start of the run.
STATS = {"success": 0, "fail": 0, "skipped": 0, "total": len(MISSING)}
def process_one(city: str, cat_name: str, idx: int) -> None:
    """Retry a single city/category combo and record the outcome in STATS.

    Args:
        city: City name passed through to ``get_recommendations_cached``.
        cat_name: The one category to enable; every other name in
            ``CATEGORY_NAMES`` is sent as ``False``.
        idx: Position of this combo in the run, used only in progress output.

    Exactly one of ``STATS["skipped"]``, ``STATS["success"]`` or
    ``STATS["fail"]`` is incremented per call. Exceptions raised while
    fetching are caught and counted as failures so that one bad combo does
    not abort the remaining retries.
    """
    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)

    # Check if already cached (e.g. from an earlier retry or interleaved save).
    # NOTE(review): this key layout presumably mirrors the one built inside
    # services.recommender — confirm there if the skip branch never fires.
    cache_key = json.dumps([city, cat_hash])
    if cache_key in _LLM_CACHE and _LLM_CACHE[cache_key] is not None:
        STATS["skipped"] = STATS.get("skipped", 0) + 1
        print(f" [{idx:>2}/{STATS['total']}] ⏭️ {city} / {cat_name} — already cached", flush=True)
        return

    print(f" [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments during a slow request.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
        elapsed = time.perf_counter() - start
        if result:
            items = len(result)
            STATS["success"] += 1
            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
        else:
            # Any falsy result counts as a failure; print what actually came
            # back so an empty result is not misreported as None.
            STATS["fail"] += 1
            print(f"❌ returned {result!r} in {elapsed:.1f}s", flush=True)
    except Exception as e:
        # Deliberately broad: this is the per-combo boundary of a batch
        # script, and the failure is reported and counted rather than hidden.
        elapsed = time.perf_counter() - start
        STATS["fail"] += 1
        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)
def main():
    """Run every combo in MISSING one at a time and print a summary.

    After each combo the LLM, image and geocode caches are saved so progress
    survives an interrupted run; all three are saved once more after the
    summary is printed.
    """
    persisters = (_save_llm_cache, _save_image_cache, _save_geocode_cache)
    entries_at_start = len(_LLM_CACHE)

    print(f"Retrying {STATS['total']} missing cache combos (single worker — no race conditions)")
    print(f" Existing LLM cache entries: {entries_at_start}")
    print()

    # Strictly sequential: each combo finishes and is persisted before the
    # next one starts, so saves never overlap.
    for position, combo in enumerate(MISSING, start=1):
        process_one(*combo, position)
        for persist in persisters:
            persist()

    entries_added = len(_LLM_CACHE) - entries_at_start
    skipped = STATS.get("skipped", 0)

    print()
    print("═" * 55)
    print("Retry complete!")
    print(f" Results: {STATS['success']} succeeded, {skipped} skipped, {STATS['fail']} failed")
    print(f" New LLM cache entries: {entries_added} (total: {len(_LLM_CACHE)})")

    for persist in persisters:
        persist()
    print("All caches saved to disk ✅")
# Run the retry pass only when this file is executed as a script, so that
# importing the module does not trigger any recommender calls or cache writes.
if __name__ == "__main__":
    main()
|