Spaces:

jofaichow
/

roamify

Running

File size: 3,648 Bytes

c4ec807

#!/usr/bin/env python3
"""Retry 17 missing/None cache combos single-threaded to avoid save races.

Usage:
    cd roamify && python scripts/prewarm_retry_missing.py
"""

import json
import os
import sys
import time

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _save_llm_cache,
    _save_image_cache,
    _save_geocode_cache,
)

# Category flags sent to the recommender. Each retry enables exactly one of
# these and sets every other one to False (see process_one).
CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# 17 combos missing from cache — identified by cross-referencing cache keys
# against all 12 cities × 7 categories
MISSING = [
    ("Montreal", "Landmark"),
    ("Montreal", "Gems"),
    ("Moscow", "Photo"),
    ("Oslo", "Landmark"),
    ("Reykjavik", "Culture"),
    ("Reykjavik", "Photo"),
    ("Santiago", "Nature"),
    ("Stockholm", "Landmark"),
    ("Tel Aviv", "Nature"),
    ("Tel Aviv", "Photo"),
    ("Toronto", "Photo"),
    ("Vancouver", "Gems"),
    ("Venice", "Nature"),
    ("Warsaw", "Culture"),
    ("Washington", "Culture"),
    ("Washington", "Nature"),
    ("Washington", "Photo"),
]

# Run-wide outcome counters, mutated by process_one and reported by main.
# "skipped" is declared up front so the tally always has the same shape,
# whether or not any combo ends up being skipped.
STATS = {"success": 0, "fail": 0, "skipped": 0, "total": len(MISSING)}


def process_one(city: str, cat_name: str, idx: int) -> None:
    """Retry a single city/category combo and tally the outcome in ``STATS``.

    Args:
        city: City name forwarded to ``get_recommendations_cached``.
        cat_name: The one entry of ``CATEGORY_NAMES`` to enable; every other
            category is sent as ``False``.
        idx: Position of this combo in the run, used only in the progress line.

    The outcome is printed to stdout and counted under ``"skipped"``,
    ``"success"`` or ``"fail"``. Any ``Exception`` raised while fetching is
    caught and counted as a failure, so one bad combo does not stop the
    caller's loop.
    """
    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)

    # Check if already cached (e.g. from an earlier retry or interleaved save).
    # An entry whose value is None counts as missing and is retried.
    # NOTE(review): assumes the recommender keys _LLM_CACHE by this exact JSON
    # string — confirm against services.recommender.
    cache_key = json.dumps([city, cat_hash])
    if _LLM_CACHE.get(cache_key) is not None:
        STATS["skipped"] = STATS.get("skipped", 0) + 1
        print(f"  [{idx:>2}/{STATS['total']}] ⏭️  {city} / {cat_name} — already cached", flush=True)
        return

    print(f"  [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the call.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
        elapsed = time.perf_counter() - start
        if result:
            items = len(result)
            STATS["success"] += 1
            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
        else:
            # Any falsy result (None or an empty collection) is a failure.
            STATS["fail"] += 1
            print(f"❌ returned None in {elapsed:.1f}s", flush=True)
    except Exception as e:
        # Script-level boundary: report the error and let the run continue.
        elapsed = time.perf_counter() - start
        STATS["fail"] += 1
        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)


def main():
    """Retry every combo in ``MISSING`` sequentially and print a summary.

    All three caches are written to disk after each combo, so progress
    survives an interrupted run, and once more after the summary.
    """
    savers = (_save_llm_cache, _save_image_cache, _save_geocode_cache)

    def persist_caches():
        # Flush the LLM, image and geocode caches, in that order.
        for save in savers:
            save()

    llm_before = len(_LLM_CACHE)

    print(f"Retrying {STATS['total']} missing cache combos (single worker — no race conditions)")
    print(f"  Existing LLM cache entries: {llm_before}")
    print()

    # Plain sequential loop: exactly one combo in flight at a time.
    for idx, combo in enumerate(MISSING, start=1):
        city, cat = combo
        process_one(city, cat, idx)
        persist_caches()

    llm_new = len(_LLM_CACHE) - llm_before

    print()
    print("═" * 55)
    print("Retry complete!")
    print(f"  Results: {STATS['success']} succeeded, {STATS.get('skipped', 0)} skipped, {STATS['fail']} failed")
    print(f"  New LLM cache entries: {llm_new} (total: {len(_LLM_CACHE)})")

    persist_caches()
    print("All caches saved to disk ✅")


# Run the retry pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()