"""
Pre-warm LLM cache for remaining uncached cities.

Processes combos concurrently (2 workers) to maximize throughput while
respecting Nominatim's 1 req/s rate limit via a thread-safe limiter.

Each worker randomly picks between OpenRouter DeepSeek and Ollama Cloud
DeepSeek as the primary provider (via _get_providers_randomized), splitting
the workload and reducing rate-limit pressure on either provider.

Usage:
    cd roamify && python scripts/prewarm_remaining.py
"""

import json
import os
import random
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Put the sibling ``src`` directory at the front of the import path so that
# ``services.recommender`` (imported below) can be resolved when this script
# is run directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

from dotenv import load_dotenv

# Load ``../.env`` (overriding variables that are already set) BEFORE the
# recommender import below.
# NOTE(review): presumably the recommender reads provider settings from the
# environment at import time — confirm against services.recommender.
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _save_llm_cache,
    _save_image_cache,
    _save_geocode_cache,
)

# Category names; each combo enables exactly one of them when building the
# category-selection mapping passed to the recommender.
CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

# Cities processed by this script.
# NOTE(review): the name says "uncategorized" while the module docstring talks
# about "uncached" cities — presumably these are the cities not yet in the
# LLM cache; confirm before renaming.
UNCATEGORIZED_CITIES = [
    "Montreal", "Moscow", "Osaka", "Oslo", "Reykjavik",
    "Santiago", "Shanghai", "Stockholm", "Taipei", "Tel Aviv",
    "Toronto", "Vancouver", "Venice", "Warsaw", "Washington",
]

# Outcome counters shared by all worker threads; updated under _COMBO_LOCK.
_COMBO_STATS = {"success": 0, "skipped": 0, "fail": 0, "total": 0}
_COMBO_LOCK = threading.Lock()

def _record_outcome(outcome: str, message: str) -> None:
    """Increment ``_COMBO_STATS[outcome]`` and print ``message`` while holding the lock."""
    with _COMBO_LOCK:
        _COMBO_STATS[outcome] += 1
        print(message, flush=True)


def process_combo(city: str, cat_name: str, combo_idx: int, total: int) -> None:
    """Warm the cache for one city/category combo and record the outcome.

    Builds a category mapping in which only ``cat_name`` is enabled. If
    ``(city, <sorted-key JSON of that mapping>)`` is already a key of
    ``_LLM_CACHE`` the combo is counted as skipped; otherwise
    ``get_recommendations_cached`` is called and the combo is counted as a
    success (truthy result) or a failure (falsy result or exception).

    Every log line is self-contained (it carries the ``[idx/total]`` label and
    the combo name) and is printed under ``_COMBO_LOCK``, so lines emitted by
    concurrent workers cannot be spliced into each other.

    Args:
        city: City name passed to the recommender.
        cat_name: The single entry of ``CATEGORY_NAMES`` to enable.
        combo_idx: Position of this combo in the run; used only for logging.
        total: Total number of combos in the run; used only for logging.

    Exceptions raised by the recommender call are caught, counted as failures
    and reported; they are not re-raised.
    """
    categories = {name: name == cat_name for name in CATEGORY_NAMES}
    cat_hash = json.dumps(categories, sort_keys=True)
    label = f" [{combo_idx:>3}/{total}]"
    combo = f"{city} / {cat_name}"

    if (city, cat_hash) in _LLM_CACHE:
        _record_outcome("skipped", f"{label} ⏭️ {combo} — already cached")
        return

    with _COMBO_LOCK:
        print(f"{label} 🔍 {combo}...", flush=True)

    # perf_counter is monotonic, so the elapsed time cannot go negative or
    # jump if the wall clock is adjusted while the request is in flight.
    start = time.perf_counter()
    try:
        result = get_recommendations_cached(
            city=city,
            num_attractions=6,
            categories=categories,
            temperature=0,
        )
        # Kept inside the try so a truthy result without a length is still
        # reported as a failure instead of escaping the worker.
        item_count = len(result) if result else 0
    except Exception as e:
        elapsed = time.perf_counter() - start
        _record_outcome("fail", f"{label} ❌ {combo} — error after {elapsed:.1f}s: {e}")
        return

    elapsed = time.perf_counter() - start
    if result:
        _record_outcome("success", f"{label} ✅ {combo} — {item_count} items in {elapsed:.1f}s")
    else:
        # A falsy result may be None or an empty container; show which one.
        _record_outcome("fail", f"{label} ❌ {combo} — returned {result!r} in {elapsed:.1f}s")

def prewarm(max_workers: int = 2) -> None:
    """Run every city/category combo through ``process_combo`` concurrently.

    Builds the cross product of ``UNCATEGORIZED_CITIES`` and
    ``CATEGORY_NAMES``, shuffles it, and submits each combo to a thread pool.
    After the pool drains, a summary of the ``_COMBO_STATS`` counters and the
    growth of ``_LLM_CACHE`` is printed. The LLM, image and geocode caches are
    written via their ``_save_*`` helpers in a ``finally`` block, so they are
    saved even when an exception (including ``KeyboardInterrupt``) propagates
    out of the run.

    Args:
        max_workers: Number of worker threads; defaults to 2, the value the
            script previously hard-coded.
    """
    combos = [
        (city, cat_name)
        for city in UNCATEGORIZED_CITIES
        for cat_name in CATEGORY_NAMES
    ]
    total_combos = len(combos)
    _COMBO_STATS["total"] = total_combos

    llm_before = len(_LLM_CACHE)

    print(f"Pre-warming caches: {len(UNCATEGORIZED_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
    print(f" Workers: {max_workers} (concurrent) — each uses random DeepSeek provider")
    print(f" Existing LLM cache entries: {llm_before}")
    print()

    # Shuffle first, then number: the index shown in the log reflects the
    # submission order rather than the pre-shuffle position.
    random.shuffle(combos)

    try:
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = {
                pool.submit(process_combo, city, cat_name, idx, total_combos): (city, cat_name)
                for idx, (city, cat_name) in enumerate(combos, start=1)
            }

            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as exc:
                    # process_combo handles recommender errors itself, so
                    # anything arriving here is unexpected; surface it rather
                    # than dropping it silently.
                    city, cat_name = futures[future]
                    with _COMBO_LOCK:
                        _COMBO_STATS["fail"] += 1
                        print(f" ❌ {city} / {cat_name} — worker crashed: {exc!r}", flush=True)

        llm_new = len(_LLM_CACHE) - llm_before

        print()
        print("═" * 55)
        print("Pre-warm complete!")
        print(f" Combos: {_COMBO_STATS['success']} succeeded, {_COMBO_STATS['skipped']} skipped, {_COMBO_STATS['fail']} failed")
        print(f" New LLM cache entries: {llm_new} (total: {len(_LLM_CACHE)})")
    finally:
        # Persist whatever was gathered, even on an aborted run.
        _save_llm_cache()
        _save_image_cache()
        _save_geocode_cache()
        print()
        print("All caches saved to disk ✅")

# Script entry point: run the pre-warm only when executed directly.
if __name__ == "__main__":
    prewarm()