#!/usr/bin/env python3
"""
Fast warmup — generates LLM data for missing combos only.
Skips the slow sequential image fix; get_recommendations already does parallel enrichment.
"""
import os, sys, time, json
from datetime import datetime
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)
from services.recommender import (
get_recommendations_cached,
_LLM_CACHE,
_IMAGE_CACHE,
_GEOCODE_CACHE,
)
# Destinations the warmup iterates over.
CITIES = [
    "Paris",
    "London",
    "Rome",
    "Barcelona",
    "New York",
    "Tokyo",
    "Bangkok",
    "Sydney",
    "Cape Town",
    "Rio de Janeiro",
    "Istanbul",
    "Dubai",
    "Seoul",
    "Bali",
    "Prague",
    "San Francisco",
    "Marrakech",
    "Kyoto",
]

# Category names; each warmup request switches exactly one of them on.
CATEGORIES = [
    "Landmark",
    "Culture",
    "Nature",
    "Gems",
    "Photo",
    "Food",
    "Shopping",
]

# Checkpoint file, kept one directory above this script.
PROGRESS_FILE = os.path.join(
    os.path.dirname(__file__),
    "..",
    ".warmup_progress.json",
)
def cat_dict(cat_name: str) -> dict:
    """Build the category-selection mapping with only *cat_name* switched on.

    Every entry of ``CATEGORIES`` becomes a key, in list order. Its value is
    ``True`` for the entry equal to *cat_name* and ``False`` for the others.
    """
    selection = {}
    for name in CATEGORIES:
        selection[name] = name == cat_name
    return selection
def cat_hash(cat_name: str) -> str:
    """Serialise the selection mapping for *cat_name* as JSON with sorted keys.

    The resulting string is paired with a city name when this script probes
    ``_LLM_CACHE``.
    """
    selection = cat_dict(cat_name)
    return json.dumps(selection, sort_keys=True)
def load_progress() -> dict:
    """Load the warmup checkpoint stored in ``PROGRESS_FILE``.

    Returns:
        The parsed progress dict, always with a dict under ``"combos"``.
        A fresh ``{"version": 1, "combos": {}}`` is returned when the file is
        missing, unreadable, not decodable/valid JSON, or does not contain a
        JSON object. If the object lacks a usable ``"combos"`` dict, an empty
        one is put in place so callers can index it safely.
    """
    try:
        with open(PROGRESS_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a missing or unreadable file.
        return {"version": 1, "combos": {}}

    if not isinstance(data, dict):
        # Valid JSON but the wrong shape (e.g. a list): start over.
        return {"version": 1, "combos": {}}
    if not isinstance(data.get("combos"), dict):
        data["combos"] = {}
    return data
def save_progress(progress: dict):
    """Persist *progress* to ``PROGRESS_FILE`` as indented JSON, atomically.

    The data is first written to a sibling temporary file and then moved over
    the real file with ``os.replace``. An interrupted or failed write therefore
    never leaves a truncated checkpoint, which ``load_progress`` would
    silently treat as "no progress yet".

    Args:
        progress: JSON-serialisable progress dict.

    Raises:
        OSError: If the temporary file cannot be written or moved into place.
        TypeError: If *progress* contains values ``json.dump`` cannot encode.
    """
    tmp_path = f"{PROGRESS_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(progress, f, indent=2)
        os.replace(tmp_path, PROGRESS_FILE)
    except BaseException:
        # Includes KeyboardInterrupt: drop the partial temp file, keep the
        # previous checkpoint untouched, and let the error propagate.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def combo_id(city: str, cat: str) -> str:
    """Return the progress-file key for a city/category pair: ``"<city>::<cat>"``."""
    return "{}::{}".format(city, cat)
def is_done(progress: dict, cid: str) -> bool:
    """Report whether combo *cid* is recorded as successfully completed.

    Args:
        progress: Progress dict holding per-combo entries under ``"combos"``.
        cid: Combo identifier to look up.

    Returns:
        ``True`` only when an entry exists for *cid* and its ``"status"`` is
        ``"success"``; ``False`` in every other case. The result is always a
        real ``bool`` (a missing or empty entry no longer leaks ``None``/``{}``).

    Raises:
        KeyError: If *progress* has no ``"combos"`` key.
    """
    entry = progress["combos"].get(cid)
    return bool(entry) and entry.get("status") == "success"
progress = load_progress()
llm_before = len(_LLM_CACHE)  # baseline for the "new cache entries" figure

# Only process combos that actually need LLM generation: a combo is skipped
# when the progress file already records it as a success, or when its
# (city, category-hash) key is already present in the LLM cache. Cached
# combos are skipped only; no progress entry is written for them here.
todo = [
    (city, cat)
    for city in CITIES
    for cat in CATEGORIES
    if not is_done(progress, combo_id(city, cat))
    and (city, cat_hash(cat)) not in _LLM_CACHE
]

total = len(todo)
print(f"Missing combos needing API calls: {total}")
print()
# Generate each missing combo in turn. Progress is saved after every attempt,
# so a later run skips whatever already succeeded.
for i, (city, cat) in enumerate(todo, 1):
    cid = combo_id(city, cat)
    print(f"[{i}/{total}] 🔍 {city} / {cat}...", end=" ", flush=True)
    start = time.time()
    # Passed into the recommender call; presumably it appends one dict per
    # provider attempt (the loop below reads "provider", "status", "items"
    # and "elapsed" from each entry).
    provider_log = []
    try:
        result = get_recommendations_cached(
            city=city, num_attractions=19,
            categories=cat_dict(cat),
            temperature=0,
            provider_log=provider_log,
        )
        elapsed = time.time() - start
        # One line per logged provider attempt.
        for entry in provider_log:
            label = entry.get("provider", "?")
            status = "✅" if entry.get("status") == "success" else "❌"
            items = entry.get("items", 0)
            dur = entry.get("elapsed", "?")
            print(f"\n {label} {status} {dur}s ({items}it)", end="", flush=True)
        if result:
            items = len(result)
            print(f"\n✅ {items} items, {elapsed:.0f}s total")
            progress["combos"][cid] = {
                "status": "success", "items": items,
                "elapsed": round(elapsed, 1),
                "provider_chain": provider_log,
                "timestamp": datetime.now().isoformat(),
            }
        else:
            # Any falsy result (None or empty) counts as a failed combo.
            print(f"\n❌ returned None, {elapsed:.0f}s total")
            progress["combos"][cid] = {
                "status": "failed", "elapsed": round(elapsed, 1),
                "provider_chain": provider_log,
                "error": "all providers returned None",
                "timestamp": datetime.now().isoformat(),
            }
    except Exception as e:
        # Script-level boundary: one failing combo must not abort the run.
        elapsed = time.time() - start
        print(f"\n❌ {elapsed:.0f}s — {e}")
        progress["combos"][cid] = {
            "status": "failed", "elapsed": round(elapsed, 1),
            # Keep whatever attempts were logged before the exception, the
            # same way the "returned None" failure record above does.
            "provider_chain": provider_log,
            "error": str(e), "timestamp": datetime.now().isoformat(),
        }
    # Checkpoint after every combo, whether it succeeded or failed.
    save_progress(progress)
    if i < total:
        time.sleep(1.5)  # Nominatim-friendly pause
# Summary: totals cover every entry in the progress file, including entries
# loaded from earlier runs.
statuses = [record.get("status") for record in progress["combos"].values()]
success = statuses.count("success")
failed = statuses.count("failed")
new_llm = len(_LLM_CACHE) - llm_before

separator = "=" * 50
print(f"\n{separator}")
print(f"Done! {success} success, {failed} failed, {new_llm} new cache entries")

failed_combos = [
    failed_id
    for failed_id, record in progress["combos"].items()
    if record.get("status") == "failed"
]
if failed_combos:
    print("Failed combos:")
    for failed_id in failed_combos:
        pretty = failed_id.replace("::", " / ")
        print(f" ❌ {pretty}")
|