File size: 4,708 Bytes
83adb51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python3
"""
Fast warmup — generates LLM data for missing combos only.
Skips the slow sequential image fix; get_recommendations already does parallel enrichment.
"""
import os, sys, time, json
from datetime import datetime

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from dotenv import load_dotenv
load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

from services.recommender import (
    get_recommendations_cached,
    _LLM_CACHE,
    _IMAGE_CACHE,
    _GEOCODE_CACHE,
)

CITIES = [
    "Paris", "London", "Rome", "Barcelona", "New York", "Tokyo",
    "Bangkok", "Sydney", "Cape Town", "Rio de Janeiro", "Istanbul",
    "Dubai", "Seoul", "Bali", "Prague", "San Francisco", "Marrakech", "Kyoto",
]
CATEGORIES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

PROGRESS_FILE = os.path.join(os.path.dirname(__file__), "..", ".warmup_progress.json")

def cat_dict(cat_name: str) -> dict:
    """Build the category-selection mapping for a single category.

    Every name in ``CATEGORIES`` becomes a key; only the key equal to
    ``cat_name`` maps to ``True``, all others map to ``False``.
    """
    selection = {}
    for candidate in CATEGORIES:
        selection[candidate] = candidate == cat_name
    return selection

def cat_hash(cat_name: str) -> str:
    """Return the selection mapping for ``cat_name`` as a JSON string with sorted keys."""
    selection = cat_dict(cat_name)
    return json.dumps(selection, sort_keys=True)

def load_progress() -> dict:
    """Load the warmup progress record from ``PROGRESS_FILE``.

    Returns:
        The parsed progress dict, guaranteed to hold a dict under ``"combos"``.
        A fresh ``{"version": 1, "combos": {}}`` is returned when the file is
        missing, unreadable, not valid JSON, or does not contain a JSON object.
        If the object lacks a dict under ``"combos"``, that key is reset to an
        empty dict and the other keys are kept.
    """
    try:
        with open(PROGRESS_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {"version": 1, "combos": {}}

    if not isinstance(data, dict):
        return {"version": 1, "combos": {}}
    # The rest of the script indexes progress["combos"] directly, so make
    # sure it is always a dict.
    if not isinstance(data.get("combos"), dict):
        data["combos"] = {}
    return data

def save_progress(progress: dict):
    """Persist ``progress`` to ``PROGRESS_FILE`` as indented JSON.

    The data is first written to a sibling temporary file and then moved into
    place with ``os.replace``, so a serialization error or an interruption in
    the middle of a write cannot leave a truncated progress file behind.

    Args:
        progress: JSON-serializable progress record.

    Raises:
        TypeError, ValueError: if ``progress`` is not JSON-serializable.
        OSError: if the file cannot be written or moved into place.
    """
    tmp_path = f"{PROGRESS_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(progress, f, indent=2)
        os.replace(tmp_path, PROGRESS_FILE)
    finally:
        # After a successful replace the temp file no longer exists; after a
        # failure, remove the partial file (best effort) and let the original
        # exception propagate.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass

def combo_id(city: str, cat: str) -> str:
    """Compose the progress-record key for a city/category pair as ``"<city>::<cat>"``."""
    return "{}::{}".format(city, cat)

def is_done(progress: dict, cid: str) -> bool:
    """Report whether combo ``cid`` is recorded as successfully processed.

    Args:
        progress: Progress record holding a dict under ``"combos"``.
        cid: Combo key as produced by ``combo_id``.

    Returns:
        ``True`` only when the combo's entry is a dict whose ``"status"`` is
        ``"success"``. A missing, empty, or malformed entry yields ``False``,
        always as a real ``bool`` to match the annotation.
    """
    entry = progress["combos"].get(cid)
    return isinstance(entry, dict) and entry.get("status") == "success"

# Snapshot the starting state so the summary can report how many cache
# entries were added during this run.
progress = load_progress()
llm_before = len(_LLM_CACHE)

# Queue only the combos that still need LLM generation: skip anything already
# recorded as successful in the progress record, and anything whose key is
# already present in _LLM_CACHE.
# NOTE(review): combos found in the cache are skipped but NOT written to the
# progress record. The (city, cat_hash(cat)) key shape is assumed to match how
# services.recommender keys _LLM_CACHE — confirm.
todo = [
    (city, cat)
    for city in CITIES
    for cat in CATEGORIES
    if not is_done(progress, combo_id(city, cat))
    and (city, cat_hash(cat)) not in _LLM_CACHE
]

total = len(todo)
print(f"Missing combos needing API calls: {total}")
print()

# Process each queued combo in turn, recording the outcome and saving the
# progress record after every combo so an interrupted run can resume.
for i, (city, cat) in enumerate(todo, 1):
    cid = combo_id(city, cat)
    print(f"[{i}/{total}] 🔍 {city} / {cat}...", end=" ", flush=True)
    # Monotonic clock: the elapsed time must not be skewed by wall-clock changes.
    start = time.monotonic()
    # Handed to the recommender and read back below as a list of per-provider
    # dicts (presumably appended to by get_recommendations_cached — confirm).
    provider_log = []
    try:
        result = get_recommendations_cached(
            city=city, num_attractions=19,
            categories=cat_dict(cat),
            temperature=0,
            provider_log=provider_log,
        )
        elapsed = time.monotonic() - start

        # One line per provider attempt.
        for entry in provider_log:
            label = entry.get("provider", "?")
            status = "✅" if entry.get("status") == "success" else "❌"
            items = entry.get("items", 0)
            dur = entry.get("elapsed", "?")
            print(f"\n  {label} {status} {dur}s ({items}it)", end="", flush=True)

        if result:
            items = len(result)
            print(f"\n✅ {items} items, {elapsed:.0f}s total")
            progress["combos"][cid] = {
                "status": "success", "items": items,
                "elapsed": round(elapsed, 1),
                "provider_chain": provider_log,
                "timestamp": datetime.now().isoformat(),
            }
        else:
            # Any falsy result (None or empty) is recorded as a failure.
            print(f"\n❌ returned None, {elapsed:.0f}s total")
            progress["combos"][cid] = {
                "status": "failed", "elapsed": round(elapsed, 1),
                "provider_chain": provider_log,
                "error": "all providers returned None",
                "timestamp": datetime.now().isoformat(),
            }
    except Exception as e:
        # Broad catch is deliberate: one failing combo must not abort the
        # whole warmup run; the error is recorded and the loop continues.
        elapsed = time.monotonic() - start
        print(f"\n❌ {elapsed:.0f}s — {e}")
        progress["combos"][cid] = {
            "status": "failed", "elapsed": round(elapsed, 1),
            # Keep whatever was logged before the failure, consistent with
            # the other outcome records.
            "provider_chain": provider_log,
            "error": str(e), "timestamp": datetime.now().isoformat(),
        }

    save_progress(progress)
    if i < total:
        time.sleep(1.5)  # Nominatim-friendly pause

# Summary — the counts cover every combo in the progress record, including
# entries loaded from earlier runs.
outcomes = [entry.get("status") for entry in progress["combos"].values()]
success = outcomes.count("success")
failed = outcomes.count("failed")
new_llm = len(_LLM_CACHE) - llm_before
print("\n" + "=" * 50)
print(f"Done! {success} success, {failed} failed, {new_llm} new cache entries")

# List every combo currently recorded as failed, in "City / Category" form.
failed_combos = [
    name
    for name, entry in progress["combos"].items()
    if entry.get("status") == "failed"
]
if failed_combos:
    print("Failed combos:")
    for failed_name in failed_combos:
        print(f"  ❌ {failed_name.replace('::', ' / ')}")