Spaces:

jofaichow
/

roamify

Running

App Files Files Community

roamify / scripts /prewarm_retry_missing.py

jofaichow

v0.1.17 — Missing combo retry + post-push docs sync

c4ec807 10 days ago

raw

history blame contribute delete

3.65 kB

	#!/usr/bin/env python3
	"""Retry 17 missing/None cache combos single-threaded to avoid save races.

	Usage:
	cd roamify && python scripts/prewarm_retry_missing.py
	"""

	import json
	import os
	import sys
	import time

	sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))

	from dotenv import load_dotenv
	load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), override=True)

	from services.recommender import (
	get_recommendations_cached,
	_LLM_CACHE,
	_save_llm_cache,
	_save_image_cache,
	_save_geocode_cache,
	)

	# Category names; process_one() enables exactly one of them per request and
	# passes every other name as False.
	CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]

	# 17 combos missing from cache — identified by cross-referencing cache keys
	# against all 12 cities × 7 categories
	MISSING = [
	    ("Montreal", "Landmark"),
	    ("Montreal", "Gems"),
	    ("Moscow", "Photo"),
	    ("Oslo", "Landmark"),
	    ("Reykjavik", "Culture"),
	    ("Reykjavik", "Photo"),
	    ("Santiago", "Nature"),
	    ("Stockholm", "Landmark"),
	    ("Tel Aviv", "Nature"),
	    ("Tel Aviv", "Photo"),
	    ("Toronto", "Photo"),
	    ("Vancouver", "Gems"),
	    ("Venice", "Nature"),
	    ("Warsaw", "Culture"),
	    ("Washington", "Culture"),
	    ("Washington", "Nature"),
	    ("Washington", "Photo"),
	]

	# Run-wide outcome counters, mutated by process_one() and reported by main().
	# "skipped" is initialised here so every counter exists from the start
	# instead of appearing only after the first skip.
	STATS = {"success": 0, "fail": 0, "skipped": 0, "total": len(MISSING)}


	def process_one(city: str, cat_name: str, idx: int) -> None:
	    """Retry a single city/category combo and record the outcome in ``STATS``.

	    Args:
	        city: City name passed to ``get_recommendations_cached``.
	        cat_name: The one category to enable; every other name in
	            ``CATEGORY_NAMES`` is passed as ``False``.
	        idx: Position of this combo in the run, used only in progress output.

	    Prints one progress line and increments exactly one of
	    ``STATS["skipped"]``, ``STATS["success"]`` or ``STATS["fail"]``.
	    Exceptions raised while fetching recommendations are caught and counted
	    as failures, so one bad combo does not abort the caller's loop.
	    """
	    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
	    cat_hash = json.dumps(categories, sort_keys=True)

	    # Skip combos that already hold a non-None entry (e.g. from an earlier
	    # retry or interleaved save).
	    # NOTE(review): this key layout is assumed to match the one built inside
	    # services.recommender — confirm, otherwise the skip never triggers.
	    cache_key = json.dumps([city, cat_hash])
	    if cache_key in _LLM_CACHE and _LLM_CACHE[cache_key] is not None:
	        STATS["skipped"] = STATS.get("skipped", 0) + 1
	        print(f" [{idx:>2}/{STATS['total']}] ⏭️ {city} / {cat_name} — already cached", flush=True)
	        return

	    print(f" [{idx:>2}/{STATS['total']}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
	    # perf_counter() is monotonic, so the reported duration cannot be skewed
	    # (or go negative) when the system clock is adjusted mid-request.
	    start = time.perf_counter()
	    try:
	        result = get_recommendations_cached(
	            city=city,
	            num_attractions=6,
	            categories=categories,
	            temperature=0,
	        )
	        elapsed = time.perf_counter() - start
	        if result:
	            items = len(result)
	            STATS["success"] += 1
	            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
	        else:
	            STATS["fail"] += 1
	            # Report the actual falsy value: still "None" for None, but an
	            # empty result is no longer mislabelled as None.
	            print(f"❌ returned {result!r} in {elapsed:.1f}s", flush=True)
	    except Exception as e:
	        # Deliberately broad: record the failure and let the run continue.
	        elapsed = time.perf_counter() - start
	        STATS["fail"] += 1
	        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)


	def main():
	    """Retry every combo in ``MISSING`` sequentially, saving caches as it goes.

	    Prints a header, hands each combo to ``process_one`` in list order,
	    writes the LLM, image and geocode caches after every combo, then prints
	    a summary of ``STATS`` and writes the three caches one final time.
	    """

	    def flush_caches():
	        # Same three saves, same order, wherever progress is persisted.
	        _save_llm_cache()
	        _save_image_cache()
	        _save_geocode_cache()

	    entries_at_start = len(_LLM_CACHE)

	    print(f"Retrying {STATS['total']} missing cache combos (single worker — no race conditions)")
	    print(f" Existing LLM cache entries: {entries_at_start}")
	    print()

	    # One combo at a time so cache saves never overlap; saving after each
	    # combo keeps completed work on disk.
	    for position, combo in enumerate(MISSING, start=1):
	        city, category = combo
	        process_one(city, category, position)
	        flush_caches()

	    entries_added = len(_LLM_CACHE) - entries_at_start

	    print()
	    print("═" * 55)
	    print("Retry complete!")
	    print(f" Results: {STATS['success']} succeeded, {STATS.get('skipped', 0)} skipped, {STATS['fail']} failed")
	    print(f" New LLM cache entries: {entries_added} (total: {len(_LLM_CACHE)})")

	    flush_caches()
	    print("All caches saved to disk ✅")


	# Script entry point: run the retry only on direct execution, so importing
	# this module does not trigger it.
	if __name__ == "__main__":
	    main()