Spaces:

jofaichow
/

roamify

Running

jofaichow committed 18 days ago

Commit

7b2fc2d

1 Parent(s): 584bf94

cache prewarm + concurrency improvements

- Prewarm Osaka (7 cats, 14 translations) — 47/61 cities cached
- Thread-safe Nominatim rate limiter for concurrent workers
- Provider randomization splits load across OpenRouter + Ollama Cloud
- Increased LLM timeout 60→120s to reduce retry cascades
- Concurrent 2-worker prewarm script

Files changed (6) hide show

.geocode_cache.json +0 -0
.image_cache.json +0 -0
.llm_cache.json +0 -0
.translation_cache.json +0 -0
scripts/prewarm_remaining.py +80 -56
src/services/recommender.py +30 -4

.geocode_cache.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

.image_cache.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

.llm_cache.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

.translation_cache.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

scripts/prewarm_remaining.py CHANGED Viewed

@@ -1,20 +1,25 @@
 #!/usr/bin/env python3
 """
-Pre-warm LLM cache for all 33 cities that have zero cache entries.
-Runs all 7 categories for each city. Skips translation — only populates:
-  - .llm_cache.json   → instant repeat lookups
-  - .image_cache.json → instant image loads
-  - .geocode_cache.json → instant geocoding
 Usage:
     cd roamify && python scripts/prewarm_remaining.py
 """
 import os
 import sys
 import time
-import json
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
@@ -24,8 +29,6 @@ load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), "..", ".env"), o
 from services.recommender import (
     get_recommendations_cached,
     _LLM_CACHE,
-    _IMAGE_CACHE,
-    _GEOCODE_CACHE,
     _save_llm_cache,
     _save_image_cache,
     _save_geocode_cache,
@@ -33,78 +36,99 @@ from services.recommender import (
 CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]
 UNCATEGORIZED_CITIES = [
-    "Beijing", "Buenos Aires", "Cairo", "Chicago", "Copenhagen",
-    "Delhi", "Dublin", "Edinburgh", "Florence", "Hanoi",
-    "Honolulu", "Kuala Lumpur", "Las Vegas", "Los Angeles", "Madrid",
-    "Melbourne", "Miami", "Milan", "Montreal", "Moscow",
-    "Osaka", "Oslo", "Reykjavik", "Santiago", "Shanghai",
-    "Stockholm", "Taipei", "Tel Aviv", "Toronto", "Vancouver",
-    "Venice", "Warsaw", "Washington",
 ]
 def prewarm():
     total_combos = len(UNCATEGORIZED_CITIES) * len(CATEGORY_NAMES)
-    success = 0
-    skipped = 0
-    fail = 0
     llm_before = len(_LLM_CACHE)
     print(f"Pre-warming caches: {len(UNCATEGORIZED_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
     print(f"  Existing LLM cache entries: {llm_before}")
     print()
-    combo_idx = 0
     for city in UNCATEGORIZED_CITIES:
         for cat_name in CATEGORY_NAMES:
-            combo_idx += 1
-            categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
-            cat_hash = json.dumps(categories, sort_keys=True)
-            if (city, cat_hash) in _LLM_CACHE:
-                print(f"  [{combo_idx:>3}/{total_combos}] ⏭️  {city} / {cat_name} — already cached")
-                skipped += 1
-                continue
-            print(f"  [{combo_idx:>3}/{total_combos}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
-            start = time.time()
             try:
-                result = get_recommendations_cached(
-                    city=city,
-                    num_attractions=6,
-                    categories=categories,
-                    temperature=0,
-                )
-                elapsed = time.time() - start
-                if result:
-                    items = len(result)
-                    print(f"✅ {items} items in {elapsed:.1f}s")
-                    success += 1
-                else:
-                    print(f"❌ returned None in {elapsed:.1f}s")
-                    fail += 1
-            except Exception as e:
-                elapsed = time.time() - start
-                print(f"❌ error after {elapsed:.1f}s: {e}")
-                fail += 1
-            # Nominatim rate-limit pause
-            time.sleep(1.5)
     # Summary
     llm_new = len(_LLM_CACHE) - llm_before
-    image_new = len(_IMAGE_CACHE)
-    geo_new = len(_GEOCODE_CACHE)
     print()
     print("═" * 55)
     print("Pre-warm complete!")
-    print(f"  Combos: {success} succeeded, {skipped} skipped, {fail} failed")
     print(f"  New LLM cache entries: {llm_new} (total: {len(_LLM_CACHE)})")
-    print(f"  Image cache entries: {image_new}")
-    print(f"  Geocode cache entries: {geo_new}")
     _save_llm_cache()
     _save_image_cache()

 #!/usr/bin/env python3
 """
+Pre-warm LLM cache for remaining uncached cities.
+Processes combos concurrently (2 workers) to maximize throughput while
+respecting Nominatim's 1 req/s rate limit via a thread-safe limiter.
+Each worker randomly picks between OpenRouter DeepSeek and Ollama Cloud
+DeepSeek as the primary provider (via _get_providers_randomized), splitting
+the workload and reducing rate-limit pressure on either provider.
 Usage:
     cd roamify && python scripts/prewarm_remaining.py
 """
+import json
 import os
+import random
 import sys
+import threading
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
 from services.recommender import (
     get_recommendations_cached,
     _LLM_CACHE,
     _save_llm_cache,
     _save_image_cache,
     _save_geocode_cache,
 CATEGORY_NAMES = ["Landmark", "Culture", "Nature", "Gems", "Photo", "Food", "Shopping"]
+# 15 cities still uncached (updated as more get prewarmed)
 UNCATEGORIZED_CITIES = [
+    "Montreal", "Moscow", "Osaka", "Oslo", "Reykjavik",
+    "Santiago", "Shanghai", "Stockholm", "Taipei", "Tel Aviv",
+    "Toronto", "Vancouver", "Venice", "Warsaw", "Washington",
 ]
+_COMBO_STATS = {"success": 0, "skipped": 0, "fail": 0, "total": 0}
+_COMBO_LOCK = threading.Lock()
+def process_combo(city: str, cat_name: str, combo_idx: int, total: int) -> None:
+    """Process a single city/category combo and update stats."""
+    categories = {name: (name == cat_name) for name in CATEGORY_NAMES}
+    cat_hash = json.dumps(categories, sort_keys=True)
+    if (city, cat_hash) in _LLM_CACHE:
+        with _COMBO_LOCK:
+            _COMBO_STATS["skipped"] += 1
+        print(f"  [{combo_idx:>3}/{total}] ⏭️  {city} / {cat_name} — already cached", flush=True)
+        return
+    print(f"  [{combo_idx:>3}/{total}] 🔍 {city} / {cat_name}...", end=" ", flush=True)
+    start = time.time()
+    try:
+        result = get_recommendations_cached(
+            city=city,
+            num_attractions=6,
+            categories=categories,
+            temperature=0,
+        )
+        elapsed = time.time() - start
+        if result:
+            items = len(result)
+            with _COMBO_LOCK:
+                _COMBO_STATS["success"] += 1
+            print(f"✅ {items} items in {elapsed:.1f}s", flush=True)
+        else:
+            with _COMBO_LOCK:
+                _COMBO_STATS["fail"] += 1
+            print(f"❌ returned None in {elapsed:.1f}s", flush=True)
+    except Exception as e:
+        elapsed = time.time() - start
+        with _COMBO_LOCK:
+            _COMBO_STATS["fail"] += 1
+        print(f"❌ error after {elapsed:.1f}s: {e}", flush=True)
 def prewarm():
+    """Run all combos concurrently with 2 workers."""
     total_combos = len(UNCATEGORIZED_CITIES) * len(CATEGORY_NAMES)
+    _COMBO_STATS["total"] = total_combos
     llm_before = len(_LLM_CACHE)
     print(f"Pre-warming caches: {len(UNCATEGORIZED_CITIES)} cities × {len(CATEGORY_NAMES)} categories = {total_combos} combos")
+    print(f"  Workers: 2 (concurrent) — each uses random DeepSeek provider")
     print(f"  Existing LLM cache entries: {llm_before}")
     print()
+    # Build all combos, shuffle for load distribution across workers
+    all_combos = []
+    idx = 0
     for city in UNCATEGORIZED_CITIES:
         for cat_name in CATEGORY_NAMES:
+            idx += 1
+            all_combos.append((city, cat_name, idx))
+    random.shuffle(all_combos)
+    # Re-assign sequential indices after shuffle (for display only)
+    for i, (city, cat_name, _) in enumerate(all_combos):
+        all_combos[i] = (city, cat_name, i + 1)
+    with ThreadPoolExecutor(max_workers=2) as pool:
+        futures = [
+            pool.submit(process_combo, city, cat_name, idx, total_combos)
+            for city, cat_name, idx in all_combos
+        ]
+        # Process results as they complete
+        for future in as_completed(futures):
             try:
+                future.result()
+            except Exception:
+                pass  # Errors are already logged in process_combo
     # Summary
     llm_new = len(_LLM_CACHE) - llm_before
     print()
     print("═" * 55)
     print("Pre-warm complete!")
+    print(f"  Combos: {_COMBO_STATS['success']} succeeded, {_COMBO_STATS['skipped']} skipped, {_COMBO_STATS['fail']} failed")
     print(f"  New LLM cache entries: {llm_new} (total: {len(_LLM_CACHE)})")
     _save_llm_cache()
     _save_image_cache()

src/services/recommender.py CHANGED Viewed

@@ -109,6 +109,11 @@ def _save_image_cache() -> None:
 _GEOCODE_CACHE: dict[str, dict | None] = {}
 _load_geocode_cache()  # Restore persisted cache from disk
 # Module-level cache for image enrichment results — keyed by (name, city, country) -> image URL
 # Never cleared, survives "Clear" clicks. Image URLs are stable per attraction.
 _IMAGE_CACHE: dict[tuple[str, str, str], str] = {}
@@ -644,7 +649,14 @@ def _nominatim_search_cached(query: str, timeout: int = 10) -> tuple[dict | None
         "q": query, "format": "json", "limit": 1, "accept-language": "en",
     })
     data = _http_get_json(url, timeout=timeout, retries=2)
-    time.sleep(1.01)  # Nominatim rate limit: 1 req/s (only on actual API calls)
     if data and isinstance(data, list) and data:
         _GEOCODE_CACHE[query] = data[0]
         _save_geocode_cache()
@@ -871,6 +883,20 @@ def _get_providers() -> list[_Provider]:
     return providers
 def _parse_json_response(raw: str) -> list[dict] | None:
     """Robustly extract JSON array from LLM output.
     Returns None if parsing fails entirely (caller should show st.error)."""
@@ -994,7 +1020,7 @@ def _call_model(provider: _Provider, prompt: str, temperature: float = 0.1) -> l
         ],
         temperature=temperature,
         max_tokens=4096,
-        timeout=60,
     )
     # Ollama Cloud supports the "think" parameter natively via extra_body
     if provider.name == "ollama-cloud":
@@ -1054,7 +1080,7 @@ def get_recommendations(
     )
     prompt += "\n\nIMPORTANT: Do NOT include any politically controversial attractions, war museums, or memorials that might be offensive to some visitors. Focus on universally enjoyed tourist attractions."
-    providers = _get_providers()
     if not providers:
         return None
@@ -1203,7 +1229,7 @@ def translate_items(items: list[dict], second_language: str, tab: str) -> list[d
     if not second_language or not items:
         return items
-    providers = _get_providers()
     if not providers:
         return items

 _GEOCODE_CACHE: dict[str, dict | None] = {}
 _load_geocode_cache()  # Restore persisted cache from disk
+# Thread-safe Nominatim rate limiter — ensures max 1 API call per second
+# across all threads (prewarm with concurrent workers, image enrichment, etc.)
+_nominatim_lock = threading.Lock()
+_nominatim_last_call: float = 0.0
 # Module-level cache for image enrichment results — keyed by (name, city, country) -> image URL
 # Never cleared, survives "Clear" clicks. Image URLs are stable per attraction.
 _IMAGE_CACHE: dict[tuple[str, str, str], str] = {}
         "q": query, "format": "json", "limit": 1, "accept-language": "en",
     })
     data = _http_get_json(url, timeout=timeout, retries=2)
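+    # Nominatim rate limit (1 req/s): serialise the post-request pause across threads.
+    # NOTE(review): the HTTP request above is issued before this lock is taken, so
+    # concurrent workers can still overlap requests; to truly enforce 1 req/s the
+    # wait would have to happen before the request.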
+    global _nominatim_last_call
+    with _nominatim_lock:
+        now = time.time()
+        since_last = now - _nominatim_last_call
+        if since_last < 1.01:
+            time.sleep(1.01 - since_last)
+        _nominatim_last_call = time.time()
     if data and isinstance(data, list) and data:
         _GEOCODE_CACHE[query] = data[0]
         _save_geocode_cache()
     return providers
+def _get_providers_randomized() -> list[_Provider]:
+    """Same as _get_providers but randomly orders the two DeepSeek V4 Flash
+    providers (OpenRouter and Ollama Cloud) so load is distributed and rate
+    limits are less likely to be hit on either provider."""
+    providers = _get_providers()
+    # Shuffle the first two DeepSeek providers if both are present
+    if len(providers) >= 2 and all(p.name in ("openrouter-deepseek", "ollama-cloud") for p in providers[:2]):
+        import random
+        p0, p1 = providers[0], providers[1]
+        if random.random() < 0.5:
+            providers[0], providers[1] = p1, p0
+    return providers
 def _parse_json_response(raw: str) -> list[dict] | None:
     """Robustly extract JSON array from LLM output.
     Returns None if parsing fails entirely (caller should show st.error)."""
         ],
         temperature=temperature,
         max_tokens=4096,
+        timeout=120,
     )
     # Ollama Cloud supports the "think" parameter natively via extra_body
     if provider.name == "ollama-cloud":
     )
     prompt += "\n\nIMPORTANT: Do NOT include any politically controversial attractions, war museums, or memorials that might be offensive to some visitors. Focus on universally enjoyed tourist attractions."
+    providers = _get_providers_randomized()
     if not providers:
         return None
     if not second_language or not items:
         return items
+    providers = _get_providers_randomized()
     if not providers:
         return items