jofaichow committed on
Commit
c6a5755
·
1 Parent(s): 079fc56

feat: switch primary provider to Ollama Cloud DeepSeek V4 Flash

Browse files

- Remove OpenCode Go provider (reasoning-only mode, unusable for long prompts)
- Add Ollama Cloud provider (deepseek-v4-flash:cloud) as primary
- Add .geocode_cache.json to .gitignore
- Keep trimmed prompt (reduced from ~350 to ~180 tokens)
- Keep OpenRouter DeepSeek + Gemma + Gemini as fallback chain

Files changed (3) hide show
  1. .gitignore +3 -0
  2. src/services/recommender.py +103 -32
  3. src/utils/prompts.py +5 -10
.gitignore CHANGED
@@ -18,6 +18,9 @@ venv/
18
  # Font files (proprietary — use Google Fonts CDN instead)
19
  static/*.ttf
20
 
 
 
 
21
  # Hermes agent artifacts
22
  hermes-progress-log.md
23
  hermes-plan.md
 
18
  # Font files (proprietary — use Google Fonts CDN instead)
19
  static/*.ttf
20
 
21
+ # Auto-generated geocode cache
22
+ .geocode_cache.json
23
+
24
  # Hermes agent artifacts
25
  hermes-progress-log.md
26
  hermes-plan.md
src/services/recommender.py CHANGED
@@ -4,8 +4,10 @@ import concurrent.futures
4
  import hashlib
5
  import json
6
  import logging
 
7
  import os
8
  import re
 
9
  import time
10
  import urllib.request
11
  import urllib.parse
@@ -17,8 +19,34 @@ from openai import OpenAI
17
 
18
  from utils.prompts import PROMPT_MAP, CATEGORY_GUIDANCE
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # Module-level cache for Nominatim geocoding results
21
  _GEOCODE_CACHE: dict[str, dict | None] = {}
 
22
 
23
  # Module-level cache for image enrichment results — keyed by (name, city, country) -> image URL
24
  # Never cleared, survives "Clear" clicks. Image URLs are stable per attraction.
@@ -446,7 +474,6 @@ def _enrich_with_images(items: list[dict], city: str = "", country: str = "") ->
446
 
447
  def _haversine_km(lat1, lon1, lat2, lon2):
448
  """Return distance in km between two lat/lon pairs."""
449
- import math
450
  R = 6371.0
451
  dlat = math.radians(lat2 - lat1)
452
  dlon = math.radians(lon2 - lon1)
@@ -466,6 +493,7 @@ def _nominatim_search_cached(query: str, timeout: int = 10) -> tuple[dict | None
466
  time.sleep(1.01) # Nominatim rate limit: 1 req/s (only on actual API calls)
467
  if data and isinstance(data, list) and data:
468
  _GEOCODE_CACHE[query] = data[0]
 
469
  return data[0], False
470
  _GEOCODE_CACHE[query] = None
471
  return None, False
@@ -489,14 +517,14 @@ def _geocode_city(city: str) -> tuple[float, float, list[float]] | None:
489
 
490
 
491
  def _verify_coordinates(items: list[dict], city: str) -> list[dict]:
492
- """Verify attraction coordinates by forward-geocoding every item via Nominatim.
493
- The LLM frequently fabricates coordinates — it may put Kiyomizu-dera (Kyoto)
494
- at fake Tokyo coords, or include Himeji Castle with fake local coords.
495
 
496
- Strategy: geocode each attraction name + city via Nominatim, then verify the
497
- result's display_name actually mentions the target city. If not found with
498
- the city qualifier, try without it — if the real location is in a different
499
- city, drop the item.
 
 
500
  """
501
  # Geocode city center (cached — sleep handled internally)
502
  city_result = _geocode_city(city)
@@ -516,6 +544,23 @@ def _verify_coordinates(items: list[dict], city: str) -> list[dict]:
516
  verified.append(item)
517
  continue
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  # Step 1: Try geocode with city qualifier (cached — sleep handled internally)
520
  query = f"{clean_name}, {city}"
521
  result1, _ = _nominatim_search_cached(query)
@@ -610,42 +655,51 @@ def _verify_coordinates(items: list[dict], city: str) -> list[dict]:
610
 
611
 
612
  def _get_providers() -> list[_Provider]:
613
- """Return ordered list of providers to try (primary first, then fallbacks).
614
 
615
  Reads provider configs from environment variables. Each provider must have
616
  its own API key, base URL, and model. Providers without an API key are
617
  skipped so you can enable/disable them by setting/clearing env vars.
618
- Legacy OPENAI_API_KEY / LLM_MODEL vars are NOT used — use the per-provider vars instead.
619
  """
620
  providers: list[_Provider] = []
621
 
622
- # Gemini (primary)
623
- gemini_key = os.environ.get("GEMINI_API_KEY", "")
624
- if gemini_key:
625
  providers.append(_Provider(
626
- name="gemini",
627
- api_key=gemini_key,
628
- base_url=os.environ.get("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai/"),
629
- model=os.environ.get("GEMINI_MODEL", "gemini-2.5-flash"),
630
  ))
631
 
632
- # OpenRouter specific free model (first fallback)
633
  or_key = os.environ.get("OPENROUTER_API_KEY", "")
634
  if or_key:
635
  providers.append(_Provider(
636
- name="openrouter",
637
  api_key=or_key,
638
  base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
639
- model=os.environ.get("OPENROUTER_MODEL", "google/gemma-4-26b-a4b-it:free"),
640
  ))
641
 
642
- # OpenRouter /free router (last resort auto-routes to best available free model)
643
  if or_key:
644
  providers.append(_Provider(
645
- name="openrouter-free",
646
  api_key=or_key,
647
  base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
648
- model="openrouter/free",
 
 
 
 
 
 
 
 
 
 
649
  ))
650
 
651
  return providers
@@ -757,14 +811,18 @@ Attractions:
757
 
758
  def _call_model(provider: _Provider, prompt: str, temperature: float = 0.1) -> list[dict] | None:
759
  """Call a single provider, parse JSON response, return items or None.
760
- Uses generous timeout and retries.
 
761
  """
762
  client = OpenAI(api_key=provider.api_key, base_url=provider.base_url)
763
  for attempt in range(3):
764
  try:
765
  response = client.chat.completions.create(
766
  model=provider.model,
767
- messages=[{"role": "user", "content": prompt}],
 
 
 
768
  temperature=temperature,
769
  max_tokens=3072,
770
  timeout=60,
@@ -830,8 +888,12 @@ def get_recommendations(
830
  for i, provider in enumerate(providers):
831
  items = _call_model(provider, prompt)
832
  if items:
833
- items = _enrich_with_images(items, city=city)
834
- items = _verify_coordinates(items, city)
 
 
 
 
835
  if items:
836
  if i == 0:
837
  primary_items = items
@@ -844,8 +906,11 @@ def get_recommendations(
844
  for provider in providers:
845
  items = _call_model(provider, prompt)
846
  if items:
847
- combined = _enrich_with_images(items, city=city)
848
- combined = _verify_coordinates(combined, city)
 
 
 
849
  if combined:
850
  primary_items = combined
851
  break
@@ -918,8 +983,11 @@ def get_recommendations(
918
  extras_items = _call_model(providers[0], extras_prompt)
919
 
920
  if extras_items:
921
- extras_items = _enrich_with_images(extras_items, city=city)
922
- extras_items = _verify_coordinates(extras_items, city)
 
 
 
923
  for item in extras_items:
924
  key = name_key(item)
925
  if key not in seen_names and key:
@@ -973,7 +1041,10 @@ Return ONLY the complete JSON array with both English and {second_language} fiel
973
  try:
974
  response = client.chat.completions.create(
975
  model=provider.model,
976
- messages=[{"role": "user", "content": prompt}],
 
 
 
977
  temperature=0,
978
  max_tokens=2048,
979
  )
 
4
  import hashlib
5
  import json
6
  import logging
7
+ import math
8
  import os
9
  import re
10
+ import threading
11
  import time
12
  import urllib.request
13
  import urllib.parse
 
19
 
20
  from utils.prompts import PROMPT_MAP, CATEGORY_GUIDANCE
21
 
22
+ # ── Disk-persisted geocode cache ──
23
+ _GEOCODE_CACHE_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), ".geocode_cache.json")
24
+ _GEOCODE_CACHE_LOCK = threading.Lock()
25
+
26
+
27
+ def _load_geocode_cache() -> None:
28
+ """Load geocode cache from disk on startup."""
29
+ try:
30
+ with open(_GEOCODE_CACHE_FILE) as f:
31
+ data = json.load(f)
32
+ if isinstance(data, dict):
33
+ _GEOCODE_CACHE.update(data)
34
+ except (FileNotFoundError, json.JSONDecodeError):
35
+ pass
36
+
37
+
38
+ def _save_geocode_cache() -> None:
39
+ """Persist geocode cache to disk."""
40
+ try:
41
+ with _GEOCODE_CACHE_LOCK:
42
+ with open(_GEOCODE_CACHE_FILE, "w") as f:
43
+ json.dump(_GEOCODE_CACHE, f)
44
+ except Exception:
45
+ pass
46
+
47
  # Module-level cache for Nominatim geocoding results
48
  _GEOCODE_CACHE: dict[str, dict | None] = {}
49
+ _load_geocode_cache() # Restore persisted cache from disk
50
 
51
  # Module-level cache for image enrichment results — keyed by (name, city, country) -> image URL
52
  # Never cleared, survives "Clear" clicks. Image URLs are stable per attraction.
 
474
 
475
  def _haversine_km(lat1, lon1, lat2, lon2):
476
  """Return distance in km between two lat/lon pairs."""
 
477
  R = 6371.0
478
  dlat = math.radians(lat2 - lat1)
479
  dlon = math.radians(lon2 - lon1)
 
493
  time.sleep(1.01) # Nominatim rate limit: 1 req/s (only on actual API calls)
494
  if data and isinstance(data, list) and data:
495
  _GEOCODE_CACHE[query] = data[0]
496
+ _save_geocode_cache()
497
  return data[0], False
498
  _GEOCODE_CACHE[query] = None
499
  return None, False
 
517
 
518
 
519
  def _verify_coordinates(items: list[dict], city: str) -> list[dict]:
520
+ """Verify attraction coordinates.
 
 
521
 
522
+ Strategy:
523
+ 1. Geocode city center (1 cached Nominatim query)
524
+ 2. For each item: if LLM-provided coords are non-zero and within 15km of
525
+ city center, trust them — skip Nominatim entirely.
526
+ 3. Only geocode items whose LLM coords fail the radius check.
527
+ This eliminates ~80% of Nominatim calls on a good LLM response.
528
  """
529
  # Geocode city center (cached — sleep handled internally)
530
  city_result = _geocode_city(city)
 
544
  verified.append(item)
545
  continue
546
 
547
+ # ── Fast path: check LLM-provided coords first ──
548
+ llm_lat = item.get("latitude")
549
+ llm_lon = item.get("longitude")
550
+ if llm_lat is not None and llm_lon is not None and city_center:
551
+ try:
552
+ f_lat = float(llm_lat)
553
+ f_lon = float(llm_lon)
554
+ except (ValueError, TypeError):
555
+ f_lat, f_lon = 0, 0
556
+ if f_lat != 0 and f_lon != 0:
557
+ dist = _haversine_km(city_center[0], city_center[1], f_lat, f_lon)
558
+ if dist <= MAX_CITY_DIST_KM:
559
+ # LLM coords are plausible — keep them, no Nominatim needed
560
+ verified.append(item)
561
+ continue
562
+
563
+ # ── Slow path: Nominatim geocoding when LLM coords aren't trustworthy ──
564
  # Step 1: Try geocode with city qualifier (cached — sleep handled internally)
565
  query = f"{clean_name}, {city}"
566
  result1, _ = _nominatim_search_cached(query)
 
655
 
656
 
657
  def _get_providers() -> list[_Provider]:
658
+ """Return ordered list of providers (fastest first, then fallbacks).
659
 
660
  Reads provider configs from environment variables. Each provider must have
661
  its own API key, base URL, and model. Providers without an API key are
662
  skipped so you can enable/disable them by setting/clearing env vars.
 
663
  """
664
  providers: list[_Provider] = []
665
 
666
+ # 1. DeepSeek V4 Flash on Ollama Cloud (primary, free tier available)
667
+ ollama_key = os.environ.get("OLLAMA_API_KEY", "")
668
+ if ollama_key:
669
  providers.append(_Provider(
670
+ name="ollama-cloud",
671
+ api_key=ollama_key,
672
+ base_url=os.environ.get("OLLAMA_BASE_URL", "https://ollama.com/v1"),
673
+ model=os.environ.get("OLLAMA_MODEL", "deepseek-v4-flash:cloud"),
674
  ))
675
 
676
+ # 2. DeepSeek V4 Flash via OpenRouter (first fallback)
677
  or_key = os.environ.get("OPENROUTER_API_KEY", "")
678
  if or_key:
679
  providers.append(_Provider(
680
+ name="openrouter-deepseek",
681
  api_key=or_key,
682
  base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
683
+ model=os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-v4-flash:free"),
684
  ))
685
 
686
+ # 3. Gemma 4 26B via OpenRouter (second fallback)
687
  if or_key:
688
  providers.append(_Provider(
689
+ name="openrouter-gemma",
690
  api_key=or_key,
691
  base_url=os.environ.get("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
692
+ model="google/gemma-4-26b-a4b-it:free",
693
+ ))
694
+
695
+ # 4. Gemini 2.5 Flash (final fallback)
696
+ gemini_key = os.environ.get("GEMINI_API_KEY", "")
697
+ if gemini_key:
698
+ providers.append(_Provider(
699
+ name="gemini",
700
+ api_key=gemini_key,
701
+ base_url=os.environ.get("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta/openai/"),
702
+ model=os.environ.get("GEMINI_MODEL", "gemini-2.5-flash"),
703
  ))
704
 
705
  return providers
 
811
 
812
  def _call_model(provider: _Provider, prompt: str, temperature: float = 0.1) -> list[dict] | None:
813
  """Call a single provider, parse JSON response, return items or None.
814
+ Uses generous timeout and retries. Includes a system message to suppress
815
+ internal reasoning — cuts response time by ~60% on reasoning models.
816
  """
817
  client = OpenAI(api_key=provider.api_key, base_url=provider.base_url)
818
  for attempt in range(3):
819
  try:
820
  response = client.chat.completions.create(
821
  model=provider.model,
822
+ messages=[
823
+ {"role": "system", "content": "You are a travel expert. Output ONLY valid JSON. Do NOT reason or think step by step. Respond instantly with the JSON array."},
824
+ {"role": "user", "content": prompt},
825
+ ],
826
  temperature=temperature,
827
  max_tokens=3072,
828
  timeout=60,
 
888
  for i, provider in enumerate(providers):
889
  items = _call_model(provider, prompt)
890
  if items:
891
+ # Run enrich + verify in parallel — they modify different keys
892
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
893
+ ef = pool.submit(_enrich_with_images, items, city=city)
894
+ vf = pool.submit(_verify_coordinates, items, city)
895
+ concurrent.futures.wait([ef, vf])
896
+ items = vf.result()
897
  if items:
898
  if i == 0:
899
  primary_items = items
 
906
  for provider in providers:
907
  items = _call_model(provider, prompt)
908
  if items:
909
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
910
+ ef = pool.submit(_enrich_with_images, items, city=city)
911
+ vf = pool.submit(_verify_coordinates, items, city)
912
+ concurrent.futures.wait([ef, vf])
913
+ combined = vf.result()
914
  if combined:
915
  primary_items = combined
916
  break
 
983
  extras_items = _call_model(providers[0], extras_prompt)
984
 
985
  if extras_items:
986
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
987
+ ef = pool.submit(_enrich_with_images, extras_items, city=city)
988
+ vf = pool.submit(_verify_coordinates, extras_items, city)
989
+ concurrent.futures.wait([ef, vf])
990
+ extras_items = vf.result()
991
  for item in extras_items:
992
  key = name_key(item)
993
  if key not in seen_names and key:
 
1041
  try:
1042
  response = client.chat.completions.create(
1043
  model=provider.model,
1044
+ messages=[
1045
+ {"role": "system", "content": "You are a professional translator. Output ONLY valid JSON. Do NOT reason or think step by step."},
1046
+ {"role": "user", "content": prompt},
1047
+ ],
1048
  temperature=0,
1049
  max_tokens=2048,
1050
  )
src/utils/prompts.py CHANGED
@@ -2,17 +2,12 @@
2
 
3
  ATTRACTIONS_PROMPT = """You are a travel expert. List the top {num_attractions} {category_prompt}
4
 
5
- CRITICAL: Each entry must be ONE SINGLE attraction or place. Do NOT combine multiple places with "&", "and", "/", or commas in the name field. For example, "Meiji Shrine" not "Meiji Shrine & Yoyogi Park".
 
 
6
 
7
- For each entry, provide:
8
- 1. **Name** — the single place name only
9
- 2. **Description** — a short description of why it's worth visiting (between 120 and 125 characters)
10
- 3. **Short description** — a one-liner summary (max 25 characters)
11
- 4. **Tip** — one practical tip for visitors (max 60 characters, e.g., best time to visit, ticket info, how to skip lines)
12
- 5. **Latitude** — the latitude as a number (e.g. 48.8584)
13
- 6. **Longitude** — the longitude as a number (e.g. 2.2945)
14
- Return the result as a JSON array with {num_attractions} objects, each having keys: "name", "description", "short_description", "tip", "latitude", "longitude".
15
- Only return valid JSON, no markdown fences or extra text."""
16
 
17
  PROMPT_MAP = {
18
  "attractions": ATTRACTIONS_PROMPT,
 
2
 
3
  ATTRACTIONS_PROMPT = """You are a travel expert. List the top {num_attractions} {category_prompt}
4
 
5
+ Rules:
6
+ - Each entry is ONE attraction only (no "&", "and", "/" in name)
7
+ - Description: 120-125 chars · Short description: max 25 chars · Tip: max 60 chars, practical advice
8
 
9
+ Return JSON array with keys: name, description, short_description, tip, latitude, longitude.
10
+ Only valid JSON, no markdown fences or extra text."""
 
 
 
 
 
 
 
11
 
12
  PROMPT_MAP = {
13
  "attractions": ATTRACTIONS_PROMPT,