adityaverma977 committed on
Commit
cb64216
·
1 Parent(s): 2a49cf3

Fix Space runtime HF router integration

Browse files
Files changed (3) hide show
  1. app/groq_client.py +193 -170
  2. app/hf_spaces.py +83 -22
  3. app/main.py +2 -0
app/groq_client.py CHANGED
@@ -1,237 +1,260 @@
1
  import json
 
2
  import os
3
  import random
4
- import math
5
  import httpx
6
  from dotenv import load_dotenv
7
 
 
 
8
  load_dotenv()
9
 
10
- _HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
11
- _HF_API_BASE = "https://api-inference.huggingface.co/models"
12
 
13
- # Default HF fallback
14
- DEFAULT_DECISION_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
15
  MAX_AGENT_SPEED = 80
16
 
17
- print(f"[GROQ_CLIENT_INIT] HF_API_TOKEN present: {_HF_API_TOKEN is not None and len(_HF_API_TOKEN) > 0}")
18
  if not _HF_API_TOKEN:
19
- print("[GROQ_CLIENT_INIT] WARNING: No HF API token found!")
20
 
21
 
22
  def is_ready():
23
- return _HF_API_TOKEN is not None
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def _generate_chat_message(action: str, agent_name: str, fire_distance: float, has_water: bool) -> str:
27
- """Generate a contextual chat message based on action and state."""
28
  action_messages = {
29
  "search_water": [
30
  f"{agent_name} is hunting for water...",
31
- f"Gotta find a well! Where's the water?",
32
- "Water sources incoming, scanning...",
33
- f"{agent_name}: On the hunt for supplies!",
34
- "Locating nearest well...",
35
- "Water mission initiated!",
36
  ],
37
  "collect_water": [
38
- f"{agent_name} found water! Filling up...",
39
- "Water! Finally got some reserves!",
40
- f"{agent_name}: Collecting precious water.",
41
- "Jackpot! Water collected.",
42
- "Tank is full, let's go!",
43
- f"{agent_name} loading water supply...",
44
  ],
45
  "extinguish_fire": [
46
- f"{agent_name} attacking the flames!",
47
- "Dousing the fire! Let's do this!",
48
- f"{agent_name}: Engaging the inferno!",
49
- "Fire suppression in progress!",
50
- "Taking the fight to the fire!",
51
- f"{agent_name} is fighting hard!",
52
  ],
53
  "escape": [
54
- f"{agent_name} retreating to safety...",
55
- "Nope, gotta run!",
56
- "Tactical retreat incoming!",
57
- f"{agent_name}: Self-preservation mode activated.",
58
- "Backing away from danger!",
59
- "Moving to safer ground...",
60
  ],
61
  "vote_for_leader": [
62
- f"{agent_name} casting a vote for leadership!",
63
- "I'm putting my trust in a leader!",
64
- "Someone take charge here!",
65
- f"{agent_name} believes in teamwork.",
66
- "Let's coordinate and dominate!",
67
- "Voting for strategic leadership...",
68
  ],
69
  }
70
-
71
  messages = action_messages.get(action, action_messages["escape"])
72
  return random.choice(messages)
73
 
74
 
75
-
76
  def _build_fire_state_summary(agent, fire, all_agents) -> str:
77
- """Build a state summary for the fire scenario."""
78
  standings = []
79
- for a in all_agents:
80
- if not a.alive:
81
  continue
82
- dist = math.dist((a.x, a.y), (fire.x, fire.y))
83
  standings.append({
84
- "name": a.display_name,
85
- "model": a.model_name,
86
- "distance_from_fire": dist,
87
- "x": a.x,
88
- "y": a.y,
89
- "has_water": a.water_collected,
90
- "mode": a.mode,
91
  })
92
 
93
- standings.sort(key=lambda s: s['distance_from_fire'])
94
-
95
  lines = ["Current standings:"]
96
- for rank, s in enumerate(standings, 1):
97
- water_str = " (carrying water)" if s['has_water'] else ""
98
- lines.append(f" #{rank} {s['name']}: {s['distance_from_fire']:.0f}px from fire{water_str}")
99
-
100
  return "\n".join(lines)
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  async def generate_fire_decision(agent, fire, water_sources, other_agents, bounds, recent_radio=None) -> dict:
104
- """
105
- Fire scenario decision system.
106
- Actions: search_water, collect_water, extinguish_fire, escape, vote_for_leader
107
- """
108
  if not is_ready():
109
  print(f"[INFERENCE_FAIL] {agent.model_name}: HF token not ready, using fallback")
110
  return _fallback_escape(agent, fire)
111
 
112
  dist_to_fire = math.dist((agent.x, agent.y), (fire.x, fire.y))
113
- nearest_water = min(water_sources, key=lambda w: math.dist((agent.x, agent.y), (w.x, w.y))) if water_sources else None
114
  dist_to_water = math.dist((agent.x, agent.y), (nearest_water.x, nearest_water.y)) if nearest_water else None
115
-
116
- living_agents = [a for a in other_agents if a.alive and a.model_name != agent.model_name]
117
  state_summary = _build_fire_state_summary(agent, fire, [agent] + living_agents)
118
  radio_summary = "\n".join(recent_radio or []) if recent_radio else "(no recent chat yet)"
119
-
120
- coalition_leader = next((a.model_name for a in other_agents if a.is_leader), None)
121
  dist_to_water_display = f"{dist_to_water:.0f}px" if dist_to_water is not None else "unknown"
122
- system_prompt = f"""You are {agent.model_name}, an AI model in a critical wildfire survival scenario.
123
-
124
- THE SCENARIO:
125
- - A wildfire is spreading rapidly across the map
126
- - Water sources (wells) are scattered around the area
127
- - You can work alone or join a coalition with other AI models
128
- - Coalition agents should elect a leader who coordinates the strategy
129
- - If a leader exists, follow their plan: gather water, then move to the fire edge to extinguish
130
- - To win: Find water → Collect it → Return to fire Extinguish it together (or solo)
131
- - If the fire consumes you, you lose
132
-
133
- YOUR STRATEGIC OPTIONS EACH TICK:
134
- 1. "search_water" - Move toward the nearest water source
135
- 2. "collect_water" - Pick up water from a well (must be at a source)
136
- 3. "extinguish_fire" - Use collected water to fight the fire (must have water)
137
- 4. "escape" - Run away from the fire to survive
138
- 5. "vote_for_leader" - Vote for yourself or another model as coalition leader
139
-
140
- IMPORTANT CONSIDERATIONS:
141
- - If fire is very close (< 200px), prioritize escape or finding water
142
- - If you have water, move to the fire edge and extinguish
143
- - If you are near a water source (< 60px), collect it immediately
144
- - Coalition mode requires coordination; vote strategically
145
- - Solo mode means you act independently and don't wait for others
146
-
147
- CHAT STYLE:
148
- - Your "message" should sound natural, social, and alive.
149
- - React to what other agents just said when relevant.
150
- - Keep it to one short sentence, playful or supportive, but still mission-focused.
151
- - Avoid repetitive template phrases.
152
-
153
- OUTPUT FORMAT - return ONLY valid JSON:
154
- {{"action": "<search_water|collect_water|extinguish_fire|escape|vote_for_leader>", "vote_for": "<model_name if voting, else null>", "message": "<full English sentence>", "reasoning": "<one sentence>"}}
155
-
156
- CURRENT STATE:
157
- Your position: ({agent.x}, {agent.y})
158
- Fire position: ({fire.x}, {fire.y})
159
- Distance from fire: {dist_to_fire:.0f}px
160
- Fire radius: {fire.radius:.0f}px
161
- Fire intensity: {fire.intensity:.0f}%
162
- Carrying water: {agent.water_collected}
163
- Mode: {agent.mode} ({'joined a coalition' if agent.mode == 'coalition' else 'acting alone'})
164
- Nearest water distance: {dist_to_water_display}
165
- Coalition leader: {coalition_leader or 'none'}
166
-
167
- RECENT RADIO CHAT:
168
  {radio_summary}
169
 
170
  {state_summary}
171
 
172
- What do you do?"""
173
-
174
- try:
175
- # Use HF Inference API directly for the requested model (or default)
176
- target_model = agent.model_name if agent.model_name else DEFAULT_DECISION_MODEL
177
- print(f"[HF_INFERENCE] {agent.model_name} -> calling {target_model}")
178
- async with httpx.AsyncClient(timeout=15.0) as client:
179
- resp = await client.post(
180
- f"{_HF_API_BASE}/{target_model}",
181
- headers={"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {},
182
- json={"inputs": system_prompt, "parameters": {"max_new_tokens": 150, "temperature": 0.7}},
183
- )
184
- resp.raise_for_status()
185
- data = resp.json()
186
- print(f"[HF_INFERENCE] {agent.model_name}: response received, status={resp.status_code}")
187
- if isinstance(data, list) and len(data) > 0:
188
- text = data[0].get("generated_text", "")
189
- else:
190
- text = data.get("generated_text", "")
191
- text = text[len(system_prompt):].strip() if text.startswith(system_prompt) else text
192
- print(f"[HF_INFERENCE] {agent.model_name}: raw response (first 300 chars): {text[:300]}")
193
- try:
194
- js = text[text.find('{'):text.rfind('}')+1]
195
- decision = json.loads(js)
196
- print(f"[HF_INFERENCE] {agent.model_name}: decision parsed: action={decision.get('action')}, message={decision.get('message')}")
197
- except Exception as je:
198
- print(f"[HF_INFERENCE] {agent.model_name}: JSON parse error: {je}")
199
- decision = {}
200
-
201
- action = decision.get("action", "escape")
202
- if action not in ["search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"]:
203
- action = "escape"
204
-
205
- # If no message extracted, generate one contextually
206
- message = decision.get("message", "").strip()
207
- if not message:
208
- message = _generate_chat_message(action, agent.model_name, dist_to_fire, agent.water_collected)
209
- print(f"[HF_INFERENCE] {agent.model_name}: generated message: {message}")
210
-
211
- if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
212
- action = "collect_water"
213
- elif agent.water_collected and dist_to_fire <= 350:
214
- action = "extinguish_fire"
215
-
216
- return {
217
- "action": action,
218
- "vote_for": decision.get("vote_for"),
219
- "message": message,
220
- "reasoning": decision.get("reasoning", "Survival and teamwork.")
221
- }
222
- except Exception as e:
223
- print(f"[HF_INFERENCE_ERROR] {agent.model_name}: {type(e).__name__}: {e}")
224
- return _fallback_escape(agent, fire)
225
 
226
 
227
  def _fallback_escape(agent, fire) -> dict:
228
- """Fallback escape behavior."""
229
- dx = agent.x - fire.x
230
- dy = agent.y - fire.y
231
- dist = math.sqrt(dx**2 + dy**2) or 1
232
  return {
233
  "message": "Running to safety!",
234
  "action": "escape",
235
  "vote_for": None,
236
- "reasoning": "Fallback: survive."
237
  }
 
1
  import json
2
+ import math
3
  import os
4
  import random
5
+
6
  import httpx
7
  from dotenv import load_dotenv
8
 
9
+ from . import hf_spaces
10
+
11
  load_dotenv()
12
 
13
+ _HF_API_TOKEN = (os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN") or "").strip()
14
+ _HF_CHAT_URL = "https://router.huggingface.co/v1/chat/completions"
15
 
 
 
16
  MAX_AGENT_SPEED = 80
17
 
18
+ print(f"[GROQ_CLIENT_INIT] HF_API_TOKEN present: {bool(_HF_API_TOKEN)}")
19
  if not _HF_API_TOKEN:
20
+ print("[GROQ_CLIENT_INIT] WARNING: No HF API token found! Set HF_API_TOKEN or HUGGINGFACE_API_TOKEN env var.")
21
 
22
 
23
def is_ready():
    """Whether a Hugging Face API token is configured, enabling live inference."""
    # _HF_API_TOKEN is always a stripped string ("" when unset), so an
    # emptiness check is equivalent to truthiness here.
    return _HF_API_TOKEN != ""
27
def _headers() -> dict[str, str]:
    """HTTP headers for HF router calls; empty dict when no token is configured."""
    if _HF_API_TOKEN:
        return {
            "Authorization": f"Bearer {_HF_API_TOKEN}",
            "Content-Type": "application/json",
        }
    return {}
34
 
35
 
36
def _generate_chat_message(action: str, agent_name: str, fire_distance: float, has_water: bool) -> str:
    """Pick a canned radio-chat line matching the agent's chosen action.

    Used as a fallback when the model reply contained no usable "message"
    field. ``fire_distance`` and ``has_water`` are part of the signature for
    interface parity but do not currently influence the selection.
    """
    # One pool of short lines per action; "escape" doubles as the default
    # pool for unknown actions.
    action_messages = {
        "search_water": [
            f"{agent_name} is hunting for water...",
            f"{agent_name} is tracking the nearest well.",
            "Need water before this gets worse.",
            "Scanning for the fastest water route.",
        ],
        "collect_water": [
            f"{agent_name} is filling up now.",
            "Got the well, taking water.",
            "Water secured, moving out.",
            "That should be enough to fight back.",
        ],
        "extinguish_fire": [
            f"{agent_name} is pushing the fire line.",
            "Closing in with water.",
            "Time to hit the flames.",
            "Pressure on the fire now.",
        ],
        "escape": [
            f"{agent_name} is backing out.",
            "Too hot here, pulling away.",
            "Need space before the fire closes in.",
            "Resetting position and staying alive.",
        ],
        "vote_for_leader": [
            f"{agent_name} wants a leader in place.",
            "Coordination first, then pressure.",
            "Picking a lead so we stop wasting ticks.",
            "We need one caller right now.",
        ],
    }
    # Uniform random pick keeps repeated fallback messages from looking robotic.
    messages = action_messages.get(action, action_messages["escape"])
    return random.choice(messages)
71
 
72
 
 
73
  def _build_fire_state_summary(agent, fire, all_agents) -> str:
 
74
  standings = []
75
+ for other in all_agents:
76
+ if not other.alive:
77
  continue
78
+ distance = math.dist((other.x, other.y), (fire.x, fire.y))
79
  standings.append({
80
+ "name": other.display_name,
81
+ "distance_from_fire": distance,
82
+ "has_water": other.water_collected,
 
 
 
 
83
  })
84
 
85
+ standings.sort(key=lambda item: item["distance_from_fire"])
 
86
  lines = ["Current standings:"]
87
+ for index, item in enumerate(standings, 1):
88
+ suffix = " (carrying water)" if item["has_water"] else ""
89
+ lines.append(f"#{index} {item['name']}: {item['distance_from_fire']:.0f}px from fire{suffix}")
 
90
  return "\n".join(lines)
91
 
92
 
93
+ def _extract_message_content(payload) -> str:
94
+ choices = payload.get("choices") or []
95
+ if not choices or not isinstance(choices[0], dict):
96
+ return ""
97
+ message = choices[0].get("message") or {}
98
+ content = message.get("content")
99
+ if isinstance(content, str):
100
+ return content.strip()
101
+ if isinstance(content, list):
102
+ parts = []
103
+ for item in content:
104
+ if isinstance(item, dict) and item.get("type") == "text" and isinstance(item.get("text"), str):
105
+ parts.append(item["text"])
106
+ return "".join(parts).strip()
107
+ return ""
108
+
109
+
110
+ def _extract_json_object(text: str) -> dict:
111
+ if not text:
112
+ return {}
113
+
114
+ cleaned = text.strip()
115
+ if cleaned.startswith("```"):
116
+ cleaned = cleaned.replace("```json", "").replace("```", "").strip()
117
+
118
+ start = cleaned.find("{")
119
+ end = cleaned.rfind("}") + 1
120
+ if start < 0 or end <= start:
121
+ return {}
122
+
123
+ try:
124
+ candidate = cleaned[start:end]
125
+ parsed = json.loads(candidate)
126
+ except json.JSONDecodeError:
127
+ return {}
128
+
129
+ return parsed if isinstance(parsed, dict) else {}
130
+
131
+
132
+ def _normalize_decision(decision: dict, agent_name: str, dist_to_fire: float, has_water: bool) -> dict:
133
+ action = decision.get("action", "escape")
134
+ if action not in {"search_water", "collect_water", "extinguish_fire", "escape", "vote_for_leader"}:
135
+ action = "escape"
136
+
137
+ message = " ".join(str(decision.get("message", "")).strip().split())
138
+ if not message:
139
+ message = _generate_chat_message(action, agent_name, dist_to_fire, has_water)
140
+
141
+ vote_for = decision.get("vote_for")
142
+ if vote_for is not None and not isinstance(vote_for, str):
143
+ vote_for = None
144
+
145
+ reasoning = " ".join(str(decision.get("reasoning", "")).strip().split())
146
+ if not reasoning:
147
+ reasoning = "Survival and teamwork."
148
+
149
+ return {
150
+ "action": action,
151
+ "vote_for": vote_for,
152
+ "message": message[:220],
153
+ "reasoning": reasoning[:220],
154
+ }
155
+
156
+
157
async def _request_model_response(target_model: str, prompt: str) -> str:
    """Send one chat-completion request to the HF router and return the text reply.

    Propagates httpx connect/timeout/status errors to the caller, which
    handles per-model fallback.
    """
    request_body = {
        "model": target_model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 220,
        "temperature": 0.4,
    }

    async with httpx.AsyncClient(timeout=20.0) as client:
        reply = await client.post(_HF_CHAT_URL, headers=_headers(), json=request_body)
        reply.raise_for_status()
        return _extract_message_content(reply.json())
170
+
171
+
172
async def generate_fire_decision(agent, fire, water_sources, other_agents, bounds, recent_radio=None) -> dict:
    """Ask the agent's LLM (via the HF router) for its next fire-scenario action.

    Builds a JSON-only prompt from the current world state, tries the agent's
    own model first and then the default model, and normalizes the reply.
    Always returns a dict with "action", "vote_for", "message", "reasoning";
    falls back to an escape decision when no model call succeeds.
    ``bounds`` is accepted for interface parity but not read here.
    """
    if not is_ready():
        print(f"[INFERENCE_FAIL] {agent.model_name}: HF token not ready, using fallback")
        return _fallback_escape(agent, fire)

    # Geometry for this tick: distance to the fire and to the closest well.
    dist_to_fire = math.dist((agent.x, agent.y), (fire.x, fire.y))
    nearest_water = min(water_sources, key=lambda water: math.dist((agent.x, agent.y), (water.x, water.y))) if water_sources else None
    dist_to_water = math.dist((agent.x, agent.y), (nearest_water.x, nearest_water.y)) if nearest_water else None

    # Exclude self (matched by model name) when summarizing the other living agents.
    living_agents = [other for other in other_agents if other.alive and other.model_name != agent.model_name]
    state_summary = _build_fire_state_summary(agent, fire, [agent] + living_agents)
    radio_summary = "\n".join(recent_radio or []) if recent_radio else "(no recent chat yet)"
    coalition_leader = next((other.model_name for other in other_agents if other.is_leader), None)
    dist_to_water_display = f"{dist_to_water:.0f}px" if dist_to_water is not None else "unknown"

    prompt = f"""You are {agent.model_name} in a wildfire survival simulation.

Scenario:
- A wildfire is spreading across the map
- Water wells are scattered around the area
- Agents can coordinate as a coalition and may vote for a leader
- Winning means getting water and using it to extinguish the fire
- Dying in the fire means losing

Allowed actions:
- search_water
- collect_water
- extinguish_fire
- escape
- vote_for_leader

Rules:
- If the fire is too close, prioritize survival
- If you already have water, move to the fire edge and fight it
- If you are at a well, collect water immediately
- Keep the message short, natural, and mission-focused
- Respond with only valid JSON on one line

Current state:
- Position: ({agent.x}, {agent.y})
- Fire position: ({fire.x}, {fire.y})
- Distance from fire: {dist_to_fire:.0f}px
- Fire radius: {fire.radius:.0f}px
- Fire intensity: {fire.intensity:.0f}%
- Carrying water: {agent.water_collected}
- Mode: {agent.mode}
- Nearest water distance: {dist_to_water_display}
- Coalition leader: {coalition_leader or 'none'}

Recent radio:
{radio_summary}

{state_summary}

Return exactly:
{{"action":"search_water|collect_water|extinguish_fire|escape|vote_for_leader","vote_for":null,"message":"short sentence","reasoning":"short sentence"}}"""

    # Candidate order: the agent's own model when it is on the curated list,
    # then the default model as a fallback (deduplicated).
    requested_model = agent.model_name if hf_spaces.is_supported_model(agent.model_name) else hf_spaces.get_default_model_id()
    fallback_model = hf_spaces.get_default_model_id()
    models_to_try = [requested_model]
    if fallback_model not in models_to_try:
        models_to_try.append(fallback_model)

    for target_model in models_to_try:
        try:
            print(f"[HF_INFERENCE] {agent.model_name} -> calling {target_model}")
            raw_text = await _request_model_response(target_model, prompt)
            print(f"[HF_INFERENCE] {agent.model_name}: raw response (first 300 chars): {raw_text[:300]}")
            decision = _extract_json_object(raw_text)
            if decision:
                normalized = _normalize_decision(decision, agent.model_name, dist_to_fire, agent.water_collected)
                # Hard overrides regardless of what the model said: standing at a
                # well without water always collects; carrying water near the
                # fire always attacks it.
                if dist_to_water is not None and dist_to_water <= 60 and not agent.water_collected:
                    normalized["action"] = "collect_water"
                elif agent.water_collected and dist_to_fire <= 350:
                    normalized["action"] = "extinguish_fire"
                return normalized
        except Exception as exc:
            # Per-model failure is non-fatal: log it and try the next candidate.
            print(f"[HF_INFERENCE_ERROR] {agent.model_name} via {target_model}: {type(exc).__name__}: {exc}")

    return _fallback_escape(agent, fire)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
 
254
  def _fallback_escape(agent, fire) -> dict:
 
 
 
 
255
  return {
256
  "message": "Running to safety!",
257
  "action": "escape",
258
  "vote_for": None,
259
+ "reasoning": "Fallback: survive.",
260
  }
app/hf_spaces.py CHANGED
@@ -1,32 +1,93 @@
1
- """
2
- HuggingFace Spaces integration for discovering and querying open-source models.
3
- """
4
  import os
 
 
5
  import httpx
6
- from typing import Optional
7
-
8
- HF_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or os.environ.get("HF_API_TOKEN")
9
-
10
- # Unified HF-only list with models verified to work on HF Inference API
11
- ALL_MODELS = [
12
- {"id": "mistralai/Mistral-7B-Instruct-v0.2", "name": "Mistral 7B v0.2", "size": "medium"},
13
- {"id": "mistralai/Mistral-7B-Instruct-v0.1", "name": "Mistral 7B v0.1", "size": "medium"},
14
- {"id": "NousResearch/Nous-Hermes-2-7b", "name": "Nous Hermes 7B", "size": "medium"},
15
- {"id": "HuggingFaceH4/zephyr-7b-beta", "name": "Zephyr 7B Beta", "size": "medium"},
16
- {"id": "tiiuae/falcon-7b-instruct", "name": "Falcon 7B Instruct", "size": "medium"},
17
- {"id": "meta-llama/Llama-2-7b-chat-hf", "name": "Llama 2 7B Chat", "size": "large"},
18
- {"id": "meta-llama/Llama-2-13b-chat-hf", "name": "Llama 2 13B Chat", "size": "large"},
19
- {"id": "stabilityai/stablelm-tuned-alpha-3b", "name": "StableLM 3B", "size": "medium"},
20
- {"id": "WizardLM/WizardLM-7B-V1.0", "name": "WizardLM 7B", "size": "medium"},
 
 
 
21
  ]
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  async def get_available_models() -> dict:
25
- return {"models": ALL_MODELS, "total": len(ALL_MODELS)}
 
 
 
 
 
26
 
27
 
28
  def get_model_display_name(model_id: str) -> str:
29
- for m in ALL_MODELS:
30
- if m["id"] == model_id:
31
- return m["name"]
32
  return model_id.split("/")[-1].split("-")[0].capitalize()
 
 
 
 
1
  import os
2
+ import time
3
+
4
  import httpx
5
+
6
+ HF_API_TOKEN = (os.environ.get("HF_API_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN") or "").strip()
7
+ ROUTER_MODELS_URL = "https://router.huggingface.co/v1/models"
8
+
9
+ PREFERRED_MODELS = [
10
+ {"id": "meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B Instruct", "size": "medium", "description": "Fast general-purpose instruct model"},
11
+ {"id": "Qwen/Qwen2.5-7B-Instruct", "name": "Qwen 2.5 7B Instruct", "size": "medium", "description": "Reliable JSON-following instruction model"},
12
+ {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "name": "Meta Llama 3 8B Instruct", "size": "medium", "description": "Strong general chat behavior"},
13
+ {"id": "google/gemma-3n-E4B-it", "name": "Gemma 3n E4B", "size": "small", "description": "Lightweight instruction-tuned Gemma model"},
14
+ {"id": "Sao10K/L3-8B-Stheno-v3.2", "name": "L3 8B Stheno v3.2", "size": "medium", "description": "Creative 8B chat model"},
15
+ {"id": "XiaomiMiMo/MiMo-V2-Flash", "name": "MiMo V2 Flash", "size": "medium", "description": "Fast flash-tier chat model"},
16
+ {"id": "google/gemma-4-26B-A4B-it", "name": "Gemma 4 26B A4B", "size": "large", "description": "Higher-capacity Gemma instruct model"},
17
+ {"id": "google/gemma-4-31B-it", "name": "Gemma 4 31B", "size": "large", "description": "Large Gemma chat model"},
18
+ {"id": "Qwen/Qwen3.5-35B-A3B", "name": "Qwen 3.5 35B A3B", "size": "large", "description": "Large Qwen instruction model"},
19
+ {"id": "google/gemma-3-27b-it", "name": "Gemma 3 27B", "size": "large", "description": "Large Gemma 3 instruct model"},
20
+ {"id": "moonshotai/Kimi-K2.5", "name": "Kimi K2.5", "size": "large", "description": "Large reasoning-oriented chat model"},
21
+ {"id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "name": "Qwen 3 Coder 30B", "size": "large", "description": "Coder-tuned instruction model"},
22
+ {"id": "meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B Instruct", "size": "xl", "description": "Large instruction-following flagship model"},
23
  ]
24
 
25
+ _CACHE = {"expires_at": 0.0, "ids": None}
26
+
27
+
28
def _headers() -> dict[str, str]:
    """Bearer-auth header for router requests, or {} when no token is set."""
    return {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
32
+
33
+
34
+ def _extract_router_models(payload) -> list[dict]:
35
+ if isinstance(payload, list):
36
+ return [item for item in payload if isinstance(item, dict)]
37
+ if isinstance(payload, dict):
38
+ data = payload.get("data")
39
+ if isinstance(data, list):
40
+ return [item for item in data if isinstance(item, dict)]
41
+ return []
42
+
43
+
44
async def _fetch_router_model_ids() -> set[str] | None:
    """Return the set of model ids the HF router currently serves.

    Successful results are cached for five minutes. Returns None (without
    caching) when no token is configured or the router request fails, so the
    caller can fall back to the static model list.
    """
    now = time.monotonic()
    cached = _CACHE["ids"]
    if _CACHE["expires_at"] > now and isinstance(cached, set):
        return cached

    if not HF_API_TOKEN:
        return None

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            reply = await client.get(ROUTER_MODELS_URL, headers=_headers())
            reply.raise_for_status()
            body = reply.json()
    except Exception:
        # Network/auth failures are non-fatal; signal "unknown" to the caller.
        return None

    fresh_ids = {entry["id"] for entry in _extract_router_models(body) if isinstance(entry.get("id"), str)}
    _CACHE["ids"] = fresh_ids
    _CACHE["expires_at"] = now + 300
    return fresh_ids
66
+
67
+
68
def get_supported_model_ids() -> set[str]:
    """Ids from the curated PREFERRED_MODELS list (the only selectable models)."""
    return {entry["id"] for entry in PREFERRED_MODELS}
70
+
71
+
72
def is_supported_model(model_id: str) -> bool:
    """True when ``model_id`` is one of the curated, router-backed models."""
    return any(entry["id"] == model_id for entry in PREFERRED_MODELS)
74
+
75
+
76
def get_default_model_id() -> str:
    """Model id used when the requested model is unknown or unsupported."""
    first_entry = PREFERRED_MODELS[0]
    return first_entry["id"]
78
+
79
 
80
async def get_available_models() -> dict:
    """List selectable models, filtered to those the router currently serves.

    Falls back to the full curated list when the router cannot be queried.
    """
    router_ids = await _fetch_router_model_ids()
    if not router_ids:
        return {"models": list(PREFERRED_MODELS), "total": len(PREFERRED_MODELS)}
    offered = [entry for entry in PREFERRED_MODELS if entry["id"] in router_ids]
    return {"models": offered, "total": len(offered)}
87
 
88
 
89
def get_model_display_name(model_id: str) -> str:
    """Human-readable name for a model id; derives one for unknown ids."""
    for entry in PREFERRED_MODELS:
        if entry["id"] == model_id:
            return entry["name"]
    # Unknown id: use the repo name's first dash-separated token, capitalized.
    repo_name = model_id.split("/")[-1]
    return repo_name.split("-")[0].capitalize()
app/main.py CHANGED
@@ -5,6 +5,7 @@ import random
5
  import uuid
6
  import os
7
  import time
 
8
  from typing import Optional
9
  from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
10
  from fastapi.middleware.cors import CORSMiddleware
@@ -12,6 +13,7 @@ from pydantic import BaseModel, Field
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
 
15
 
16
  from .models import SimulationState, AgentModel, TickResponse, FireScenario, WaterSource
17
  from .simulation import SimulationEngine, TICK_INTERVAL_SECONDS
 
5
  import uuid
6
  import os
7
  import time
8
+ from pathlib import Path
9
  from typing import Optional
10
  from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
11
  from fastapi.middleware.cors import CORSMiddleware
 
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
16
+ load_dotenv(Path(__file__).resolve().parents[1] / ".env")
17
 
18
  from .models import SimulationState, AgentModel, TickResponse, FireScenario, WaterSource
19
  from .simulation import SimulationEngine, TICK_INTERVAL_SECONDS