adityaverma977 committed on
Commit
8bcd5b0
·
1 Parent(s): c0a08aa

Add 15+ free HF models, expand model selection, add debug logging

Browse files
backend/app/groq_client.py CHANGED
@@ -22,11 +22,27 @@ GROQ_PREMIUM_MODELS = [
22
  ]
23
 
24
  # Open-source models available via HF Inference API (unlimited calls)
 
25
  HF_MODELS = [
 
26
  "mistralai/Mistral-7B-Instruct-v0.2",
 
 
 
 
27
  "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
 
 
28
  "meta-llama/Llama-2-7b-chat-hf",
 
 
 
29
  "google/flan-t5-large",
 
 
 
 
 
30
  ]
31
 
32
 
 
22
  ]
23
 
24
  # Open-source models available via HF Inference API (unlimited calls)
25
+ # Expanded list of free HF models (set your Hugging Face API token to access them)
26
  HF_MODELS = [
27
+ # Fast, reliable models
28
  "mistralai/Mistral-7B-Instruct-v0.2",
29
+ "mistralai/Mistral-7B-Instruct-v0.1",
30
+ "HuggingFaceH4/zephyr-7b-beta",
31
+ "HuggingFaceH4/zephyr-7b",
32
+ # Quality-focused models
33
  "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
34
+ "NousResearch/Nous-Hermes-2-7b",
35
+ # Meta models
36
  "meta-llama/Llama-2-7b-chat-hf",
37
+ "meta-llama/Llama-2-13b-chat-hf",
38
+ "meta-llama/Llama-3-8b-Instruct",
39
+ # Instruction-tuned models
40
  "google/flan-t5-large",
41
+ "google/flan-t5-base",
42
+ # Falcon models
43
+ "tiiuae/falcon-7b-instruct",
44
+ # Other strong models
45
+ "EleutherAI/gpt-j-6B",
46
  ]
47
 
48
 
backend/app/hf_spaces.py CHANGED
@@ -20,30 +20,79 @@ ALL_MODELS = [
20
  "name": "Llama 2 70B",
21
  "description": "Meta's large 70B instruction-tuned model",
22
  },
23
- # Open-source HF models (unlimited calls, free)
24
  {
25
  "id": "mistralai/Mistral-7B-Instruct-v0.2",
26
- "name": "Mistral 7B Instruct",
27
  "description": "Fast, reliable 7B instruction-tuned model",
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  {
30
  "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
31
- "name": "Nous Hermes 2",
32
  "description": "High-quality 7B with DPO training",
33
  },
 
 
 
 
 
 
34
  {
35
  "id": "meta-llama/Llama-2-7b-chat-hf",
36
  "name": "Llama 2 7B Chat",
37
  "description": "Meta's Llama 2 7B chat variant",
38
  },
 
 
 
 
 
 
 
 
 
 
 
39
  {
40
  "id": "google/flan-t5-large",
41
  "name": "FLAN-T5 Large",
42
  "description": "Google's instruction-tuned T5 model",
43
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  ]
45
 
46
 
 
47
  async def get_available_models() -> dict:
48
  """
49
  Get unified list of all available models (Groq + HF).
 
20
  "name": "Llama 2 70B",
21
  "description": "Meta's large 70B instruction-tuned model",
22
  },
23
+ # Open-source HF models - Fast & Reliable
24
  {
25
  "id": "mistralai/Mistral-7B-Instruct-v0.2",
26
+ "name": "Mistral 7B Instruct v0.2",
27
  "description": "Fast, reliable 7B instruction-tuned model",
28
  },
29
+ {
30
+ "id": "mistralai/Mistral-7B-Instruct-v0.1",
31
+ "name": "Mistral 7B Instruct v0.1",
32
+ "description": "Original Mistral 7B instruct version",
33
+ },
34
+ {
35
+ "id": "HuggingFaceH4/zephyr-7b-beta",
36
+ "name": "Zephyr 7B Beta",
37
+ "description": "HF's high-quality 7B chat model",
38
+ },
39
+ {
40
+ "id": "HuggingFaceH4/zephyr-7b",
41
+ "name": "Zephyr 7B",
42
+ "description": "Fast, well-aligned 7B model",
43
+ },
44
+ # Open-source HF models - Quality-Focused
45
  {
46
  "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
47
+ "name": "Nous Hermes 2 Mistral",
48
  "description": "High-quality 7B with DPO training",
49
  },
50
+ {
51
+ "id": "NousResearch/Nous-Hermes-2-7b",
52
+ "name": "Nous Hermes 2 7B",
53
+ "description": "Quality-focused 7B model",
54
+ },
55
+ # Open-source HF models - Meta's Llama
56
  {
57
  "id": "meta-llama/Llama-2-7b-chat-hf",
58
  "name": "Llama 2 7B Chat",
59
  "description": "Meta's Llama 2 7B chat variant",
60
  },
61
+ {
62
+ "id": "meta-llama/Llama-2-13b-chat-hf",
63
+ "name": "Llama 2 13B Chat",
64
+ "description": "Meta's Llama 2 13B chat variant",
65
+ },
66
+ {
67
+ "id": "meta-llama/Llama-3-8b-Instruct",
68
+ "name": "Llama 3 8B Instruct",
69
+ "description": "Meta's latest Llama 3 8B model",
70
+ },
71
+ # Open-source HF models - Google & Others
72
  {
73
  "id": "google/flan-t5-large",
74
  "name": "FLAN-T5 Large",
75
  "description": "Google's instruction-tuned T5 model",
76
  },
77
+ {
78
+ "id": "google/flan-t5-base",
79
+ "name": "FLAN-T5 Base",
80
+ "description": "Google's FLAN-T5 base variant",
81
+ },
82
+ {
83
+ "id": "tiiuae/falcon-7b-instruct",
84
+ "name": "Falcon 7B Instruct",
85
+ "description": "TII's Falcon 7B instruction-tuned",
86
+ },
87
+ {
88
+ "id": "EleutherAI/gpt-j-6B",
89
+ "name": "GPT-J 6B",
90
+ "description": "EleutherAI's 6B GPT model",
91
+ },
92
  ]
93
 
94
 
95
+
96
  async def get_available_models() -> dict:
97
  """
98
  Get unified list of all available models (Groq + HF).
backend/app/main.py CHANGED
@@ -66,14 +66,14 @@ async def root():
66
  return {
67
  "service": "rush-agents-backend",
68
  "status": "ok",
69
- "groq_available": groq_client.is_ready(),
70
  }
71
 
72
  @app.get("/wake")
73
  async def wake():
74
  return {
75
  "warm": True,
76
- "groq_available": groq_client.is_ready(),
77
  "uptime_seconds": int(time.time() - START_TIME),
78
  }
79
 
@@ -106,6 +106,7 @@ async def start_simulation(req: StartSimulationRequest):
106
 
107
  @app.post("/place-fire", response_model=SimulationState)
108
  def place_fire(req: PlaceFireRequest):
 
109
  sim = _get_or_404(req.simulation_id)
110
  if sim.status != "waiting_for_scenario":
111
  raise HTTPException(status_code=409, detail="Fire already placed or simulation finished.")
 
66
  return {
67
  "service": "rush-agents-backend",
68
  "status": "ok",
69
+ "inference_ready": groq_client.is_ready(),
70
  }
71
 
72
  @app.get("/wake")
73
  async def wake():
74
  return {
75
  "warm": True,
76
+ "inference_ready": groq_client.is_ready(),
77
  "uptime_seconds": int(time.time() - START_TIME),
78
  }
79
 
 
106
 
107
  @app.post("/place-fire", response_model=SimulationState)
108
  def place_fire(req: PlaceFireRequest):
109
+ print(f"\n[PLACE_FIRE] Called - sim_id={req.simulation_id}, total_active={len(active_simulations)}, keys={list(active_simulations.keys())}")
110
  sim = _get_or_404(req.simulation_id)
111
  if sim.status != "waiting_for_scenario":
112
  raise HTTPException(status_code=409, detail="Fire already placed or simulation finished.")