danidanidani committed
Commit d7d564a · 1 Parent(s): b0fa2a5

SIMPLIFY: Remove all legacy models, only support Llama 3.2-1B

Files changed (2):
  1. app.py +6 -4
  2. src/backend/chatbot.py +28 -114
app.py CHANGED
@@ -157,14 +157,16 @@ if page == "Garden Optimization":
     st.session_state.model = st.sidebar.radio(
         "Select an open-source LLM :",
         (
-            "Llama3.2-1b_CPP ⚡ NEW & FASTEST",
-            "Qwen2.5-7b_CPP ⭐ (need to download)",
-            "Llama2-7b_CPP (legacy)",
-            "deci-7b_CPP (legacy)",
+            "Llama3.2-1b_CPP ⚡ ACTIVE",
             "lite_demo (no LLM)",
         ),
     )
 
+    st.sidebar.caption("Legacy models (disabled):")
+    st.sidebar.text("❌ Llama2-7b (too large)")
+    st.sidebar.text("❌ Qwen2.5-7b (too large)")
+    st.sidebar.text("❌ deci-7b (too large)")
+
     # Strip the labels for internal use
     if "⭐" in st.session_state.model or "⚡" in st.session_state.model or "(legacy)" in st.session_state.model:
         st.session_state.model = st.session_state.model.split()[0]
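The label-stripping logic is unchanged: a decorated radio label collapses to the bare model key used internally. A minimal sketch of that behavior (strip_label is a hypothetical standalone helper for illustration, not part of the commit):

# Hypothetical standalone version of the label-stripping logic above.
def strip_label(label: str) -> str:
    # Decorated labels (⚡, ⭐, or "(legacy)") reduce to their first whitespace-separated token.
    if "⭐" in label or "⚡" in label or "(legacy)" in label:
        return label.split()[0]
    return label

assert strip_label("Llama3.2-1b_CPP ⚡ ACTIVE") == "Llama3.2-1b_CPP"
assert strip_label("lite_demo (no LLM)") == "lite_demo (no LLM)"  # undecorated, passes through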
src/backend/chatbot.py CHANGED
@@ -114,104 +114,29 @@ def init_llm(model, demo_lite):
     else:
         print("⚠️ Running on CPU (no GPU detected)")
 
-    if model == "Qwen2.5-7b_CPP":
-        model_path = os.path.join(model_base_path, "Qwen2.5-7B-Instruct-Q5_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,  # Increased for longer responses
-            context_window=8192,  # Qwen supports up to 128K, but 8K is enough for our use case
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama3.2-1b_CPP":
-        model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,
-            context_window=8192,  # Llama 3.2 supports 128K context
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama2-7b_CPP":
-        model_path = os.path.join(model_base_path, "llama-2-7b-chat.Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        # Build kwargs for LlamaCPP
-        llm_kwargs = {
-            "model_path": model_path,
-            "temperature": 0.1,
-            "max_new_tokens": 1000,
-            "context_window": 3000,
-            "generate_kwargs": {},
-            "model_kwargs": {"n_gpu_layers": n_gpu_layers},
-            "verbose": True,
-        }
-        # Add prompt formatters if available (optional in newer versions)
-        if messages_to_prompt is not None:
-            llm_kwargs["messages_to_prompt"] = messages_to_prompt
-        if completion_to_prompt is not None:
-            llm_kwargs["completion_to_prompt"] = completion_to_prompt
-
-        llm = LlamaCPP(**llm_kwargs)
-    elif model == "deci-7b_CPP":
-        model_path = os.path.join(model_base_path, "decilm-7b-uniform-gqa-q8_0.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            # You can pass in the URL to a GGML model to download it automatically
-            # model_url=model_url,
-            # optionally, you can set the path to a pre-downloaded model instead of model_url
-            model_path=model_path,
-            # model_url="https://huggingface.co/Deci/DeciLM-7B-instruct-GGUF/resolve/main/decilm-7b-uniform-gqa-q8_0.gguf",
-            temperature=0.1,
-            max_new_tokens=1000,
-            # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-            context_window=3000,
-            # kwargs to pass to __call__()
-            generate_kwargs={},
-            # kwargs to pass to __init__()
-            # set to at least 1 to use GPU, -1 to use all layers on GPU
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            # transform inputs into Llama2 format
-            # messages_to_prompt=messages_to_prompt,
-            # completion_to_prompt=completion_to_prompt,
-            verbose=True,
-        )
-    else:
-        print("Error with chatbot model")
+    # Only Llama 3.2-1B is supported (legacy models removed for simplicity)
+    model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
+    print(f"Loading Llama 3.2-1B from: {model_path}")
+
+    # Check if model exists
+    if not os.path.exists(model_path):
+        error_msg = f"⚠️ Model not found at {model_path}"
+        if env_config["is_hf_space"]:
+            error_msg += ". Please ensure the model file is uploaded to your HuggingFace Space."
+        st.error(error_msg)
+        print(f"❌ {error_msg}")
         return None
+
+    # Initialize Llama 3.2-1B with GPU support
+    llm = LlamaCPP(
+        model_path=model_path,
+        temperature=0.1,
+        max_new_tokens=1500,
+        context_window=8192,  # Llama 3.2 supports 128K context
+        generate_kwargs={},
+        model_kwargs={"n_gpu_layers": n_gpu_layers},
+        verbose=True,
+    )
     return llm
 
 
@@ -246,15 +171,15 @@ def chat_response(template, prompt_text, model, demo_lite):
 
         return response
         # return response.content
-    elif model in ["Llama2-7b_CPP", "deci-7b_CPP", "Llama3.2-1b_CPP", "Qwen2.5-7b_CPP"]:
-        print("BP 5.1: running full demo, model: ", model)
+    else:
+        # Use Llama 3.2-1B (only supported model)
+        print("Using Llama 3.2-1B")
         if "llm" not in st.session_state:
             st.session_state.llm = init_llm(model, demo_lite)
+        if st.session_state.llm is None:
+            return "Error: Could not initialize LLM. Please check the logs."
         response = st.session_state.llm.complete(template + prompt_text)
         return response.text
-    else:
-        print("Error with chatbot model: ", model)
-        return None
 
 
 # # get the plant list from user input
@@ -277,13 +202,6 @@ def get_plant_care_tips(plant_list, model, demo_lite):
         + "], generate 1-2 plant care tips for each plant based on what you know. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format."
     )
 
-    if model == "deci-7b_CPP":
-        template = (
-            "### System: \n\n You are a helpful assistant that knows all about gardening, plants, and companion planting."
-            + "\n\n ### User: Generate gardening tips. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format. \n\n"
-        )
-        text = "### Assistant: \n\n"
-        print("deci-7b_CPP")
     plant_care_tips = chat_response(template, text, model, demo_lite)
     # check to see if response contains ### or < for headers
     print("BP6", plant_care_tips)
@@ -293,11 +211,7 @@ def get_plant_care_tips(plant_list, model, demo_lite):
     if plant_care_tips is None:
         return "Error: Could not generate plant care tips. Please try again or select a different model."
 
-    if (
-        "###" not in plant_care_tips
-        and "<" not in plant_care_tips
-        and model != "deci-7b_CPP"
-    ):  # deci-7b_CPP has more general plant care tips
+    if "###" not in plant_care_tips and "<" not in plant_care_tips:
         st.write(plant_care_tips)
         print("Error with parsing plant care tips")
         # try again up to 5 times
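With the model branches gone, the single remaining loading path can be sanity-checked outside the app. A minimal sketch, assuming llama-index's LlamaCPP wrapper is installed and the GGUF file has been downloaded; MODEL_DIR stands in for the app's model_base_path, and on older llama-index versions the import is from llama_index.llms instead:

# Illustrative smoke test for the simplified loading path (not part of the commit).
import os
from llama_index.llms.llama_cpp import LlamaCPP  # older versions: from llama_index.llms import LlamaCPP

MODEL_DIR = "./models"  # stand-in for the app's model_base_path
model_path = os.path.join(MODEL_DIR, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")

if not os.path.exists(model_path):
    raise SystemExit(f"Model not found at {model_path}")

# Same parameters as init_llm() above; n_gpu_layers=-1 offloads all layers when a GPU is present.
llm = LlamaCPP(
    model_path=model_path,
    temperature=0.1,
    max_new_tokens=1500,
    context_window=8192,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers": -1},
    verbose=True,
)
print(llm.complete("List three companion plants for tomatoes.").text)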