Commit d7d564a · Parent(s): b0fa2a5

SIMPLIFY: Remove all legacy models, only support Llama 3.2-1B

Files changed:
- app.py (+6 -4)
- src/backend/chatbot.py (+28 -114)
app.py CHANGED

@@ -157,14 +157,16 @@ if page == "Garden Optimization":
     st.session_state.model = st.sidebar.radio(
         "Select an open-source LLM :",
         (
-            "Llama3.2-1b_CPP ⚡
-            "Qwen2.5-7b_CPP ⭐ (need to download)",
-            "Llama2-7b_CPP (legacy)",
-            "deci-7b_CPP (legacy)",
+            "Llama3.2-1b_CPP ⚡ ACTIVE",
             "lite_demo (no LLM)",
         ),
     )
 
+    st.sidebar.caption("Legacy models (disabled):")
+    st.sidebar.text("❌ Llama2-7b (too large)")
+    st.sidebar.text("❌ Qwen2.5-7b (too large)")
+    st.sidebar.text("❌ deci-7b (too large)")
+
     # Strip the labels for internal use
     if "⭐" in st.session_state.model or "⚡" in st.session_state.model or "(legacy)" in st.session_state.model:
         st.session_state.model = st.session_state.model.split()[0]
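For reference, the unchanged label-stripping branch still reduces the new "⚡ ACTIVE" label to the internal model key, because it splits on whitespace and keeps the first token. A minimal standalone sketch of that mapping follows; strip_label is a hypothetical helper name, the logic mirrors the sidebar code above.

# Illustrative sketch of the label -> internal key mapping used above;
# strip_label is hypothetical, the condition mirrors the sidebar code.
def strip_label(label: str) -> str:
    if "⭐" in label or "⚡" in label or "(legacy)" in label:
        return label.split()[0]
    return label

assert strip_label("Llama3.2-1b_CPP ⚡ ACTIVE") == "Llama3.2-1b_CPP"
assert strip_label("lite_demo (no LLM)") == "lite_demo (no LLM)"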
src/backend/chatbot.py CHANGED

@@ -114,104 +114,29 @@ def init_llm(model, demo_lite):
     else:
         print("⚠️ Running on CPU (no GPU detected)")
 
-
-
-
-
-
-
-
-
-
-
-
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,  # Increased for longer responses
-            context_window=8192,  # Qwen supports up to 128K, but 8K is enough for our use case
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama3.2-1b_CPP":
-        model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,
-            context_window=8192,  # Llama 3.2 supports 128K context
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama2-7b_CPP":
-        model_path = os.path.join(model_base_path, "llama-2-7b-chat.Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        # Build kwargs for LlamaCPP
-        llm_kwargs = {
-            "model_path": model_path,
-            "temperature": 0.1,
-            "max_new_tokens": 1000,
-            "context_window": 3000,
-            "generate_kwargs": {},
-            "model_kwargs": {"n_gpu_layers": n_gpu_layers},
-            "verbose": True,
-        }
-        # Add prompt formatters if available (optional in newer versions)
-        if messages_to_prompt is not None:
-            llm_kwargs["messages_to_prompt"] = messages_to_prompt
-        if completion_to_prompt is not None:
-            llm_kwargs["completion_to_prompt"] = completion_to_prompt
-
-        llm = LlamaCPP(**llm_kwargs)
-    elif model == "deci-7b_CPP":
-        model_path = os.path.join(model_base_path, "decilm-7b-uniform-gqa-q8_0.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            # You can pass in the URL to a GGML model to download it automatically
-            # model_url=model_url,
-            # optionally, you can set the path to a pre-downloaded model instead of model_url
-            model_path=model_path,
-            # model_url = "https://huggingface.co/Deci/DeciLM-7B-instruct-GGUF/resolve/main/decilm-7b-uniform-gqa-q8_0.gguf",
-            temperature=0.1,
-            max_new_tokens=1000,
-            # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-            context_window=3000,
-            # kwargs to pass to __call__()
-            generate_kwargs={},
-            # kwargs to pass to __init__()
-            # set to at least 1 to use GPU, -1 to use all layers on GPU
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            # transform inputs into Llama2 format
-            # messages_to_prompt=messages_to_prompt,
-            # completion_to_prompt=completion_to_prompt,
-            verbose=True,
-        )
-    else:
-        print("Error with chatbot model")
+    # Only Llama 3.2-1B is supported (legacy models removed for simplicity)
+    model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
+    print(f"Loading Llama 3.2-1B from: {model_path}")
+
+    # Check if model exists
+    if not os.path.exists(model_path):
+        error_msg = f"⚠️ Model not found at {model_path}"
+        if env_config["is_hf_space"]:
+            error_msg += ". Please ensure the model file is uploaded to your HuggingFace Space."
+        st.error(error_msg)
+        print(f"❌ {error_msg}")
         return None
+
+    # Initialize Llama 3.2-1B with GPU support
+    llm = LlamaCPP(
+        model_path=model_path,
+        temperature=0.1,
+        max_new_tokens=1500,
+        context_window=8192,  # Llama 3.2 supports 128K context
+        generate_kwargs={},
+        model_kwargs={"n_gpu_layers": n_gpu_layers},
+        verbose=True,
+    )
     return llm
 
 
@@ -246,15 +171,15 @@ def chat_response(template, prompt_text, model, demo_lite):
 
         return response
         # return response.content
-
-
+    else:
+        # Use Llama 3.2-1B (only supported model)
+        print("Using Llama 3.2-1B")
         if "llm" not in st.session_state:
             st.session_state.llm = init_llm(model, demo_lite)
+        if st.session_state.llm is None:
+            return "Error: Could not initialize LLM. Please check the logs."
         response = st.session_state.llm.complete(template + prompt_text)
         return response.text
-    else:
-        print("Error with chatbot model: ", model)
-        return None
 
 
     # # get the plant list from user input

@@ -277,13 +202,6 @@ def get_plant_care_tips(plant_list, model, demo_lite):
         + "], generate 1-2 plant care tips for each plant based on what you know. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format."
     )
 
-    if model == "deci-7b_CPP":
-        template = (
-            "### System: \n\n You are a helpful assistant that knows all about gardening, plants, and companion planting."
-            + "\n\n ### User: Generate gardening tips. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format. \n\n"
-        )
-        text = "### Assistant: \n\n"
-        print("deci-7b_CPP")
     plant_care_tips = chat_response(template, text, model, demo_lite)
     # check to see if response contains ### or < for headers
    print("BP6", plant_care_tips)

@@ -293,11 +211,7 @@ def get_plant_care_tips(plant_list, model, demo_lite):
     if plant_care_tips is None:
         return "Error: Could not generate plant care tips. Please try again or select a different model."
 
-    if (
-        "###" not in plant_care_tips
-        and "<" not in plant_care_tips
-        and model != "deci-7b_CPP"
-    ):  # deci-7b_CPP has more general plant care tips
+    if "###" not in plant_care_tips and "<" not in plant_care_tips:
         st.write(plant_care_tips)
         print("Error with parsing plant care tips")
         # try again up to 5 times
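For orientation, here is a minimal sketch of how the simplified init_llm path is exercised end to end. The constructor arguments mirror the diff above; the import path, the models directory, and the prompt string are assumptions (they depend on the installed llama-index version and the Space's layout), and chat_response ultimately calls the same .complete() method on the returned object.

# Standalone sketch only, not the repo's code. Assumes llama-index's LlamaCPP
# wrapper (import path varies by version) and a locally downloaded GGUF file.
import os
from llama_index.llms.llama_cpp import LlamaCPP  # assumed import path

model_base_path = "models"  # assumption: wherever the Space keeps GGUF files
model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")

if not os.path.exists(model_path):
    print(f"❌ Model file not found: {model_path}")
else:
    llm = LlamaCPP(
        model_path=model_path,
        temperature=0.1,
        max_new_tokens=1500,
        context_window=8192,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": -1},  # -1 offloads all layers when a GPU is available
        verbose=True,
    )
    # chat_response() does the equivalent of this with template + prompt_text
    print(llm.complete("List two companion plants for tomatoes.").text)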
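Finally, a small illustrative check that mirrors the simplified validation in get_plant_care_tips: tips are treated as well-formed only if they contain a "###" markdown header or an HTML tag ("<"); otherwise the function retries, up to 5 times per the comment in the diff. The helper name below is hypothetical.

# Hypothetical helper mirroring the simplified header check in get_plant_care_tips.
def looks_like_formatted_tips(plant_care_tips: str) -> bool:
    # Accept either markdown-style "###" headers or raw HTML tags.
    return "###" in plant_care_tips or "<" in plant_care_tips

assert looks_like_formatted_tips("### Tomato\n<ul><li>Water deeply once a week</li></ul>")
assert not looks_like_formatted_tips("Water your plants regularly.")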