Commit d7d564a · Parent(s): b0fa2a5

SIMPLIFY: Remove all legacy models, only support Llama 3.2-1B

Files changed:
- app.py (+6 -4)
- src/backend/chatbot.py (+28 -114)
app.py CHANGED

@@ -157,14 +157,16 @@ if page == "Garden Optimization":
     st.session_state.model = st.sidebar.radio(
         "Select an open-source LLM :",
         (
-            "Llama3.2-1b_CPP ⚡
-            "Qwen2.5-7b_CPP ⭐ (need to download)",
-            "Llama2-7b_CPP (legacy)",
-            "deci-7b_CPP (legacy)",
+            "Llama3.2-1b_CPP ⚡ ACTIVE",
             "lite_demo (no LLM)",
         ),
     )
 
+    st.sidebar.caption("Legacy models (disabled):")
+    st.sidebar.text("❌ Llama2-7b (too large)")
+    st.sidebar.text("❌ Qwen2.5-7b (too large)")
+    st.sidebar.text("❌ deci-7b (too large)")
+
     # Strip the labels for internal use
     if "⭐" in st.session_state.model or "⚡" in st.session_state.model or "(legacy)" in st.session_state.model:
         st.session_state.model = st.session_state.model.split()[0]
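For reference, the unchanged label-stripping branch still reduces the new "⚡ ACTIVE" label to the internal model key, because it splits on whitespace and keeps the first token. A minimal standalone sketch of that mapping follows; strip_label is a hypothetical helper name, the logic mirrors the sidebar code above.

# Illustrative sketch of the label -> internal key mapping used above;
# strip_label is hypothetical, the condition mirrors the sidebar code.
def strip_label(label: str) -> str:
    if "⭐" in label or "⚡" in label or "(legacy)" in label:
        return label.split()[0]
    return label

assert strip_label("Llama3.2-1b_CPP ⚡ ACTIVE") == "Llama3.2-1b_CPP"
assert strip_label("lite_demo (no LLM)") == "lite_demo (no LLM)"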
src/backend/chatbot.py CHANGED

@@ -114,104 +114,29 @@ def init_llm(model, demo_lite):
     else:
         print("⚠️ Running on CPU (no GPU detected)")
 
-
-
-
-
-
-
-
-
-
-
-
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,  # Increased for longer responses
-            context_window=8192,  # Qwen supports up to 128K, but 8K is enough for our use case
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama3.2-1b_CPP":
-        model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            model_path=model_path,
-            temperature=0.1,
-            max_new_tokens=1500,
-            context_window=8192,  # Llama 3.2 supports 128K context
-            generate_kwargs={},
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            verbose=True,
-        )
-    elif model == "Llama2-7b_CPP":
-        model_path = os.path.join(model_base_path, "llama-2-7b-chat.Q4_K_M.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        # Build kwargs for LlamaCPP
-        llm_kwargs = {
-            "model_path": model_path,
-            "temperature": 0.1,
-            "max_new_tokens": 1000,
-            "context_window": 3000,
-            "generate_kwargs": {},
-            "model_kwargs": {"n_gpu_layers": n_gpu_layers},
-            "verbose": True,
-        }
-        # Add prompt formatters if available (optional in newer versions)
-        if messages_to_prompt is not None:
-            llm_kwargs["messages_to_prompt"] = messages_to_prompt
-        if completion_to_prompt is not None:
-            llm_kwargs["completion_to_prompt"] = completion_to_prompt
-
-        llm = LlamaCPP(**llm_kwargs)
-    elif model == "deci-7b_CPP":
-        model_path = os.path.join(model_base_path, "decilm-7b-uniform-gqa-q8_0.gguf")
-        print("model path: ", model_path)
-
-        # Check if model exists, if not and on HF, provide helpful message
-        if not os.path.exists(model_path) and env_config["is_hf_space"]:
-            st.error(f"⚠️ Model not found at {model_path}. Please ensure the model file is uploaded to your HuggingFace Space.")
-            print(f"❌ Model file not found: {model_path}")
-            return None
-
-        llm = LlamaCPP(
-            # You can pass in the URL to a GGML model to download it automatically
-            # model_url=model_url,
-            # optionally, you can set the path to a pre-downloaded model instead of model_url
-            model_path=model_path,
-            # model_url = "https://huggingface.co/Deci/DeciLM-7B-instruct-GGUF/resolve/main/decilm-7b-uniform-gqa-q8_0.gguf",
-            temperature=0.1,
-            max_new_tokens=1000,
-            # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-            context_window=3000,
-            # kwargs to pass to __call__()
-            generate_kwargs={},
-            # kwargs to pass to __init__()
-            # set to at least 1 to use GPU, -1 to use all layers on GPU
-            model_kwargs={"n_gpu_layers": n_gpu_layers},
-            # transform inputs into Llama2 format
-            # messages_to_prompt=messages_to_prompt,
-            # completion_to_prompt=completion_to_prompt,
-            verbose=True,
-        )
-    else:
-        print("Error with chatbot model")
+    # Only Llama 3.2-1B is supported (legacy models removed for simplicity)
+    model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")
+    print(f"Loading Llama 3.2-1B from: {model_path}")
+
+    # Check if model exists
+    if not os.path.exists(model_path):
+        error_msg = f"⚠️ Model not found at {model_path}"
+        if env_config["is_hf_space"]:
+            error_msg += ". Please ensure the model file is uploaded to your HuggingFace Space."
+        st.error(error_msg)
+        print(f"❌ {error_msg}")
         return None
+
+    # Initialize Llama 3.2-1B with GPU support
+    llm = LlamaCPP(
+        model_path=model_path,
+        temperature=0.1,
+        max_new_tokens=1500,
+        context_window=8192,  # Llama 3.2 supports 128K context
+        generate_kwargs={},
+        model_kwargs={"n_gpu_layers": n_gpu_layers},
+        verbose=True,
+    )
     return llm
 
 
@@ -246,15 +171,15 @@ def chat_response(template, prompt_text, model, demo_lite):
 
         return response
         # return response.content
-
-
+    else:
+        # Use Llama 3.2-1B (only supported model)
+        print("Using Llama 3.2-1B")
         if "llm" not in st.session_state:
             st.session_state.llm = init_llm(model, demo_lite)
+        if st.session_state.llm is None:
+            return "Error: Could not initialize LLM. Please check the logs."
         response = st.session_state.llm.complete(template + prompt_text)
         return response.text
-    else:
-        print("Error with chatbot model: ", model)
-        return None
 
 
     # # get the plant list from user input

@@ -277,13 +202,6 @@ def get_plant_care_tips(plant_list, model, demo_lite):
         + "], generate 1-2 plant care tips for each plant based on what you know. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format."
     )
 
-    if model == "deci-7b_CPP":
-        template = (
-            "### System: \n\n You are a helpful assistant that knows all about gardening, plants, and companion planting."
-            + "\n\n ### User: Generate gardening tips. Return just the plant care tips in HTML markdown format. Make sure to use ### for headers. Do not include any other text or explanation before or after the markdown. It must be in HTML markdown format. \n\n"
-        )
-        text = "### Assistant: \n\n"
-        print("deci-7b_CPP")
     plant_care_tips = chat_response(template, text, model, demo_lite)
     # check to see if response contains ### or < for headers
    print("BP6", plant_care_tips)

@@ -293,11 +211,7 @@ def get_plant_care_tips(plant_list, model, demo_lite):
     if plant_care_tips is None:
         return "Error: Could not generate plant care tips. Please try again or select a different model."
 
-    if (
-        "###" not in plant_care_tips
-        and "<" not in plant_care_tips
-        and model != "deci-7b_CPP"
-    ):  # deci-7b_CPP has more general plant care tips
+    if "###" not in plant_care_tips and "<" not in plant_care_tips:
         st.write(plant_care_tips)
         print("Error with parsing plant care tips")
         # try again up to 5 times
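For orientation, here is a minimal sketch of how the simplified init_llm path is exercised end to end. The constructor arguments mirror the diff above; the import path, the models directory, and the prompt string are assumptions (they depend on the installed llama-index version and the Space's layout), and chat_response ultimately calls the same .complete() method on the returned object.

# Standalone sketch only, not the repo's code. Assumes llama-index's LlamaCPP
# wrapper (import path varies by version) and a locally downloaded GGUF file.
import os
from llama_index.llms.llama_cpp import LlamaCPP  # assumed import path

model_base_path = "models"  # assumption: wherever the Space keeps GGUF files
model_path = os.path.join(model_base_path, "Llama-3.2-1B-Instruct-Q4_K_M.gguf")

if not os.path.exists(model_path):
    print(f"❌ Model file not found: {model_path}")
else:
    llm = LlamaCPP(
        model_path=model_path,
        temperature=0.1,
        max_new_tokens=1500,
        context_window=8192,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": -1},  # -1 offloads all layers when a GPU is available
        verbose=True,
    )
    # chat_response() does the equivalent of this with template + prompt_text
    print(llm.complete("List two companion plants for tomatoes.").text)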
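Finally, a small illustrative check that mirrors the simplified validation in get_plant_care_tips: tips are treated as well-formed only if they contain a "###" markdown header or an HTML tag ("<"); otherwise the function retries, up to 5 times per the comment in the diff. The helper name below is hypothetical.

# Hypothetical helper mirroring the simplified header check in get_plant_care_tips.
def looks_like_formatted_tips(plant_care_tips: str) -> bool:
    # Accept either markdown-style "###" headers or raw HTML tags.
    return "###" in plant_care_tips or "<" in plant_care_tips

assert looks_like_formatted_tips("### Tomato\n<ul><li>Water deeply once a week</li></ul>")
assert not looks_like_formatted_tips("Water your plants regularly.")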