future-html

Sleeping

App Files Community

aadya1762 committed on Mar 17, 2025

Commit

c1e7456

1 Parent(s): 296ef11

Add model config sliders

Browse files

Files changed (2) hide show

app.py +34 -4
gemmademo/_model.py +9 -9

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 # Interface all the functions from gemmademo.
-# Implement login functionality in the side bar.
 # Implement a task selector in the side bar.
-# Interface all the functions from gemmademo.
 # Add a button to clear the chat history.
 import streamlit as st
 from gemmademo import (
     LlamaCppGemmaModel,
@@ -25,6 +22,10 @@ def main():
         st.session_state.selected_model = "gemma-2b-it"
     if "selected_task" not in st.session_state:
         st.session_state.selected_task = "Question Answering"
     # Sidebar for login and configuration
     with st.sidebar:
@@ -56,6 +57,31 @@ def main():
             st.session_state.selected_task = selected_task
             st.rerun()
         # Clear chat history button
         if st.button("Clear Chat History"):
             if "chat_instance" in st.session_state:
@@ -65,7 +91,11 @@ def main():
     # Main content area
     # Initialize model with the selected configuration
     model_name = st.session_state.selected_model
-    model = LlamaCppGemmaModel(name=model_name)
     # Load model (will use cached version if available)
     with st.spinner(f"Loading {model_name}..."):

 # Interface all the functions from gemmademo.
 # Implement a task selector in the side bar.
 # Add a button to clear the chat history.
 import streamlit as st
 from gemmademo import (
     LlamaCppGemmaModel,
         st.session_state.selected_model = "gemma-2b-it"
     if "selected_task" not in st.session_state:
         st.session_state.selected_task = "Question Answering"
+    if "max_tokens" not in st.session_state:
+        st.session_state.max_tokens = 512
+    if "temperature" not in st.session_state:
+        st.session_state.temperature = 0.7
     # Sidebar for login and configuration
     with st.sidebar:
             st.session_state.selected_task = selected_task
             st.rerun()
+        # Model Config Selection
+        new_max_tokens_value = st.slider(
+            "Max Tokens",
+            min_value=1,
+            max_value=4096,
+            value=st.session_state.max_tokens,
+            step=1,
+        )
+        # After setting the slider values
+        if st.session_state.max_tokens != new_max_tokens_value:
+            st.session_state.max_tokens = new_max_tokens_value
+            st.rerun()
+        new_temperature_value = st.slider(
+            "Temperature",
+            min_value=0.0,
+            max_value=1.0,
+            value=st.session_state.temperature,
+            step=0.01,
+        )
+        # After setting the slider values
+        if st.session_state.temperature != new_temperature_value:
+            st.session_state.temperature = new_temperature_value
+            st.rerun()
         # Clear chat history button
         if st.button("Clear Chat History"):
             if "chat_instance" in st.session_state:
     # Main content area
     # Initialize model with the selected configuration
     model_name = st.session_state.selected_model
+    model = LlamaCppGemmaModel(
+        name=model_name,
+        max_tokens=st.session_state.max_tokens,
+        temperature=st.session_state.temperature,
+    )
     # Load model (will use cached version if available)
     with st.spinner(f"Loading {model_name}..."):

gemmademo/_model.py CHANGED Viewed

@@ -51,7 +51,9 @@ class LlamaCppGemmaModel:
         },
     }
-    def __init__(self, name: str = "gemma-2b"):
         """
         Initialize the model instance.
@@ -60,8 +62,10 @@ class LlamaCppGemmaModel:
         """
         self.name = name
         self.model = None  # Instance of Llama from llama.cpp
-    def load_model(self, n_threads: int = 2, n_ctx: int = 2048, n_gpu_layers: int = 0):
         """
         Load the model and cache it in Streamlit's session state.
         If the model file does not exist, it will be downloaded to the models/ directory.
@@ -104,7 +108,7 @@ class LlamaCppGemmaModel:
             with st.spinner(f"Loading {self.name}..."):
                 st.session_state[model_key] = Llama(
                     model_path=model_path,
-                    n_threads=n_threads,
                     n_ctx=n_ctx,
                     n_gpu_layers=n_gpu_layers,
                 )
@@ -114,9 +118,6 @@ class LlamaCppGemmaModel:
     def generate_response(
         self,
         prompt: str,
-        max_tokens: int = 512,
-        temperature: float = 0.7,
-        **kwargs,
     ) -> str:
         """
         Generate a response using the llama.cpp model.
@@ -136,9 +137,8 @@ class LlamaCppGemmaModel:
         # Call the llama.cpp model with the provided parameters.
         response = self.model(
             prompt,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            **kwargs,
         )
         generated_text = response["choices"][0]["text"]
         return generated_text.strip()

         },
     }
+    def __init__(
+        self, name: str = "gemma-2b", max_tokens: int = 512, temperature: float = 0.7
+    ):
         """
         Initialize the model instance.
         """
         self.name = name
         self.model = None  # Instance of Llama from llama.cpp
+        self.max_tokens = max_tokens
+        self.temperature = temperature
+    def load_model(self, n_ctx: int = 2048, n_gpu_layers: int = 0):
         """
         Load the model and cache it in Streamlit's session state.
         If the model file does not exist, it will be downloaded to the models/ directory.
             with st.spinner(f"Loading {self.name}..."):
                 st.session_state[model_key] = Llama(
                     model_path=model_path,
+                    n_threads=os.cpu_count(),
                     n_ctx=n_ctx,
                     n_gpu_layers=n_gpu_layers,
                 )
     def generate_response(
         self,
         prompt: str,
     ) -> str:
         """
         Generate a response using the llama.cpp model.
         # Call the llama.cpp model with the provided parameters.
         response = self.model(
             prompt,
+            max_tokens=self.max_tokens,
+            temperature=self.temperature,
         )
         generated_text = response["choices"][0]["text"]
         return generated_text.strip()