port to gradio
- README.md +1 -2
- app.py +21 -95
- gemmademo/_chat.py +18 -40
- gemmademo/_model.py +26 -47
README.md
CHANGED
@@ -3,8 +3,7 @@ title: Gemma Chat Interface
 emoji: 🤖
 colorFrom: indigo
 colorTo: blue
-sdk:
-sdk_version: 1.43.1
+sdk: gradio
 python_version: 3.12
 app_file: app.py
 pinned: false
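For orientation, the Space front matter that results from this hunk should read roughly as follows. The `title` line is taken from the hunk header, and the `---` delimiters are the usual Spaces README convention, assumed here rather than shown in the diff:

---
title: Gemma Chat Interface
emoji: 🤖
colorFrom: indigo
colorTo: blue
sdk: gradio
python_version: 3.12
app_file: app.py
pinned: false
---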
app.py
CHANGED
@@ -1,100 +1,26 @@
-# Add a button to clear the chat history.
-import streamlit as st
-from gemmademo import (
-    LlamaCppGemmaModel,
-    StreamlitChat,
-    PromptManager,
-    huggingface_login,
-)
-import os
-import sys
-import subprocess
+import gradio as gr
+from gemmademo import LlamaCppGemmaModel, GradioChat, PromptManager


 def main():
-    if "selected_task" not in st.session_state:
-        st.session_state.selected_task = "Question Answering"
-
-    # Sidebar for login and configuration
-    with st.sidebar:
-        st.title("Gemma Chat Configuration")
-
-        # Login section
-        huggingface_login(os.getenv("HF_TOKEN"))
-        # Model selection
-        st.subheader("Model Selection")
-        model_options = list(LlamaCppGemmaModel.AVAILABLE_MODELS.keys())
-        selected_model = st.selectbox(
-            "Select Gemma Model",
-            model_options,
-            index=model_options.index(st.session_state.selected_model),
-        )
-        if selected_model != st.session_state.selected_model:
-            st.session_state.selected_model = selected_model
-            st.rerun()
-
-        # Task selection
-        st.subheader("Task Selection")
-        task_options = ["Question Answering", "Text Generation", "Code Completion"]
-        selected_task = st.selectbox(
-            "Select Task",
-            task_options,
-            index=task_options.index(st.session_state.selected_task),
-        )
-        if selected_task != st.session_state.selected_task:
-            st.session_state.selected_task = selected_task
-            st.rerun()
-
-    # Main content area
-    # Initialize model with the selected configuration
-    model_name = st.session_state.selected_model
-    model = LlamaCppGemmaModel(name=model_name)
-
-    # Load model (will use cached version if available)
-    with st.spinner(f"Loading {model_name}..."):
-        model.load_model()
+    # Model and task selection
+    model_options = list(LlamaCppGemmaModel.AVAILABLE_MODELS.keys())
+    task_options = ["Question Answering", "Text Generation", "Code Completion"]
+
+    def update_chat(model_name, task_name):
+        model = LlamaCppGemmaModel(name=model_name)
+        model.load_model()
+        prompt_manager = PromptManager(task=task_name)
+        chat = GradioChat(model=model, prompt_manager=prompt_manager)
+        chat.run()
+
+    gr.Interface(
+        fn=update_chat,
+        inputs=[
+            gr.Dropdown(choices=model_options, value="gemma-2b-it", label="Select Gemma Model"),
+            gr.Dropdown(choices=task_options, value="Question Answering", label="Select Task"),
+        ],
+        outputs=[],
+    ).launch()


 if __name__ == "__main__":
-    # If so, launch Streamlit programmatically
-    if not os.environ.get("STREAMLIT_RUN_APP"):
-        os.environ["STREAMLIT_RUN_APP"] = "1"
-        # Get the current script path
-        script_path = os.path.abspath(__file__)
-        # Launch streamlit run with port 7860 and headless mode
-        cmd = [
-            sys.executable,
-            "-m",
-            "streamlit",
-            "run",
-            script_path,
-            "--server.port",
-            "7860",
-            "--server.address",
-            "0.0.0.0",
-            "--server.headless",
-            "true",
-        ]
-        subprocess.run(cmd)
-    else:
-        # Normal Streamlit execution
-        main()
+    main()
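The new main() drives everything through a gr.Interface whose callback launches a separate chat app and whose outputs list is empty. As a point of comparison only, the same pieces could be wired onto a single page with gr.Blocks. The following is an untested sketch, not part of this commit; it assumes the same gemmademo classes shown in the diff and Gradio's tuple-style chat history:

import gradio as gr

from gemmademo import LlamaCppGemmaModel, PromptManager


def build_demo():
    model_options = list(LlamaCppGemmaModel.AVAILABLE_MODELS.keys())
    task_options = ["Question Answering", "Text Generation", "Code Completion"]

    with gr.Blocks() as demo:
        # Selection controls and chat widgets live on the same page.
        model_dd = gr.Dropdown(choices=model_options, value="gemma-2b-it", label="Select Gemma Model")
        task_dd = gr.Dropdown(choices=task_options, value="Question Answering", label="Select Task")
        chatbot = gr.Chatbot()
        msg = gr.Textbox(placeholder="What is up?")

        def respond(message, history, model_name, task_name):
            # Build the model and prompt from the current dropdown values.
            model = LlamaCppGemmaModel(name=model_name).load_model()
            prompt = PromptManager(task=task_name).get_prompt(user_input=message)
            history = history + [(message, model.generate_response(prompt))]
            return "", history

        msg.submit(respond, inputs=[msg, chatbot, model_dd, task_dd], outputs=[msg, chatbot])

    return demo


if __name__ == "__main__":
    build_demo().launch()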
gemmademo/_chat.py
CHANGED
@@ -1,17 +1,17 @@
-import streamlit as st
+import gradio as gr
 from ._model import LlamaCppGemmaModel
 from ._prompts import PromptManager


-class StreamlitChat:
+class GradioChat:
     """
     A class that handles the chat interface for the Gemma model.

     Features:
-    - A
-    -
-    -
-    -
+    - A Gradio-based chatbot UI.
+    - Maintains chat history automatically.
+    - Uses Gemma (Hugging Face) model for generating responses.
+    - Formats user inputs before sending them to the model.
     """

     def __init__(self, model: LlamaCppGemmaModel, prompt_manager: PromptManager):
@@ -22,37 +22,15 @@ class StreamlitChat:
         self._chat()

     def _chat(self):
-        )  # Only double spaced backslash is rendered in streamlit for newlines.
-        with st.chat_message("User"):
-            st.markdown(prompt)
-        st.session_state.messages.append({"role": "User", "content": prompt})
-
-        prompt = self.prompt_manager.get_prompt(
-            user_input=st.session_state.messages[-1]["content"]
-        )
-        response = self.model.generate_response(prompt).replace(
-            "\n", "  \n"
-        )  # Only double spaced backslash is rendered in streamlit for newlines.
-        with st.chat_message("Gemma"):
-            st.markdown(response)
-        st.session_state.messages.append({"role": "Gemma", "content": response})
-
-    def _build_states(self):
-        # Initialize chat history
-        if "messages" not in st.session_state:
-            st.session_state.messages = []
-
-    def clear_history(self):
-        st.session_state.messages = []
+        def chat_fn(history, message):
+            prompt = self.prompt_manager.get_prompt(user_input=message)
+            response = self.model.generate_response(prompt)
+            return response
+
+        chat_interface = gr.ChatInterface(
+            chat_fn,
+            chatbot=gr.Chatbot(label="Using model: " + self.model.get_model_name()),
+            textbox=gr.Textbox(placeholder="What is up?", container=False),
+        )
+
+        chat_interface.launch()
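For reference when reading _chat: gr.ChatInterface calls its function with the arguments in the order (message, history). A standalone sketch of the same wiring in that order, reusing the classes from this repo (untested, and not part of the commit):

import gradio as gr

from gemmademo import LlamaCppGemmaModel, PromptManager

# Assumed setup, mirroring the defaults used elsewhere in the diff.
model = LlamaCppGemmaModel(name="gemma-2b-it").load_model()
prompt_manager = PromptManager(task="Question Answering")


def chat_fn(message, history):
    # gr.ChatInterface passes the new user message first, then the running history.
    prompt = prompt_manager.get_prompt(user_input=message)
    return model.generate_response(prompt)


gr.ChatInterface(
    chat_fn,
    chatbot=gr.Chatbot(label="Using model: " + model.get_model_name()),
    textbox=gr.Textbox(placeholder="What is up?", container=False),
).launch()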
gemmademo/_model.py
CHANGED
@@ -1,6 +1,5 @@
 import os
 from typing import Dict
-import streamlit as st
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download

@@ -51,7 +50,7 @@ class LlamaCppGemmaModel:
         },
     }

-    def __init__(self, name: str = "gemma-2b"
+    def __init__(self, name: str = "gemma-2b"):
         """
         Initialize the model instance.

@@ -63,60 +62,46 @@ class LlamaCppGemmaModel:

     def load_model(self, n_ctx: int = 2048, n_gpu_layers: int = 0):
         """
-        Load the model
-        If the model file does not exist, it will be downloaded to the models/ directory.
+        Load the model. If the model file does not exist, it will be downloaded.

         Args:
-            n_threads (int): Number of CPU threads to use.
             n_ctx (int): Context window size.
             n_gpu_layers (int): Number of layers to offload to GPU (if supported; 0 for CPU-only).
-
-        Returns:
-            self: Loaded model instance.
         """
         model_info = self.AVAILABLE_MODELS.get(self.name)
         if not model_info:
             raise ValueError(f"Model {self.name} is not available.")

         model_path = model_info["model_path"]
+
         # If the model file doesn't exist, download it.
         if not os.path.exists(model_path):
             os.makedirs(os.path.dirname(model_path), exist_ok=True)
             repo_id = model_info.get("repo_id")
             filename = model_info.get("filename")
+
             if repo_id is None or filename is None:
-                raise ValueError(
-                model_path=model_path,
-                n_threads=os.cpu_count(),
-                n_ctx=n_ctx,
-                n_gpu_layers=n_gpu_layers,
-            )
-        self.model = st.session_state[model_key]
+                raise ValueError("Repository ID or filename is missing for model download.")
+
+            downloaded_path = hf_hub_download(
+                repo_id=repo_id,
+                filename=filename,
+                local_dir=os.path.dirname(model_path),
+                local_dir_use_symlinks=False,
+            )
+
+            if downloaded_path != model_path:
+                os.rename(downloaded_path, model_path)
+
+        self.model = Llama(
+            model_path=model_path,
+            n_threads=os.cpu_count(),
+            n_ctx=n_ctx,
+            n_gpu_layers=n_gpu_layers,
+        )
         return self

-    def generate_response(
-        self,
-        prompt: str,
-        max_tokens: int = 512,
-        temperature: float = 0.7,
-    ) -> str:
+    def generate_response(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7) -> str:
         """
         Generate a response using the llama.cpp model.

@@ -124,7 +109,6 @@ class LlamaCppGemmaModel:
             prompt (str): Input prompt text.
             max_tokens (int): Maximum number of tokens to generate.
             temperature (float): Sampling temperature (higher = more creative).
-            **kwargs: Additional generation parameters.

         Returns:
             str: Generated response text.
@@ -132,14 +116,12 @@ class LlamaCppGemmaModel:
         if self.model is None:
             self.load_model()

-        # Call the llama.cpp model with the provided parameters.
         response = self.model(
             prompt,
             max_tokens=max_tokens,
             temperature=temperature,
         )
-
-        return generated_text.strip()
+        return response["choices"][0]["text"].strip()

     def get_model_info(self) -> Dict:
         """
@@ -148,10 +130,7 @@ class LlamaCppGemmaModel:
         Returns:
             Dict: A dictionary containing the model name and load status.
         """
-        return {
-            "name": self.name,
-            "loaded": self.model is not None,
-        }
+        return {"name": self.name, "loaded": self.model is not None}

     def get_model_name(self) -> str:
         """
@@ -160,4 +139,4 @@ class LlamaCppGemmaModel:
         Returns:
             str: Model name.
         """
-        return self.name
+        return self.name
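Taken together, the class now exposes a small synchronous API. A minimal usage sketch (untested; the model name "gemma-2b-it" is the default used in app.py and is assumed to be a key of AVAILABLE_MODELS):

from gemmademo import LlamaCppGemmaModel

# Construct, load (downloading the model file on first use), then generate.
model = LlamaCppGemmaModel(name="gemma-2b-it").load_model(n_ctx=2048, n_gpu_layers=0)
print(model.get_model_info())   # e.g. {"name": "gemma-2b-it", "loaded": True}
print(model.generate_response("What is the capital of France?", max_tokens=64))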