aadya1762 committed
Commit b4ecb60 · 0 Parent(s)

initial commit
app.py ADDED
@@ -0,0 +1,101 @@
+ # Interface all the functions from gemmademo.
+ # Implement login functionality in the sidebar.
+ # Implement a task selector in the sidebar.
+ # Add a button to clear the chat history.
+
+ import streamlit as st
+ from gemmademo import HuggingFaceGemmaModel, StreamlitChat, PromptManager, huggingface_login
+ import os
+
+ def main():
+     # Page configuration
+     st.set_page_config(page_title="Gemma Chat Demo", layout="wide")
+
+     # Initialize session state variables
+     if "authenticated" not in st.session_state:
+         st.session_state.authenticated = False
+     if "selected_model" not in st.session_state:
+         st.session_state.selected_model = "gemma-2b-it"
+     if "selected_task" not in st.session_state:
+         st.session_state.selected_task = "Question Answering"
+
+     # Sidebar for login and configuration
+     with st.sidebar:
+         st.title("Gemma Chat Configuration")
+
+         # Login section
+         st.subheader("Login")
+         if not st.session_state.authenticated:
+             hf_token = st.text_input("Hugging Face Token", type="password")
+             if st.button("Login"):
+                 try:
+                     huggingface_login(hf_token)
+                     st.session_state.authenticated = True
+                     st.success("Successfully logged in!")
+                 except Exception as e:
+                     st.error(f"Login failed: {e}")
+         else:
+             st.success("Logged in to Hugging Face")
+             if st.button("Logout"):
+                 st.session_state.authenticated = False
+                 st.rerun()
+
+         # Model selection
+         st.subheader("Model Selection")
+         model_options = list(HuggingFaceGemmaModel.AVAILABLE_MODELS.keys())
+         selected_model = st.selectbox(
+             "Select Gemma Model",
+             model_options,
+             index=model_options.index(st.session_state.selected_model),
+         )
+         if selected_model != st.session_state.selected_model:
+             st.session_state.selected_model = selected_model
+             st.rerun()
+
+         # Task selection
+         st.subheader("Task Selection")
+         task_options = ["Question Answering", "Text Generation", "Code Completion"]
+         selected_task = st.selectbox(
+             "Select Task",
+             task_options,
+             index=task_options.index(st.session_state.selected_task),
+         )
+         if selected_task != st.session_state.selected_task:
+             st.session_state.selected_task = selected_task
+             st.rerun()
+
+         # Clear chat history button
+         if st.button("Clear Chat History"):
+             if "chat_instance" in st.session_state:
+                 st.session_state.chat_instance.clear_history()
+                 st.rerun()
+
+     # Main content area
+     if st.session_state.authenticated:
+         # Initialize model with the selected configuration
+         model_name = HuggingFaceGemmaModel.AVAILABLE_MODELS[st.session_state.selected_model]["name"]
+         model = HuggingFaceGemmaModel(name=model_name)
+
+         # Load model (cached in session state after the first load)
+         with st.spinner(f"Loading {model_name}..."):
+             model.load_model(device_map="auto")
+
+         # Initialize prompt manager with the selected task
+         prompt_manager = PromptManager(task=st.session_state.selected_task)
+
+         # Initialize chat interface
+         chat = StreamlitChat(model=model, prompt_manager=prompt_manager)
+         st.session_state.chat_instance = chat
+
+         # Run the chat interface
+         chat.run()
+     else:
+         st.info("Please log in with your Hugging Face token in the sidebar to start chatting.")
+
+ if __name__ == "__main__":
+     from streamlit import runtime
+     if runtime.exists():
+         # Already running under a Streamlit server: render the app.
+         main()
+     else:
+         # Invoked as `python app.py`: relaunch the script under Streamlit.
+         os.system(f"streamlit run {__file__}")
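+
+ # How to run (assumed local workflow):
+ #   pip install -r requirements.txt
+ #   streamlit run app.py    # or `python app.py`, which relaunches itself under Streamlit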
gemmademo/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from ._chat import StreamlitChat
+ from ._model import HuggingFaceGemmaModel
+ from ._prompts import PromptManager
+ from ._utils import huggingface_login
+
+ __all__ = ["StreamlitChat", "HuggingFaceGemmaModel", "PromptManager", "huggingface_login"]
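+
+ # Illustrative use of the package API (must run inside a Streamlit script,
+ # since HuggingFaceGemmaModel caches weights via st.session_state):
+ #   from gemmademo import HuggingFaceGemmaModel, PromptManager
+ #   model = HuggingFaceGemmaModel(name="google/gemma-2b-it").load_model()
+ #   model.generate_response(PromptManager(task="Question Answering").get_prompt("Hi"))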
gemmademo/_chat.py ADDED
@@ -0,0 +1,50 @@
+ import streamlit as st
+ from ._model import HuggingFaceGemmaModel
+ from ._prompts import PromptManager
+
+ class StreamlitChat:
+     """
+     A class that handles the chat interface for the Gemma model.
+
+     Features:
+     ✅ A Streamlit-based chatbot UI.
+     ✅ Maintains chat history across reruns.
+     ✅ Uses a Gemma (Hugging Face) model for generating responses.
+     ✅ Formats user inputs before sending them to the model.
+     """
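+     # Typical wiring (hypothetical; mirrors what app.py does):
+     #   chat = StreamlitChat(model=model, prompt_manager=prompt_manager)
+     #   chat.run()
+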
+     def __init__(self, model: HuggingFaceGemmaModel, prompt_manager: PromptManager):
+         self.model = model
+         self.prompt_manager = prompt_manager
+
+     def run(self):
+         self._chat()
+
+     def _chat(self):
+         st.title("Using model: " + self.model.get_model_name())
+         self._build_states()
+
+         # Display chat messages from history on app rerun
+         for message in st.session_state.messages:
+             with st.chat_message(message["role"]):
+                 st.markdown(message["content"])
+
+         # React to user input
+         if prompt := st.chat_input("What is up?"):
+             with st.chat_message("User"):
+                 st.markdown(prompt)
+             st.session_state.messages.append({"role": "User", "content": prompt})
+
+             # Format the raw input for the selected task, then generate a reply
+             task_prompt = self.prompt_manager.get_prompt(user_input=st.session_state.messages[-1]["content"])
+             response = self.model.generate_response(task_prompt)
+             with st.chat_message("Gemma"):
+                 st.markdown(response)
+             st.session_state.messages.append({"role": "Gemma", "content": response})
+
+     def _build_states(self):
+         # Initialize chat history
+         if "messages" not in st.session_state:
+             st.session_state.messages = []
+
+     def clear_history(self):
+         st.session_state.messages = []
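+
+ # Note: clear_history() only resets the message list in st.session_state;
+ # the "Clear Chat History" button in app.py calls st.rerun() afterwards so
+ # the emptied history is rendered immediately.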
gemmademo/_model.py ADDED
@@ -0,0 +1,184 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+ import torch
+ from typing import Dict
+ import streamlit as st
+
+ torch.classes.__path__ = []  # workaround: keep Streamlit's module watcher from crashing on torch.classes
+
+ def load_model(name: str, device_map: str = "cpu"):
+     """
+     Load the tokenizer, model, and generation pipeline. No caching happens
+     here; HuggingFaceGemmaModel caches the results in Streamlit session state.
+     """
+     tokenizer = AutoTokenizer.from_pretrained(name)
+
+     # 8-bit quantization requires bitsandbytes and a CUDA device;
+     # fall back to plain bfloat16 weights when no GPU is available.
+     quantization_config = BitsAndBytesConfig(load_in_8bit=True) if torch.cuda.is_available() else None
+
+     model = AutoModelForCausalLM.from_pretrained(
+         name,
+         torch_dtype=torch.bfloat16,
+         low_cpu_mem_usage=True,
+         device_map=device_map,
+         use_safetensors=True,
+         attn_implementation="eager",  # replaces the deprecated use_flash_attention_2 flag
+         use_cache=True,
+         quantization_config=quantization_config,
+     )
+
+     model = torch.compile(model, fullgraph=True, mode="reduce-overhead")
+
+
+     # The model is already instantiated and placed, so no device or dtype
+     # arguments are passed to the pipeline itself.
+     pipe = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         do_sample=True,
+         temperature=0.7,
+         max_new_tokens=512,
+         pad_token_id=tokenizer.eos_token_id,
+         eos_token_id=tokenizer.eos_token_id,
+         return_full_text=False,
+     )
+
+     return tokenizer, model, pipe
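+
+ # Illustrative call (Gemma weights are gated, so a prior Hugging Face login
+ # with an authorized token is assumed):
+ #   tokenizer, model, pipe = load_model("google/gemma-2b-it", device_map="auto")
+ #   pipe("Write a haiku about autumn.")[0]["generated_text"]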
+
+ class HuggingFaceGemmaModel:
+     """
+     A class for the Hugging Face Gemma model. Handles model selection, loading, and inference.
+     Uses a transformers pipeline for better text generation and formatting.
+
+     Example
+     -------
+     Select Gemma 2B, 7B, etc.
+
+     Additional Information
+     ----------------------
+     Complete information: https://huggingface.co/google/gemma-2b
+
+     Available Models:
+     - google/gemma-2b (2B parameters, base)
+     - google/gemma-2b-it (2B parameters, instruction-tuned)
+     - google/gemma-7b (7B parameters, base)
+     - google/gemma-7b-it (7B parameters, instruction-tuned)
+     """
+
+     AVAILABLE_MODELS: Dict[str, Dict] = {
+         "gemma-2b": {
+             "name": "google/gemma-2b",
+             "description": "2B parameters, base model",
+             "type": "base",
+         },
+         "gemma-2b-it": {
+             "name": "google/gemma-2b-it",
+             "description": "2B parameters, instruction-tuned",
+             "type": "instruct",
+         },
+         "gemma-7b": {
+             "name": "google/gemma-7b",
+             "description": "7B parameters, base model",
+             "type": "base",
+         },
+         "gemma-7b-it": {
+             "name": "google/gemma-7b-it",
+             "description": "7B parameters, instruction-tuned",
+             "type": "instruct",
+         },
+     }
+
+     def __init__(self, name: str = "google/gemma-2b"):
+         self.name = name
+         self.model = None
+         self.tokenizer = None
+         self.pipeline = None
+
+     def load_model(self, device_map: str = "cpu"):
+         """
+         Load the model, caching it in Streamlit session state.
+
+         Args:
+             device_map: Device mapping strategy ("cpu" for CPU-only
+                 inference, "auto" to let accelerate place the weights)
+         """
+         # Create unique keys for this model in session state
+         model_key = f"gemma_model_{self.name}"
+         tokenizer_key = f"gemma_tokenizer_{self.name}"
+         pipeline_key = f"gemma_pipeline_{self.name}"
+
+         # Check if the model is already loaded in session state
+         if (model_key not in st.session_state or
+                 tokenizer_key not in st.session_state or
+                 pipeline_key not in st.session_state):
+
+             # Show a loading indicator
+             with st.spinner(f"Loading {self.name}..."):
+                 tokenizer, model, pipe = load_model(self.name, device_map)
+
+             # Store in session state
+             st.session_state[tokenizer_key] = tokenizer
+             st.session_state[model_key] = model
+             st.session_state[pipeline_key] = pipe
+
+         # Get the cached objects from session state
+         self.tokenizer = st.session_state[tokenizer_key]
+         self.model = st.session_state[model_key]
+         self.pipeline = st.session_state[pipeline_key]
+
+         return self
+
+     def generate_response(
+         self,
+         prompt: str,
+         max_length: int = 512,
+         temperature: float = 0.7,
+         num_return_sequences: int = 1,
+         **kwargs
+     ) -> str:
+         """
+         Generate a response using the text-generation pipeline.
+
+         Args:
+             prompt: Input text
+             max_length: Maximum number of new tokens to generate (mapped to max_new_tokens)
+             temperature: Sampling temperature (higher = more creative)
+             num_return_sequences: Number of responses to generate
+             **kwargs: Additional generation parameters for the pipeline
+
+         Returns:
+             str: Generated response
+         """
+         if not self.pipeline:
+             self.load_model()
+
+         # Update generation config with any provided kwargs
+         generation_config = {
+             "max_new_tokens": max_length,
+             "temperature": temperature,
+             "num_return_sequences": num_return_sequences,
+             "do_sample": True,
+             **kwargs,
+         }
+
+         # Generate response using the pipeline
+         outputs = self.pipeline(prompt, **generation_config)
+
+         # Extract the generated text
+         if num_return_sequences == 1:
+             response = outputs[0]["generated_text"]
+         else:
+             # Join multiple sequences if requested
+             response = "\n---\n".join(output["generated_text"] for output in outputs)
+
+         return response.strip()
+
+     def get_model_info(self) -> Dict:
+         """Return information about the model"""
+         return {
+             "name": self.name,
+             "loaded": self.model is not None,
+             "pipeline_ready": self.pipeline is not None,
+         }
+
+     def get_model_name(self) -> str:
+         """Return the name of the model"""
+         return self.name
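+
+ # Sketch of direct inference (argument values are illustrative); extra
+ # generation kwargs such as top_p are forwarded to the pipeline:
+ #   model = HuggingFaceGemmaModel(name="google/gemma-2b-it").load_model(device_map="auto")
+ #   model.generate_response("Explain attention in one sentence.", temperature=0.2, top_p=0.9)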
gemmademo/_prompts.py ADDED
@@ -0,0 +1,43 @@
+ class PromptManager:
+     def __init__(self, task):
+         self.task = task
+
+     def get_prompt(self, user_input):
+         if self.task == "Question Answering":
+             return self.get_question_answering_prompt(user_input)
+         elif self.task == "Text Generation":
+             return self.get_text_generation_prompt(user_input)
+         elif self.task == "Code Completion":
+             return self.get_code_completion_prompt(user_input)
+         else:
+             raise ValueError(f"Task {self.task} not supported")
+
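+     # Example (illustrative): with task="Question Answering",
+     # get_prompt("Why is the sky blue?") wraps the question in the QA
+     # template built below.
+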
+     def get_question_answering_prompt(self, user_input):
+         """
+         Format user input for the question answering task
+         """
+         prompt = f"""You are a helpful AI assistant. Answer the following question accurately and concisely.
+ Question: {user_input}
+
+ Answer:"""
+         return prompt
+
+     def get_text_generation_prompt(self, user_input):
+         """
+         Format user input for the text generation task
+         """
+         prompt = f"""Continue the following text in a coherent and engaging way:
+ {user_input}
+
+ Continuation:"""
+         return prompt
+
+     def get_code_completion_prompt(self, user_input):
+         """
+         Format user input for the code completion task
+         """
+         prompt = f"""Complete the following code snippet with proper syntax and best practices:
+ {user_input}
+
+ Completed code:"""
+         return prompt
gemmademo/_utils.py ADDED
@@ -0,0 +1,6 @@
+ from huggingface_hub import login
+
+ def huggingface_login(token: str):
+     """
+     Log in to Hugging Face using the given access token
+     """
+     login(token=token)
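+
+ # Example (the token value is a placeholder):
+ #   huggingface_login("hf_...")
+ # The token needs read access to the gated google/gemma-* repositories.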
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit>=1.30.0
+ transformers>=4.36.0
+ torch>=2.1.0
+ huggingface-hub>=0.19.0
+ accelerate>=0.25.0
+ bitsandbytes>=0.41.0
+ safetensors>=0.4.0
+ sentencepiece>=0.1.99
+ protobuf>=4.25.0