Sarah Bentley committed · Commit 86e3856 · Parent(s): 11d501b

adding metadata

Files changed:
- .gitignore +2 -1
- README.md +21 -3
- app.py +11 -5
- chatbot_development.ipynb +27 -31
- requirements.txt +9 -6
- src/chat.py +60 -2
- src/model.py +38 -20
.gitignore
CHANGED
@@ -2,4 +2,5 @@ venv/
 __pycache__/
 .env
 *.pyc
-.ipynb_checkpoints/
+.ipynb_checkpoints/
+models/
README.md
CHANGED
@@ -1,11 +1,29 @@
+---
+title: Boston Public School Choice
+emoji: 🚀
+colorFrom: blue
+colorTo: red
+sdk: gradio
+sdk_version: 3.50.2
+python_version: 3.10
+app_file: app.py
+pinned: false
+---
+
 # Boston Public School Selection Chatbot
 
-This is a skeleton repo you can use to design a school choice chatbot. Feel free to change it however you'd like!
+This is a skeleton repo you can use to design a school choice chatbot. Feel free to change it however you'd like! This repo is compatible with CPU (using your own computer). Loading the model and running inference might be a little slow, but it should be manageable. If you have access to your own GPUs, you can use them as well, but we don't require it.
+
+The end goal: make the chatbot and upload it to a Hugging Face Space. We have included instructions for interacting with Hugging Face below. Here's an example of the final output we made; your chatbot should be much better!
+
+Note: We encourage you to use AI tools (like Cursor or LLMs) to help you on this assignment. Learn how to leverage these tools.
 
 ## Setup
 
-1. Install the required dependencies:
+1. Make a virtual environment and install the required dependencies:
 ```bash
+python -m venv venv
+source venv/bin/activate
 pip install -r requirements.txt
 ```
 
@@ -28,7 +46,7 @@ To deploy your chatbot as a free web interface using Hugging Face Spaces:
 
 1. Create a Hugging Face Space:
    - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
-   - Click "
+   - Click "New Space"
    - Choose a name for your space (e.g., "boston-school-chatbot")
    - Select "Gradio" as the SDK
    - Choose "CPU" as the hardware (free tier)
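Once the Space exists, the repo contents can also be pushed programmatically with `huggingface_hub` rather than through the web UI. A minimal sketch, assuming you are already logged in; the `repo_id` below is a placeholder:

```python
# Minimal sketch: push this repo to an existing Space with huggingface_hub.
# The repo_id is a placeholder -- substitute your own username/space name.
from huggingface_hub import HfApi

api = HfApi()  # assumes you are already logged in (e.g., `huggingface-cli login`)
api.upload_folder(
    folder_path=".",                                # the repo root
    repo_id="your-username/boston-school-chatbot",  # placeholder
    repo_type="space",
)
```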
app.py
CHANGED
@@ -19,7 +19,7 @@ Example Usage:
 """
 
 import gradio as gr
-from src.model import load_model
+from src.model import load_model
 from src.chat import SchoolChatbot
 
 def create_chatbot():
@@ -62,16 +62,22 @@ def create_chatbot():
         - Return that response as a string
         """
         # TODO: Generate and return response
-
+        try:
+            # Generate response using our chatbot
+            response = chatbot.get_response(message)
+            return response
+
+        except Exception as e:
+            return f"I apologize, but I encountered an error. Please try again. Error: {str(e)}"
 
-
+    # Create Gradio interface. Customize the interface as you'd like!
     demo = gr.ChatInterface(
         chat,
         title="Boston Public School Selection Assistant",
         description="Ask me anything about Boston public schools!",
         examples=[
-            "What schools in Jamaica Plain offer Spanish programs?",
-            "How do I schedule a tour of the Hernandez School?"
+            "I live in Jamaica Plain and want to send my child to kindergarten. What schools are available?"
         ]
     )
 
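The diff only shows the inside of `create_chatbot()`; a minimal sketch of how the pieces presumably fit together (the `SchoolChatbot` constructor signature is assumed, since it is not shown in this commit):

```python
# Sketch of the surrounding glue in app.py (assumed, not shown in this diff):
# load the model once at startup, wrap it, and launch the Gradio interface.
import gradio as gr

from src.model import load_model
from src.chat import SchoolChatbot

def create_chatbot():
    model, tokenizer = load_model()
    chatbot = SchoolChatbot(model, tokenizer)  # constructor signature assumed

    def chat(message, history):
        try:
            return chatbot.get_response(message)
        except Exception as e:
            return f"I apologize, but I encountered an error. Please try again. Error: {str(e)}"

    demo = gr.ChatInterface(chat, title="Boston Public School Selection Assistant")
    return demo

if __name__ == "__main__":
    create_chatbot().launch()
```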
chatbot_development.ipynb
CHANGED
@@ -18,27 +18,16 @@
 },
 {
  "cell_type": "code",
- "execution_count":
+ "execution_count": 11,
  "metadata": {},
- "outputs": [
-  {
-   "ename": "",
-   "evalue": "",
-   "output_type": "error",
-   "traceback": [
-    "\u001b[1;31mRunning cells with 'Python 3.11.6' requires the ipykernel package.\n",
-    "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
-    "\u001b[1;31mCommand: '/usr/local/bin/python3 -m pip install ipykernel -U --user --force-reinstall'"
-   ]
-  }
- ],
+ "outputs": [],
 "source": [
  "import torch\n",
  "from huggingface_hub import login\n",
  "\n",
  "\n",
- "from model import load_model, save_model\n",
- "from chat import SchoolChatbot"
+ "from src.model import load_model, save_model\n",
+ "from src.chat import SchoolChatbot"
 ]
 },
 {
@@ -50,12 +39,12 @@
  "\"\"\"\n",
  "TODO: Add your Hugging Face token\n",
  "Options:\n",
- "1. Use login() and enter token when prompted\n",
+ "1. Use login() and enter token when prompted. It won't ask for your token if you already logged in using the command: huggingface-cli login in the terminal.\n",
  "2. Set environment variable HUGGINGFACE_TOKEN\n",
  "3. Pass token directly (not recommended for shared notebooks)\n",
  "\"\"\"\n",
  "\n",
- "
+ "login()\n",
  "\n"
 ]
 },
@@ -74,7 +63,6 @@
 "source": [
  "\"\"\"\n",
  "Load the model using functions from model.py\n",
- "Note: This might take a few minutes depending on your hardware\n",
  "\"\"\"\n",
  "\n",
  "model, tokenizer = load_model()\n",
@@ -93,7 +81,7 @@
 },
 {
  "cell_type": "code",
- "execution_count":
+ "execution_count": 14,
  "metadata": {},
  "outputs": [],
 "source": [
@@ -110,17 +98,14 @@
 "outputs": [],
 "source": [
  "\"\"\"\n",
- "Test out generating some responses from the chatbot\n",
+ "Test out generating some responses from the chatbot.\n",
+ "Inference time\n",
  "\"\"\"\n",
- "
- "    \"What schools in Jamaica Plain offer Spanish programs?\",\n",
- "    \"How do I schedule a tour of the Hernandez School?\"\n",
- "]\n",
+ "test_question = \"I live in Jamaica Plain and want to send my child to a school that offers Spanish programs. What schools are available?\"\n",
  "\n",
- "
- "
- "
- "    print(f\"Response: {response}\")\n"
+ "print(f\"\\nQuestion: {test_question}\")\n",
+ "response = chatbot.get_response(test_question)\n",
+ "print(f\"Response: {response}\")\n"
 ]
 },
 {
@@ -129,7 +114,9 @@
 "source": [
  "# TODO: Update pre-trained Llama to be a school choice chatbot\n",
  "\n",
- "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide it Boston school choice data somehow, etc. Be creative!"
+ "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide it Boston school choice data somehow, etc. Be creative! If you choose to finetune the model, we recommend using LoRA.\n",
+ "\n",
+ "You can also feel free to do this in another script and then evaluate the model here."
 ]
 },
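The LoRA recommendation added above is left open-ended; for orientation, a minimal sketch with the `peft` library (which is not in requirements.txt, and all hyperparameters here are illustrative placeholders):

```python
# Illustrative LoRA setup with peft (peft is not in requirements.txt;
# hyperparameters below are placeholders, not tuned values).
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,                                   # low-rank adapter dimension
    lora_alpha=16,                         # adapter scaling factor
    target_modules=["q_proj", "v_proj"],   # attention projections in Llama-style models
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the small adapter weights are trained
```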
@@ -139,19 +126,28 @@
 "outputs": [],
 "source": [
  "# If you update the model, you can use the `save_model` function from model.py to save the new model\n",
+ "# Note: This might take a few minutes depending on your hardware. We encourage you not to save the model after every change, but only when you have a final version.\n",
  "save_model(model, tokenizer)\n"
 ]
 }
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "
+  "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
+  "codemirror_mode": {
+   "name": "ipython",
+   "version": 3
+  },
+  "file_extension": ".py",
+  "mimetype": "text/x-python",
   "name": "python",
-  "
+  "nbconvert_exporter": "python",
+  "pygments_lexer": "ipython3",
+  "version": "3.9.12"
  }
 },
 "nbformat": 4,
requirements.txt
CHANGED
@@ -1,6 +1,9 @@
-torch>=2.
-transformers>=4.
-datasets>=2.
-accelerate>=0.
-sentencepiece>=0.1.99
-gradio>=3.
+torch>=2.1.0
+transformers>=4.34.0
+datasets>=2.14.0
+accelerate>=0.24.0
+sentencepiece>=0.1.99
+gradio>=3.50.0
+huggingface-hub>=0.19.0
+numpy<2.0.0
+ipywidgets>=8.0.0
src/chat.py
CHANGED
@@ -1,3 +1,6 @@
+import torch
+import gc
+
 class SchoolChatbot:
     """
     This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
@@ -36,7 +39,19 @@ class SchoolChatbot:
         User: {user_input}
         Assistant:"
         """
-
+        system_prompt = """You are a helpful assistant that specializes in helping parents choose Boston public schools.
+        You provide accurate information about school programs, locations, enrollment processes, and other important details.
+        Always be professional, clear, and focused on helping parents make informed decisions about schools.
+        """
+
+        # Combine system prompt with user input
+        formatted_prompt = f"""
+        {system_prompt}
+
+        User: {user_input}
+        Assistant:"""
+
+        return formatted_prompt
 
     def get_response(self, user_input):
         """
@@ -59,4 +74,47 @@ class SchoolChatbot:
         - Consider parameters like temperature and max_length
         - Clean up the response before returning it
         """
-
+        prompt = self.format_prompt(user_input)
+
+        # Memory-efficient tokenization
+        print("Tokenizing...")
+        inputs = self.tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=256  # Reduced input length for CPU
+        )
+
+        # Memory-efficient generation
+        print("Generating...")
+        with torch.inference_mode():
+            outputs = self.model.generate(
+                inputs['input_ids'],  # Changed to directly use input_ids
+                attention_mask=inputs['attention_mask'] if 'attention_mask' in inputs else None,
+                max_new_tokens=150,  # Reduced output length for CPU
+                temperature=0.7,
+                top_p=0.95,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id,
+                repetition_penalty=1.2,
+                num_return_sequences=1,
+                early_stopping=True
+            )
+
+        # Clean up memory
+        del inputs
+        gc.collect()  # Force garbage collection
+
+        response = self.tokenizer.decode(
+            outputs[0],
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=True
+        )
+
+        # Clean up more memory
+        del outputs
+        gc.collect()
+
+        response = response.split("Assistant:")[-1].strip()
+        return response
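Taken together with model.py, the new generation path can be smoke-tested like this (a sketch; the `SchoolChatbot` constructor is not shown in this commit, so its signature is assumed):

```python
# Quick smoke test of the new get_response() path (constructor signature assumed).
from src.model import load_model
from src.chat import SchoolChatbot

model, tokenizer = load_model()
bot = SchoolChatbot(model, tokenizer)
print(bot.get_response("What schools in Jamaica Plain offer Spanish programs?"))
```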
src/model.py
CHANGED
@@ -21,6 +21,7 @@ Example Usage:
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
+import gc
 
 # Choose a model
 MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Change this to your preferred model
@@ -34,53 +35,70 @@ MODEL_SAVE_PATH = "models/school_chatbot"
 
 def save_model(model, tokenizer, save_directory="models/school_chatbot"):
     """
-    Save the model and tokenizer to a local directory
+    Save the model and tokenizer to a local directory with CPU memory optimization
     """
     # Create directory if it doesn't exist
     os.makedirs(save_directory, exist_ok=True)
 
-    #
-    model.save_pretrained(save_directory)
-    tokenizer.save_pretrained(save_directory)
-
+    # Move model to CPU if it's on GPU
+    model = model.cpu()
+
+    # Save in half precision to reduce file size
+    model.half()  # Convert to float16
+
+    try:
+        # Save in smaller chunks
+        model.save_pretrained(
+            save_directory,
+            safe_serialization=True,  # More memory efficient serialization
+            max_shard_size="500MB"  # Split into smaller files
+        )
+
+        # Save tokenizer (relatively small, no special handling needed)
+        tokenizer.save_pretrained(save_directory)
+
+        print(f"Model and tokenizer saved to {save_directory}")
+    finally:
+        # Clean up memory
+        gc.collect()
+
+        # Convert back to float32 for continued use if needed
+        model.float()
 
 
 def load_model():
     """
-    Load the model
+    Load the model for CPU usage
     """
     try:
-        # Use quantization to reduce memory usage
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,  # Enable 4-bit quantization
-            bnb_4bit_compute_dtype=torch.float16,  # Compute dtype
-            bnb_4bit_quant_type="nf4",  # Normalized float 4 format
-            bnb_4bit_use_double_quant=True  # Use nested quantization
-        )
-
         if os.path.exists(MODEL_SAVE_PATH):
-            print("Loading
+            print("Loading model from local storage...")
             tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_SAVE_PATH,
-
-
+                low_cpu_mem_usage=True,
+                torch_dtype=torch.float32
             )
         else:
-            print("Downloading
+            print("Downloading model from Hugging Face... Should take 2-3 minutes.")
             tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
-
-
+                low_cpu_mem_usage=True,
+                torch_dtype=torch.float32
            )
             # Save for future use
             save_model(model, tokenizer)
 
+        # Move model to CPU
+        model = model.to("cpu")
         return model, tokenizer
 
     except Exception as e:
         print(f"Error loading model: {e}")
         return None, None
 
+
+if __name__ == "__main__":
+    model, tokenizer = load_model()
+    print(model)
+    print(tokenizer)
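Note that the `BitsAndBytesConfig` import survives even though the 4-bit path was removed. If the GPU path is ever restored, the removed configuration looked roughly like this (the truncated `from_pretrained` arguments are assumptions; requires a CUDA GPU and the `bitsandbytes` package):

```python
# The removed GPU quantization path, kept for reference (requires bitsandbytes
# and a CUDA GPU; not used in the CPU-only setup above).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype
    bnb_4bit_quant_type="nf4",             # normalized float 4 format
    bnb_4bit_use_double_quant=True,        # nested quantization
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",  # assumed; the original call's arguments were truncated in this view
)
```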