Sarah Bentley committed
Commit 86e3856 · 1 Parent(s): 11d501b

adding metadata

Files changed (7):
  1. .gitignore +2 -1
  2. README.md +21 -3
  3. app.py +11 -5
  4. chatbot_development.ipynb +27 -31
  5. requirements.txt +9 -6
  6. src/chat.py +60 -2
  7. src/model.py +38 -20
.gitignore CHANGED
@@ -2,4 +2,5 @@ venv/
  __pycache__/
  .env
  *.pyc
- .ipynb_checkpoints/
+ .ipynb_checkpoints/
+ models/
README.md CHANGED
@@ -1,11 +1,29 @@
+ ---
+ title: Boston Public School Choice
+ emoji: 🚀
+ colorFrom: blue
+ colorTo: red
+ sdk: gradio
+ sdk_version: 3.50.2
+ python_version: 3.10
+ app_file: app.py
+ pinned: false
+ ---
+
  # Boston Public School Selection Chatbot
  
- This is a skeleton repo you can use to design a school choice chatbot. Feel free to change it however you'd like! The end goal: make the chatbot and upload it to a huggingface space. Instructions for doing so are below.
+ This is a skeleton repo you can use to design a school choice chatbot. Feel free to change it however you'd like! This repo is compatible with CPU (using your own computer). Loading the model and running inference might be a little slow, but it should be manageable. If you have access to your own GPUs you can use them as well, but we don't require it whatsoever.
+
+ The end goal: make the chatbot and upload it to a Hugging Face Space. We have included instructions for interacting with Hugging Face below. Here's an example of the final output we made. Your chatbot should be much better!
+
+ Note: We encourage you to use AI tools (like Cursor or LLMs) to help you on this assignment. Learn how to leverage these tools.
  
  ## Setup
  
- 1. Install the required dependencies:
+ 1. Make a virtual environment and install the required dependencies:
  ```bash
+ python -m venv venv
+ source venv/bin/activate
  pip install -r requirements.txt
  ```
  
@@ -28,7 +46,7 @@ To deploy your chatbot as a free web interface using Hugging Face Spaces:
  
  1. Create a Hugging Face Space:
     - Go to [Hugging Face Spaces](https://huggingface.co/spaces)
-    - Click "Create new Space"
+    - Click "New Space"
     - Choose a name for your space (e.g., "boston-school-chatbot")
     - Select "Gradio" as the SDK
     - Choose "CPU" as the hardware (free tier)
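As an aside on the deployment steps above: besides the web UI and git-based flow, the repo's files can also be pushed to a Space from Python with huggingface_hub (pinned in requirements.txt). A minimal sketch, assuming you are already logged in via `huggingface-cli login`; the repo_id is a placeholder to replace with your own:

```python
# Hedged sketch: push this repo's files to an existing Space without git.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token stored by `huggingface-cli login`
api.upload_folder(
    folder_path=".",                                # repo root: app.py, src/, requirements.txt
    repo_id="your-username/boston-school-chatbot",  # placeholder; use your own Space name
    repo_type="space",
)
```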
app.py CHANGED
@@ -19,7 +19,7 @@ Example Usage:
  """
  
  import gradio as gr
- from src.model import load_model, generate_response
+ from src.model import load_model
  from src.chat import SchoolChatbot
  
  def create_chatbot():
@@ -62,16 +62,22 @@ def create_chatbot():
          - Return that response as a string
          """
          # TODO: Generate and return response
-         pass
+         try:
+             # Generate a response using our chatbot
+             response = chatbot.get_response(message)
+             return response
+
+         except Exception as e:
+             return f"I apologize, but I encountered an error. Please try again. Error: {str(e)}"
  
- # Create Gradio interface
+
+ # Create Gradio interface. Customize the interface as you'd like!
  demo = gr.ChatInterface(
      chat,
      title="Boston Public School Selection Assistant",
      description="Ask me anything about Boston public schools!",
      examples=[
-         "What schools in Jamaica Plain offer Spanish programs?",
-         "How do I schedule a tour of the Hernandez School?"
+         "I live in Jamaica Plain and want to send my child to kindergarten. What schools are available?"
      ]
  )
  
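Pieced together, the app.py changes above amount to the following wiring. This is a sketch for orientation, not the full file; the SchoolChatbot constructor arguments are an assumption based on how chat.py and model.py are used together:

```python
# Sketch of how the pieces in this diff fit together (constructor signature assumed).
import gradio as gr
from src.chat import SchoolChatbot
from src.model import load_model

model, tokenizer = load_model()
chatbot = SchoolChatbot(model, tokenizer)  # assumption: the class wraps model + tokenizer

def chat(message, history):
    # gr.ChatInterface calls this with the new message and the conversation history
    try:
        return chatbot.get_response(message)
    except Exception as e:
        return f"I apologize, but I encountered an error. Please try again. Error: {e}"

demo = gr.ChatInterface(chat, title="Boston Public School Selection Assistant")

if __name__ == "__main__":
    demo.launch()  # serves on http://127.0.0.1:7860 by default
```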
chatbot_development.ipynb CHANGED
@@ -18,27 +18,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mRunning cells with 'Python 3.11.6' requires the ipykernel package.\n",
-      "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
-      "\u001b[1;31mCommand: '/usr/local/bin/python3 -m pip install ipykernel -U --user --force-reinstall'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
    "from huggingface_hub import login\n",
    "\n",
    "\n",
-    "from model import load_model, save_model\n",
-    "from chat import SchoolChatbot"
+    "from src.model import load_model, save_model\n",
+    "from src.chat import SchoolChatbot"
    ]
   },
   {
@@ -50,12 +39,12 @@
     "\"\"\"\n",
     "TODO: Add your Hugging Face token\n",
     "Options:\n",
-    "1. Use login() and enter token when prompted\n",
+    "1. Use login() and enter your token when prompted. It won't ask for your token if you already logged in with `huggingface-cli login` in the terminal.\n",
     "2. Set environment variable HUGGINGFACE_TOKEN\n",
     "3. Pass token directly (not recommended for shared notebooks)\n",
     "\"\"\"\n",
     "\n",
-    "# login() # Uncomment this line and add your token\n",
+    "login()\n",
     "\n"
    ]
   },
@@ -74,7 +63,6 @@
    "source": [
     "\"\"\"\n",
     "Load the model using functions from model.py\n",
-    "Note: This might take a few minutes depending on your hardware\n",
     "\"\"\"\n",
     "\n",
     "model, tokenizer = load_model()\n",
@@ -93,7 +81,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,17 +98,14 @@
    "outputs": [],
    "source": [
     "\"\"\"\n",
-    "Test out generating some responses from the chatbot\n",
+    "Test out generating some responses from the chatbot.\n",
+    "Inference might take a little while on CPU.\n",
     "\"\"\"\n",
-    "test_questions = [\n",
-    "    \"What schools in Jamaica Plain offer Spanish programs?\",\n",
-    "    \"How do I schedule a tour of the Hernandez School?\"\n",
-    "]\n",
+    "test_question = \"I live in Jamaica Plain and want to send my child to a school that offers Spanish programs. What schools are available?\"\n",
     "\n",
-    "for question in test_questions:\n",
-    "    print(f\"\\nQuestion: {question}\")\n",
-    "    response = chatbot.get_response(question)\n",
-    "    print(f\"Response: {response}\")\n"
+    "print(f\"\\nQuestion: {test_question}\")\n",
+    "response = chatbot.get_response(test_question)\n",
+    "print(f\"Response: {response}\")\n"
    ]
   },
   {
@@ -129,7 +114,9 @@
    "source": [
     "# TODO: Update pre-trained Llama to be a school choice chatbot\n",
     "\n",
-    "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide it Boston school choice data somehow, etc. Be creative! You can also feel free to do this in another script and then evaluate the model here."
+    "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide it Boston school choice data somehow, etc. Be creative! If you choose to finetune the model, we recommend using LoRA.\n",
+    "\n",
+    "You can also feel free to do this in another script and then evaluate the model here."
    ]
   },
   {
@@ -139,19 +126,28 @@
    "outputs": [],
    "source": [
     "# If you update the model, you can use the `save_model` function from model.py to save the new model\n",
+    "# Note: This might take a few minutes depending on your hardware. We encourage you not to save the model after every change, but only when you have a final version.\n",
     "save_model(model, tokenizer)\n"
    ]
   }
  ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3",
+  "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
+  "codemirror_mode": {
+   "name": "ipython",
+   "version": 3
+  },
+  "file_extension": ".py",
+  "mimetype": "text/x-python",
   "name": "python",
-  "version": "3.11.6"
+  "nbconvert_exporter": "python",
+  "pygments_lexer": "ipython3",
+  "version": "3.9.12"
  }
 },
 "nbformat": 4,
requirements.txt CHANGED
@@ -1,6 +1,9 @@
- torch>=2.0.0
- transformers>=4.30.0
- datasets>=2.12.0
- accelerate>=0.20.0
- sentencepiece>=0.1.99
- gradio>=3.40.0
+ torch>=2.1.0
+ transformers>=4.34.0
+ datasets>=2.14.0
+ accelerate>=0.24.0
+ sentencepiece>=0.1.99
+ gradio>=3.50.0
+ huggingface-hub>=0.19.0
+ numpy<2.0.0
+ ipywidgets>=8.0.0
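One note on the new pins: numpy<2.0.0 is presumably there because wheels compiled against NumPy 1.x (including torch builds in the pinned range) can fail to import under NumPy 2. A quick sanity check after installing, run inside the venv:

```python
# Verify the pinned stack resolved together (hedged: exact versions will vary).
import numpy, torch, transformers, gradio

print(numpy.__version__, torch.__version__, transformers.__version__, gradio.__version__)
assert numpy.__version__.split(".")[0] == "1", "expected a NumPy 1.x install per the pin"
```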
src/chat.py CHANGED
@@ -1,3 +1,6 @@
+ import torch
+ import gc
+
  class SchoolChatbot:
      """
      This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
@@ -36,7 +39,19 @@ class SchoolChatbot:
          User: {user_input}
          Assistant:"
          """
-         pass
+         system_prompt = """You are a helpful assistant that specializes in helping parents choose Boston public schools.
+         You provide accurate information about school programs, locations, enrollment processes, and other important details.
+         Always be professional, clear, and focused on helping parents make informed decisions about schools.
+         """
+
+         # Combine the system prompt with the user input
+         formatted_prompt = f"""
+         {system_prompt}
+
+         User: {user_input}
+         Assistant:"""
+
+         return formatted_prompt
  
      def get_response(self, user_input):
          """
@@ -59,4 +74,47 @@ class SchoolChatbot:
          - Consider parameters like temperature and max_length
          - Clean up the response before returning it
          """
-         pass
+         prompt = self.format_prompt(user_input)
+
+         # Tokenize with truncation to keep CPU memory usage down
+         print("Tokenizing...")
+         inputs = self.tokenizer(
+             prompt,
+             return_tensors="pt",
+             padding=True,
+             truncation=True,
+             max_length=256  # Reduced input length for CPU
+         )
+
+         # Memory-efficient generation
+         print("Generating...")
+         with torch.inference_mode():
+             outputs = self.model.generate(
+                 inputs["input_ids"],
+                 attention_mask=inputs.get("attention_mask"),
+                 max_new_tokens=150,  # Reduced output length for CPU
+                 temperature=0.7,
+                 top_p=0.95,
+                 do_sample=True,
+                 pad_token_id=self.tokenizer.eos_token_id,
+                 repetition_penalty=1.2,
+                 num_return_sequences=1
+             )
+
+         # Free the input tensors
+         del inputs
+         gc.collect()
+
+         response = self.tokenizer.decode(
+             outputs[0],
+             skip_special_tokens=True,
+             clean_up_tokenization_spaces=True
+         )
+
+         # Free the output tensors
+         del outputs
+         gc.collect()
+
+         # Keep only the text after the final "Assistant:" marker
+         response = response.split("Assistant:")[-1].strip()
+         return response
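A design note on the hand-rolled "User:/Assistant:" framing in format_prompt above: TinyLlama-1.1B-Chat ships a chat template, so tokenizer.apply_chat_template (available in the transformers>=4.34 pin) will usually reproduce the exact prompt format the model was trained on. A sketch under that assumption:

```python
# Hedged alternative to the hand-rolled prompt: let the tokenizer's chat template
# do the formatting (assumes the TinyLlama chat model configured in src/model.py).
from src.model import load_model

model, tokenizer = load_model()
messages = [
    {"role": "system", "content": "You help parents choose Boston public schools."},
    {"role": "user", "content": "What schools are available in Jamaica Plain?"},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)  # shows the special-token framing the chat model expects
```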
src/model.py CHANGED
@@ -21,6 +21,7 @@ Example Usage:
  import os
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch
+ import gc
  
  # Choose a model
  MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Change this to your preferred model
@@ -34,53 +35,70 @@ MODEL_SAVE_PATH = "models/school_chatbot"
  
  def save_model(model, tokenizer, save_directory="models/school_chatbot"):
      """
-     Save the model and tokenizer to a local directory
+     Save the model and tokenizer to a local directory with CPU memory optimization
      """
      # Create directory if it doesn't exist
      os.makedirs(save_directory, exist_ok=True)
  
-     # Save model and tokenizer
-     model.save_pretrained(save_directory)
-     tokenizer.save_pretrained(save_directory)
+     # Move model to CPU if it's on GPU
+     model = model.cpu()
  
-     print(f"Model and tokenizer saved to {save_directory}")
+     # Save in half precision to reduce file size
+     model.half()  # Convert to float16
+
+     try:
+         # Save the weights in smaller shards
+         model.save_pretrained(
+             save_directory,
+             safe_serialization=True,  # safetensors format
+             max_shard_size="500MB"  # Split into smaller files
+         )
+
+         # Save tokenizer (relatively small, no special handling needed)
+         tokenizer.save_pretrained(save_directory)
+
+         print(f"Model and tokenizer saved to {save_directory}")
+     finally:
+         # Clean up memory
+         gc.collect()
+
+         # Convert back to float32 for continued use if needed
+         model.float()
  
  
  def load_model():
      """
-     Load the model with 4-bit quantization
+     Load the model for CPU usage
      """
      try:
-         # Use quantization to reduce memory usage
-         quantization_config = BitsAndBytesConfig(
-             load_in_4bit=True,  # Enable 4-bit quantization
-             bnb_4bit_compute_dtype=torch.float16,  # Compute dtype
-             bnb_4bit_quant_type="nf4",  # Normalized float 4 format
-             bnb_4bit_use_double_quant=True  # Use nested quantization
-         )
-
          if os.path.exists(MODEL_SAVE_PATH):
-             print("Loading quantized model from local storage...")
+             print("Loading model from local storage...")
              tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
              model = AutoModelForCausalLM.from_pretrained(
                  MODEL_SAVE_PATH,
-                 quantization_config=quantization_config,
-                 device_map="auto"
+                 low_cpu_mem_usage=True,
+                 torch_dtype=torch.float32
              )
          else:
-             print("Downloading and quantizing model from Hugging Face...")
+             print("Downloading model from Hugging Face... Should take 2-3 minutes.")
              tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
              model = AutoModelForCausalLM.from_pretrained(
                  MODEL_NAME,
-                 quantization_config=quantization_config,
-                 device_map="auto"
+                 low_cpu_mem_usage=True,
+                 torch_dtype=torch.float32
              )
              # Save for future use
              save_model(model, tokenizer)
  
+         # Make sure the model is on the CPU
+         model = model.to("cpu")
          return model, tokenizer
  
      except Exception as e:
          print(f"Error loading model: {e}")
          return None, None
  
+ if __name__ == "__main__":
+     model, tokenizer = load_model()
+     print(model)
+     print(tokenizer)
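One interaction worth noting in the diff above: save_model writes float16 shards, while load_model reloads with torch_dtype=torch.float32, so the local copy is upcast on load. A small check, assuming it runs from the repo root after models/school_chatbot has been written:

```python
# Sanity check of the float16-save / float32-load round trip (run from repo root).
import torch
from src.model import load_model

model, tokenizer = load_model()
print(next(model.parameters()).dtype)   # expected: torch.float32 (upcast at load)
print(next(model.parameters()).device)  # expected: cpu
```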