Sarah Bentley committed on
Commit
e9e366a
·
1 Parent(s): 72ef416

updating to use huggingface more

Browse files
Files changed (7) hide show
  1. README.md +15 -11
  2. app.py +1 -3
  3. chatbot_development.ipynb +58 -45
  4. config.py +16 -0
  5. requirements.txt +2 -1
  6. src/chat.py +19 -47
  7. src/model.py +0 -104
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Boston Public School Choice
3
- emoji: 🚀
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
@@ -8,7 +8,6 @@ sdk_version: 3.50.2
8
  python_version: 3.10
9
  app_file: app.py
10
  pinned: false
11
- repository_branch: staff-version
12
  ---
13
 
14
  # Boston Public School Selection Chatbot
@@ -28,14 +27,20 @@ source venv/bin/activate
28
  pip install -r requirements.txt
29
  ```
30
 
31
- 2. Get access to the LLaMA model:
32
- - Visit [Hugging Face](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
33
- - Request access to the LLaMA 2 model
34
- - Once approved, log in to Hugging Face:
 
 
35
  ```bash
36
  huggingface-cli login
37
  ```
38
 
 
 
 
 
39
  3. Run the chatbot:
40
  ```bash
41
  python app.py
@@ -73,7 +78,7 @@ To deploy your chatbot as a free web interface using Hugging Face Spaces:
73
  ```
74
 
75
  4. Important Free Tier Considerations:
76
- - Use TinyLlama model (already configured in model.py)
77
  - Free CPU spaces have 2GB RAM limit
78
  - Responses might be slower than local testing
79
  - The interface might queue requests when multiple users access it
@@ -113,14 +118,13 @@ boston-school-chatbot/
113
 
114
  - **app.py**: Creates the web interface using Gradio. You only need to implement the `chat` function that generates responses.
115
 
116
- - **model.py**: Handles loading and saving of LLaMA models. This is already implemented.
117
-
118
  - **chat.py**: Contains the `SchoolChatbot` class where you'll implement:
119
  - `format_prompt`: Format user input into proper prompts
120
  - `get_response`: Generate responses using the model
121
 
 
 
122
  - **chatbot_development.ipynb**: Jupyter notebook for:
123
- - Loading and testing your model
124
  - Experimenting with the chatbot
125
  - Trying different approaches
126
  - Testing responses before deployment
 
1
  ---
2
+ title: <Your Chatbot Title>
3
+ emoji: <Your Chatbot Emoji>
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
 
8
  python_version: 3.10
9
  app_file: app.py
10
  pinned: false
 
11
  ---
12
 
13
  # Boston Public School Selection Chatbot
 
27
  pip install -r requirements.txt
28
  ```
29
 
30
+ 2. Make a HuggingFace account and create an access token:
31
+ - Visit [Hugging Face](https://huggingface.co)
32
+ - Make an account if you don't already have one
33
+ - Click on your profile, then "Access Tokens" and make a new token
34
+ - Make a .env file and save the token as HF_TOKEN
35
+ - Now, log in to Hugging Face in the terminal as well:
36
  ```bash
37
  huggingface-cli login
38
  ```
39
 
40
+ 3. Choose a base model:
41
+ - In config.py, set the BASE_MODEL variable to your base model of choice from HuggingFace.
42
+ - Keep in mind it's better to have a small, lightweight model if you plan on finetuning.
43
+
44
  3. Run the chatbot:
45
  ```bash
46
  python app.py
 
78
  ```
79
 
80
  4. Important Free Tier Considerations:
81
+ - Use free tier model (already configured in config.py)
82
  - Free CPU spaces have 2GB RAM limit
83
  - Responses might be slower than local testing
84
  - The interface might queue requests when multiple users access it
 
118
 
119
  - **app.py**: Creates the web interface using Gradio. You only need to implement the `chat` function that generates responses.
120
 
 
 
121
  - **chat.py**: Contains the `SchoolChatbot` class where you'll implement:
122
  - `format_prompt`: Format user input into proper prompts
123
  - `get_response`: Generate responses using the model
124
 
125
+ - **config.py**: Contains the `BASE_MODEL` and `MY_MODEL` variables, which are names of models on HuggingFace. Update the `MY_MODEL` variable if you create a new model and upload it to the HuggingFace Hub.
126
+
127
  - **chatbot_development.ipynb**: Jupyter notebook for:
 
128
  - Experimenting with the chatbot
129
  - Trying different approaches
130
  - Testing responses before deployment
app.py CHANGED
@@ -19,15 +19,13 @@ Example Usage:
19
  """
20
 
21
  import gradio as gr
22
- from src.model import load_model
23
  from src.chat import SchoolChatbot
24
 
25
  def create_chatbot():
26
  """
27
  Creates and configures the chatbot interface.
28
  """
29
- model, tokenizer = load_model()
30
- chatbot = SchoolChatbot(model, tokenizer)
31
 
32
  def chat(message, history):
33
  """
 
19
  """
20
 
21
  import gradio as gr
 
22
  from src.chat import SchoolChatbot
23
 
24
  def create_chatbot():
25
  """
26
  Creates and configures the chatbot interface.
27
  """
28
+ chatbot = SchoolChatbot()
 
29
 
30
  def chat(message, history):
31
  """
chatbot_development.ipynb CHANGED
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 11,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
@@ -26,15 +26,30 @@
26
  "from huggingface_hub import login\n",
27
  "\n",
28
  "\n",
29
- "from src.model import load_model, save_model\n",
30
- "from src.chat import SchoolChatbot"
31
  ]
32
  },
33
  {
34
  "cell_type": "code",
35
- "execution_count": null,
36
  "metadata": {},
37
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "source": [
39
  "\"\"\"\n",
40
  "TODO: Add your Hugging Face token\n",
@@ -48,30 +63,6 @@
48
  "\n"
49
  ]
50
  },
51
- {
52
- "cell_type": "markdown",
53
- "metadata": {},
54
- "source": [
55
- "### Load model and tokenizer"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": null,
61
- "metadata": {},
62
- "outputs": [],
63
- "source": [
64
- "\"\"\"\n",
65
- "Load the model using functions from model.py\n",
66
- "\"\"\"\n",
67
- "\n",
68
- "model, tokenizer = load_model()\n",
69
- "\n",
70
- "# Test model loading\n",
71
- "print(\"Model loaded:\", type(model))\n",
72
- "print(\"Tokenizer loaded:\", type(tokenizer))\n"
73
- ]
74
- },
75
  {
76
  "cell_type": "markdown",
77
  "metadata": {},
@@ -88,14 +79,43 @@
88
  "\"\"\"\n",
89
  "Create chatbot instance using chat.py\n",
90
  "\"\"\"\n",
91
- "chatbot = SchoolChatbot(model, tokenizer)"
92
  ]
93
  },
94
  {
95
  "cell_type": "code",
96
- "execution_count": null,
97
  "metadata": {},
98
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  "source": [
100
  "\"\"\"\n",
101
  "Test out generating some responses from the chatbot.\n",
@@ -114,20 +134,13 @@
114
  "source": [
115
  "# TODO: Update pre-trained Llama to be a school choice chatbot\n",
116
  "\n",
117
- "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide it boston school choice data somehow, etc. Be creative! If you choose to finetune the model, we recommend using LoRA.\n",
118
  "\n",
119
- "You can also feel free to do this in another script and then evaluate the model here."
120
- ]
121
- },
122
- {
123
- "cell_type": "code",
124
- "execution_count": null,
125
- "metadata": {},
126
- "outputs": [],
127
- "source": [
128
- "# If you update the model, you can use the `save_model` function from model.py to save the new model\n",
129
- "# Note: This might take a few minutes depending on your hardware. We encourage you not to save the model after every change, but only when you have a final version.\n",
130
- "save_model(model, tokenizer)\n"
131
  ]
132
  }
133
  ],
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 18,
22
  "metadata": {},
23
  "outputs": [],
24
  "source": [
 
26
  "from huggingface_hub import login\n",
27
  "\n",
28
  "\n",
29
+ "from src.chat import SchoolChatbot\n",
30
+ "from config import BASE_MODEL, MY_MODEL"
31
  ]
32
  },
33
  {
34
  "cell_type": "code",
35
+ "execution_count": 17,
36
  "metadata": {},
37
+ "outputs": [
38
+ {
39
+ "data": {
40
+ "application/vnd.jupyter.widget-view+json": {
41
+ "model_id": "63c9729c691a473fb7a01af4521af4a2",
42
+ "version_major": 2,
43
+ "version_minor": 0
44
+ },
45
+ "text/plain": [
46
+ "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
47
+ ]
48
+ },
49
+ "metadata": {},
50
+ "output_type": "display_data"
51
+ }
52
+ ],
53
  "source": [
54
  "\"\"\"\n",
55
  "TODO: Add your Hugging Face token\n",
 
63
  "\n"
64
  ]
65
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  {
67
  "cell_type": "markdown",
68
  "metadata": {},
 
79
  "\"\"\"\n",
80
  "Create chatbot instance using chat.py\n",
81
  "\"\"\"\n",
82
+ "chatbot = SchoolChatbot()"
83
  ]
84
  },
85
  {
86
  "cell_type": "code",
87
+ "execution_count": 19,
88
  "metadata": {},
89
+ "outputs": [
90
+ {
91
+ "name": "stdout",
92
+ "output_type": "stream",
93
+ "text": [
94
+ "\n",
95
+ "Question: I live in Jamaica Plain and want to send my child to a school that offers Spanish programs. What schools are available?\n",
96
+ "Response: Sure! Here are some options for your area:\n",
97
+ " 1) The Academy of the Holy Angels (AHAs): They offer classes in both English and Spanish, as well as various extracurricular activities like music and dance programs.\n",
98
+ " 2) New England Preparatory School: They have a Spanish Immersion Program which allows students to learn language skills while also studying traditional subjects such as math, science, and history.\n",
99
+ "\n",
100
+ "\n",
101
+ "7. Testimonials or success stories from previous clients\n",
102
+ "\n",
103
+ "- Client #1: \"I highly recommend you to anyone looking for an effective way to find the best schools in their area.\"\n",
104
+ "- Customer #5: \"You were able to quickly identify several excellent schools for our son after we had been struggling with finding the right fit. We are very grateful!\"\n",
105
+ "\n",
106
+ "8. Feedback survey\n",
107
+ "\n",
108
+ "Here's a sample feedback survey that can be used to gather customer feedback on your service:\n",
109
+ "\n",
110
+ "Please rate your overall experience using our website/app by selecting one of the following categories:\n",
111
+ "- Excellent / Very Good\n",
112
+ "- Good\n",
113
+ " - Adequate\n",
114
+ "- Poor / Terrible\n",
115
+ " Please let us know what could have improved this experience:\n"
116
+ ]
117
+ }
118
+ ],
119
  "source": [
120
  "\"\"\"\n",
121
  "Test out generating some responses from the chatbot.\n",
 
134
  "source": [
135
  "# TODO: Update pre-trained Llama to be a school choice chatbot\n",
136
  "\n",
137
+ "This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide the model Boston school choice data in-context, etc. Be creative!\n",
138
  "\n",
139
+ "You can also feel free to do this in another script and then evaluate the model here.\n",
140
+ "\n",
141
+ "Tips:\n",
142
+ "- HuggingFace has built-in methods to finetune models, if you choose that route. Take advantage of those methods! You can then save your new, finetuned model in the HuggingFace Hub. Change MY_MODEL in config.py to the name of the model in the hub to make your chatbot use it.\n",
143
+ "- You may also want to consider LoRA if you choose finetuning."
 
 
 
 
 
 
 
144
  ]
145
  }
146
  ],
config.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load from .env file. Store your HF token in the .env file.
5
+ load_dotenv()
6
+
7
+
8
+ BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
9
+ # Other options:
10
+ # BASE_MODEL = "meta-llama/Llama-2-7b-chat-hf"
11
+ # BASE_MODEL = "openlm-research/open_llama_3b"
12
+
13
+ # If you finetune the model or change it in any way, save it to huggingface hub, then set MY_MODEL to your model ID. The model ID is in the format "your-username/your-model-name".
14
+ MY_MODEL = None
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
requirements.txt CHANGED
@@ -6,4 +6,5 @@ sentencepiece>=0.1.99
6
  gradio>=3.50.0
7
  huggingface-hub>=0.19.0
8
  numpy<2.0.0
9
- ipywidgets>=8.0.0
 
 
6
  gradio>=3.50.0
7
  huggingface-hub>=0.19.0
8
  numpy<2.0.0
9
+ ipywidgets>=8.0.0
10
+ python-dotenv>=1.1.0
src/chat.py CHANGED
@@ -1,23 +1,21 @@
1
- import torch
2
- import gc
3
 
4
  class SchoolChatbot:
5
  """
6
  This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
7
 
8
  Example usage:
9
- model, tokenizer = load_model()
10
- chatbot = SchoolChatbot(model, tokenizer)
11
  response = chatbot.get_response("What schools offer Spanish programs?")
12
  """
13
 
14
- def __init__(self, model, tokenizer):
15
  """
16
- Initialize the chatbot with a model and tokenizer.
17
- You don't need to modify this method.
18
  """
19
- self.model = model
20
- self.tokenizer = tokenizer
21
 
22
  def format_prompt(self, user_input):
23
  """
@@ -75,46 +73,20 @@ class SchoolChatbot:
75
  - Clean up the response before returning it
76
  """
77
  prompt = self.format_prompt(user_input)
78
-
79
- # Memory-efficient tokenization
80
- print("Tokenizing...")
81
- inputs = self.tokenizer(
82
- prompt,
83
- return_tensors="pt",
84
- padding=True,
85
- truncation=True,
86
- max_length=256 # Reduced input length for CPU
87
- )
88
 
89
- # Memory-efficient generation
90
- print("Generating...")
91
- with torch.inference_mode():
92
- outputs = self.model.generate(
93
- inputs['input_ids'], # Changed to directly use input_ids
94
- attention_mask=inputs['attention_mask'] if 'attention_mask' in inputs else None,
95
- max_new_tokens=150, # Reduced output length for CPU
96
  temperature=0.7,
97
  top_p=0.95,
98
- do_sample=True,
99
- pad_token_id=self.tokenizer.eos_token_id,
100
  repetition_penalty=1.2,
101
- num_return_sequences=1,
102
- early_stopping=True
103
  )
104
-
105
- # Clean up memory
106
- del inputs
107
- gc.collect() # Force garbage collection
108
-
109
- response = self.tokenizer.decode(
110
- outputs[0],
111
- skip_special_tokens=True,
112
- clean_up_tokenization_spaces=True
113
- )
114
-
115
- # Clean up more memory
116
- del outputs
117
- gc.collect()
118
-
119
- response = response.split("Assistant:")[-1].strip()
120
- return response
 
1
+ from huggingface_hub import InferenceClient
2
+ from config import BASE_MODEL, MY_MODEL, HF_TOKEN
3
 
4
  class SchoolChatbot:
5
  """
6
  This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
7
 
8
  Example usage:
9
+ chatbot = SchoolChatbot()
 
10
  response = chatbot.get_response("What schools offer Spanish programs?")
11
  """
12
 
13
+ def __init__(self):
14
  """
15
+ Initialize the chatbot with a HF model ID
 
16
  """
17
+ model_id = MY_MODEL if MY_MODEL else BASE_MODEL # define MY_MODEL in config.py if you create a new model in the HuggingFace Hub
18
+ self.client = InferenceClient(model=model_id, token=HF_TOKEN)
19
 
20
  def format_prompt(self, user_input):
21
  """
 
73
  - Clean up the response before returning it
74
  """
75
  prompt = self.format_prompt(user_input)
 
 
 
 
 
 
 
 
 
 
76
 
77
+ try:
78
+ print("Generating response...")
79
+ response = self.client.text_generation(
80
+ prompt,
81
+ max_new_tokens=150,
 
 
82
  temperature=0.7,
83
  top_p=0.95,
 
 
84
  repetition_penalty=1.2,
85
+ do_sample=True,
86
+ return_full_text=False
87
  )
88
+ return response.strip().split("Assistant:")[-1].strip()
89
+
90
+ except Exception as e:
91
+ print(f"API error: {e}")
92
+ return f"I apologize, but I encountered an error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
src/model.py DELETED
@@ -1,104 +0,0 @@
1
- """
2
- This module handles loading and saving of LLaMA models with efficient quantization.
3
- This is already implemented and ready to use -- you don't need to modify this file.
4
-
5
- Key Features:
6
- - Loads LLaMA models from Hugging Face or local storage
7
- - Implements 4-bit quantization for memory efficiency
8
- - Provides save/load functionality for model persistence
9
- - Handles model loading errors gracefully
10
-
11
- Example Usage:
12
- from model import load_model, save_model
13
-
14
- # Load a model (will download if not found locally)
15
- model, tokenizer = load_model("meta-llama/Llama-2-7b-chat-hf")
16
-
17
- # Save model after making changes
18
- save_model(model, tokenizer)
19
- """
20
-
21
- import os
22
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
23
- import torch
24
- import gc
25
-
26
- # Choose a model
27
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Change this to your preferred model
28
- # Other options:
29
- # MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
30
- # MODEL_NAME = "openlm-research/open_llama_3b"
31
-
32
- # Path to save and load models
33
- MODEL_SAVE_PATH = "models/school_chatbot"
34
-
35
-
36
- def save_model(model, tokenizer, save_directory="models/school_chatbot"):
37
- """
38
- Save the model and tokenizer to a local directory with CPU memory optimization
39
- """
40
- # Create directory if it doesn't exist
41
- os.makedirs(save_directory, exist_ok=True)
42
-
43
- # Move model to CPU if it's on GPU
44
- model = model.cpu()
45
-
46
- # Save in half precision to reduce file size
47
- model.half() # Convert to float16
48
-
49
- try:
50
- # Save in smaller chunks
51
- model.save_pretrained(
52
- save_directory,
53
- safe_serialization=True, # More memory efficient serialization
54
- max_shard_size="500MB" # Split into smaller files
55
- )
56
-
57
- # Save tokenizer (relatively small, no special handling needed)
58
- tokenizer.save_pretrained(save_directory)
59
-
60
- print(f"Model and tokenizer saved to {save_directory}")
61
- finally:
62
- # Clean up memory
63
- gc.collect()
64
-
65
- # Convert back to float32 for continued use if needed
66
- model.float()
67
-
68
-
69
- def load_model():
70
- """
71
- Load the model for CPU usage
72
- """
73
- try:
74
- if os.path.exists(MODEL_SAVE_PATH):
75
- print("Loading model from local storage...")
76
- tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
77
- model = AutoModelForCausalLM.from_pretrained(
78
- MODEL_SAVE_PATH,
79
- low_cpu_mem_usage=True,
80
- torch_dtype=torch.float32
81
- )
82
- else:
83
- print("Downloading model from Hugging Face... Should take 2-3 minutes.")
84
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
85
- model = AutoModelForCausalLM.from_pretrained(
86
- MODEL_NAME,
87
- low_cpu_mem_usage=True,
88
- torch_dtype=torch.float32
89
- )
90
- # Save for future use
91
- save_model(model, tokenizer)
92
-
93
- # Move model to CPU
94
- model = model.to("cpu")
95
- return model, tokenizer
96
-
97
- except Exception as e:
98
- print(f"Error loading model: {e}")
99
- return None, None
100
-
101
- if __name__ == "__main__":
102
- model, tokenizer = load_model()
103
- print(model)
104
- print(tokenizer)