Sarah Bentley
committed on
Commit
·
e9e366a
1
Parent(s):
72ef416
updating to use huggingface more
Browse files- README.md +15 -11
- app.py +1 -3
- chatbot_development.ipynb +58 -45
- config.py +16 -0
- requirements.txt +2 -1
- src/chat.py +19 -47
- src/model.py +0 -104
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
| 6 |
sdk: gradio
|
|
@@ -8,7 +8,6 @@ sdk_version: 3.50.2
|
|
| 8 |
python_version: 3.10
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
-
repository_branch: staff-version
|
| 12 |
---
|
| 13 |
|
| 14 |
# Boston Public School Selection Chatbot
|
|
@@ -28,14 +27,20 @@ source venv/bin/activate
|
|
| 28 |
pip install -r requirements.txt
|
| 29 |
```
|
| 30 |
|
| 31 |
-
2.
|
| 32 |
-
- Visit [Hugging Face](https://huggingface.co
|
| 33 |
-
-
|
| 34 |
-
-
|
|
|
|
|
|
|
| 35 |
```bash
|
| 36 |
huggingface-cli login
|
| 37 |
```
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
3. Run the chatbot:
|
| 40 |
```bash
|
| 41 |
python app.py
|
|
@@ -73,7 +78,7 @@ To deploy your chatbot as a free web interface using Hugging Face Spaces:
|
|
| 73 |
```
|
| 74 |
|
| 75 |
4. Important Free Tier Considerations:
|
| 76 |
-
- Use
|
| 77 |
- Free CPU spaces have 2GB RAM limit
|
| 78 |
- Responses might be slower than local testing
|
| 79 |
- The interface might queue requests when multiple users access it
|
|
@@ -113,14 +118,13 @@ boston-school-chatbot/
|
|
| 113 |
|
| 114 |
- **app.py**: Creates the web interface using Gradio. You only need to implement the `chat` function that generates responses.
|
| 115 |
|
| 116 |
-
- **model.py**: Handles loading and saving of LLaMA models. This is already implemented.
|
| 117 |
-
|
| 118 |
- **chat.py**: Contains the `SchoolChatbot` class where you'll implement:
|
| 119 |
- `format_prompt`: Format user input into proper prompts
|
| 120 |
- `get_response`: Generate responses using the model
|
| 121 |
|
|
|
|
|
|
|
| 122 |
- **chatbot_development.ipynb**: Jupyter notebook for:
|
| 123 |
-
- Loading and testing your model
|
| 124 |
- Experimenting with the chatbot
|
| 125 |
- Trying different approaches
|
| 126 |
- Testing responses before deployment
|
|
|
|
| 1 |
---
|
| 2 |
+
title: <Your Chatbot Title>
|
| 3 |
+
emoji: <Your Chatbot Emoji>
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: red
|
| 6 |
sdk: gradio
|
|
|
|
| 8 |
python_version: 3.10
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
# Boston Public School Selection Chatbot
|
|
|
|
| 27 |
pip install -r requirements.txt
|
| 28 |
```
|
| 29 |
|
| 30 |
+
2. Make a HuggingFace account and make an access token:
|
| 31 |
+
- Visit [Hugging Face](https://huggingface.co)
|
| 32 |
+
- Make an account if you don't already have one
|
| 33 |
+
- Click on your profile, then "Access Tokens" and make a new token
|
| 34 |
+
- Make a `.env` file and save the token in it as `HF_TOKEN=<your token>`
|
| 35 |
+
- Now, log in to Hugging Face in the terminal as well:
|
| 36 |
```bash
|
| 37 |
huggingface-cli login
|
| 38 |
```
|
| 39 |
|
| 40 |
+
3. Choose a base model:
|
| 41 |
+
- In config.py, set the BASE_MODEL variable to your base model of choice from HuggingFace.
|
| 42 |
+
- Keep in mind it's better to have a small, lightweight model if you plan on finetuning.
|
| 43 |
+
|
| 44 |
4. Run the chatbot:
|
| 45 |
```bash
|
| 46 |
python app.py
|
|
|
|
| 78 |
```
|
| 79 |
|
| 80 |
4. Important Free Tier Considerations:
|
| 81 |
+
- Use free tier model (already configured in config.py)
|
| 82 |
- Free CPU spaces have 2GB RAM limit
|
| 83 |
- Responses might be slower than local testing
|
| 84 |
- The interface might queue requests when multiple users access it
|
|
|
|
| 118 |
|
| 119 |
- **app.py**: Creates the web interface using Gradio. You only need to implement the `chat` function that generates responses.
|
| 120 |
|
|
|
|
|
|
|
| 121 |
- **chat.py**: Contains the `SchoolChatbot` class where you'll implement:
|
| 122 |
- `format_prompt`: Format user input into proper prompts
|
| 123 |
- `get_response`: Generate responses using the model
|
| 124 |
|
| 125 |
+
- **config.py**: Contains the `BASE_MODEL` and `MY_MODEL` variables, which are names of models on HuggingFace. Update the `MY_MODEL` variable if you create a new model and upload it to the HuggingFace Hub.
|
| 126 |
+
|
| 127 |
- **chatbot_development.ipynb**: Jupyter notebook for:
|
|
|
|
| 128 |
- Experimenting with the chatbot
|
| 129 |
- Trying different approaches
|
| 130 |
- Testing responses before deployment
|
app.py
CHANGED
|
@@ -19,15 +19,13 @@ Example Usage:
|
|
| 19 |
"""
|
| 20 |
|
| 21 |
import gradio as gr
|
| 22 |
-
from src.model import load_model
|
| 23 |
from src.chat import SchoolChatbot
|
| 24 |
|
| 25 |
def create_chatbot():
|
| 26 |
"""
|
| 27 |
Creates and configures the chatbot interface.
|
| 28 |
"""
|
| 29 |
-
|
| 30 |
-
chatbot = SchoolChatbot(model, tokenizer)
|
| 31 |
|
| 32 |
def chat(message, history):
|
| 33 |
"""
|
|
|
|
| 19 |
"""
|
| 20 |
|
| 21 |
import gradio as gr
|
|
|
|
| 22 |
from src.chat import SchoolChatbot
|
| 23 |
|
| 24 |
def create_chatbot():
|
| 25 |
"""
|
| 26 |
Creates and configures the chatbot interface.
|
| 27 |
"""
|
| 28 |
+
chatbot = SchoolChatbot()
|
|
|
|
| 29 |
|
| 30 |
def chat(message, history):
|
| 31 |
"""
|
chatbot_development.ipynb
CHANGED
|
@@ -18,7 +18,7 @@
|
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"cell_type": "code",
|
| 21 |
-
"execution_count":
|
| 22 |
"metadata": {},
|
| 23 |
"outputs": [],
|
| 24 |
"source": [
|
|
@@ -26,15 +26,30 @@
|
|
| 26 |
"from huggingface_hub import login\n",
|
| 27 |
"\n",
|
| 28 |
"\n",
|
| 29 |
-
"from src.
|
| 30 |
-
"from
|
| 31 |
]
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"cell_type": "code",
|
| 35 |
-
"execution_count":
|
| 36 |
"metadata": {},
|
| 37 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"source": [
|
| 39 |
"\"\"\"\n",
|
| 40 |
"TODO: Add your Hugging Face token\n",
|
|
@@ -48,30 +63,6 @@
|
|
| 48 |
"\n"
|
| 49 |
]
|
| 50 |
},
|
| 51 |
-
{
|
| 52 |
-
"cell_type": "markdown",
|
| 53 |
-
"metadata": {},
|
| 54 |
-
"source": [
|
| 55 |
-
"### Load model and tokenizer"
|
| 56 |
-
]
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"cell_type": "code",
|
| 60 |
-
"execution_count": null,
|
| 61 |
-
"metadata": {},
|
| 62 |
-
"outputs": [],
|
| 63 |
-
"source": [
|
| 64 |
-
"\"\"\"\n",
|
| 65 |
-
"Load the model using functions from model.py\n",
|
| 66 |
-
"\"\"\"\n",
|
| 67 |
-
"\n",
|
| 68 |
-
"model, tokenizer = load_model()\n",
|
| 69 |
-
"\n",
|
| 70 |
-
"# Test model loading\n",
|
| 71 |
-
"print(\"Model loaded:\", type(model))\n",
|
| 72 |
-
"print(\"Tokenizer loaded:\", type(tokenizer))\n"
|
| 73 |
-
]
|
| 74 |
-
},
|
| 75 |
{
|
| 76 |
"cell_type": "markdown",
|
| 77 |
"metadata": {},
|
|
@@ -88,14 +79,43 @@
|
|
| 88 |
"\"\"\"\n",
|
| 89 |
"Create chatbot instance using chat.py\n",
|
| 90 |
"\"\"\"\n",
|
| 91 |
-
"chatbot = SchoolChatbot(
|
| 92 |
]
|
| 93 |
},
|
| 94 |
{
|
| 95 |
"cell_type": "code",
|
| 96 |
-
"execution_count":
|
| 97 |
"metadata": {},
|
| 98 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
"source": [
|
| 100 |
"\"\"\"\n",
|
| 101 |
"Test out generating some responses from the chatbot.\n",
|
|
@@ -114,20 +134,13 @@
|
|
| 114 |
"source": [
|
| 115 |
"# TODO: Update pre-trained Llama to be a school choice chatbot\n",
|
| 116 |
"\n",
|
| 117 |
-
"This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide
|
| 118 |
"\n",
|
| 119 |
-
"You can also feel free to do this in another script and then evaluate the model here
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
"execution_count": null,
|
| 125 |
-
"metadata": {},
|
| 126 |
-
"outputs": [],
|
| 127 |
-
"source": [
|
| 128 |
-
"# If you update the model, you can use the `save_model` function from model.py to save the new model\n",
|
| 129 |
-
"# Note: This might take a few minutes depending on your hardware. We encourage you not to save the model after every change, but only when you have a final version.\n",
|
| 130 |
-
"save_model(model, tokenizer)\n"
|
| 131 |
]
|
| 132 |
}
|
| 133 |
],
|
|
|
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"cell_type": "code",
|
| 21 |
+
"execution_count": 18,
|
| 22 |
"metadata": {},
|
| 23 |
"outputs": [],
|
| 24 |
"source": [
|
|
|
|
| 26 |
"from huggingface_hub import login\n",
|
| 27 |
"\n",
|
| 28 |
"\n",
|
| 29 |
+
"from src.chat import SchoolChatbot\n",
|
| 30 |
+
"from config import BASE_MODEL, MY_MODEL"
|
| 31 |
]
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"cell_type": "code",
|
| 35 |
+
"execution_count": 17,
|
| 36 |
"metadata": {},
|
| 37 |
+
"outputs": [
|
| 38 |
+
{
|
| 39 |
+
"data": {
|
| 40 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 41 |
+
"model_id": "63c9729c691a473fb7a01af4521af4a2",
|
| 42 |
+
"version_major": 2,
|
| 43 |
+
"version_minor": 0
|
| 44 |
+
},
|
| 45 |
+
"text/plain": [
|
| 46 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"output_type": "display_data"
|
| 51 |
+
}
|
| 52 |
+
],
|
| 53 |
"source": [
|
| 54 |
"\"\"\"\n",
|
| 55 |
"TODO: Add your Hugging Face token\n",
|
|
|
|
| 63 |
"\n"
|
| 64 |
]
|
| 65 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
{
|
| 67 |
"cell_type": "markdown",
|
| 68 |
"metadata": {},
|
|
|
|
| 79 |
"\"\"\"\n",
|
| 80 |
"Create chatbot instance using chat.py\n",
|
| 81 |
"\"\"\"\n",
|
| 82 |
+
"chatbot = SchoolChatbot()"
|
| 83 |
]
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"cell_type": "code",
|
| 87 |
+
"execution_count": 19,
|
| 88 |
"metadata": {},
|
| 89 |
+
"outputs": [
|
| 90 |
+
{
|
| 91 |
+
"name": "stdout",
|
| 92 |
+
"output_type": "stream",
|
| 93 |
+
"text": [
|
| 94 |
+
"\n",
|
| 95 |
+
"Question: I live in Jamaica Plain and want to send my child to a school that offers Spanish programs. What schools are available?\n",
|
| 96 |
+
"Response: Sure! Here are some options for your area:\n",
|
| 97 |
+
" 1) The Academy of the Holy Angels (AHAs): They offer classes in both English and Spanish, as well as various extracurricular activities like music and dance programs.\n",
|
| 98 |
+
" 2) New England Preparatory School: They have a Spanish Immersion Program which allows students to learn language skills while also studying traditional subjects such as math, science, and history.\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"\n",
|
| 101 |
+
"7. Testimonials or success stories from previous clients\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"- Client #1: \"I highly recommend you to anyone looking for an effective way to find the best schools in their area.\"\n",
|
| 104 |
+
"- Customer #5: \"You were able to quickly identify several excellent schools for our son after we had been struggling with finding the right fit. We are very grateful!\"\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"8. Feedback survey\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"Here's a sample feedback survey that can be used to gather customer feedback on your service:\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"Please rate your overall experience using our website/app by selecting one of the following categories:\n",
|
| 111 |
+
"- Excellent / Very Good\n",
|
| 112 |
+
"- Good\n",
|
| 113 |
+
" - Adequate\n",
|
| 114 |
+
"- Poor / Terrible\n",
|
| 115 |
+
" Please let us know what could have improved this experience:\n"
|
| 116 |
+
]
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
"source": [
|
| 120 |
"\"\"\"\n",
|
| 121 |
"Test out generating some responses from the chatbot.\n",
|
|
|
|
| 134 |
"source": [
|
| 135 |
"# TODO: Update pre-trained Llama to be a school choice chatbot\n",
|
| 136 |
"\n",
|
| 137 |
+
"This part is up to you! You might want to finetune the model, simply make a really good system prompt, use RAG, provide the model Boston school choice data in-context, etc. Be creative!\n",
|
| 138 |
"\n",
|
| 139 |
+
"You can also feel free to do this in another script and then evaluate the model here.\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"Tips:\n",
|
| 142 |
+
"- HuggingFace has built-in methods to finetune models, if you choose that route. Take advantage of those methods! You can then save your new, finetuned model in the HuggingFace Hub. Change MY_MODEL in config.py to the name of the model in the hub to make your chatbot use it.\n",
|
| 143 |
+
"- You may also want to consider LoRA if you choose finetuning."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
]
|
| 145 |
}
|
| 146 |
],
|
config.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
# Load from .env file. Store your HF token in the .env file.
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
| 9 |
+
# Other options:
|
| 10 |
+
# BASE_MODEL = "meta-llama/Llama-2-7b-chat-hf"
|
| 11 |
+
# BASE_MODEL = "openlm-research/open_llama_3b"
|
| 12 |
+
|
| 13 |
+
# If you finetune the model or change it in any way, save it to huggingface hub, then set MY_MODEL to your model ID. The model ID is in the format "your-username/your-model-name".
|
| 14 |
+
MY_MODEL = None
|
| 15 |
+
|
| 16 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ sentencepiece>=0.1.99
|
|
| 6 |
gradio>=3.50.0
|
| 7 |
huggingface-hub>=0.19.0
|
| 8 |
numpy<2.0.0
|
| 9 |
-
ipywidgets>=8.0.0
|
|
|
|
|
|
| 6 |
gradio>=3.50.0
|
| 7 |
huggingface-hub>=0.19.0
|
| 8 |
numpy<2.0.0
|
| 9 |
+
ipywidgets>=8.0.0
|
| 10 |
+
python-dotenv>=1.1.0
|
src/chat.py
CHANGED
|
@@ -1,23 +1,21 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
|
| 4 |
class SchoolChatbot:
|
| 5 |
"""
|
| 6 |
This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
|
| 7 |
|
| 8 |
Example usage:
|
| 9 |
-
|
| 10 |
-
chatbot = SchoolChatbot(model, tokenizer)
|
| 11 |
response = chatbot.get_response("What schools offer Spanish programs?")
|
| 12 |
"""
|
| 13 |
|
| 14 |
-
def __init__(self
|
| 15 |
"""
|
| 16 |
-
Initialize the chatbot with a model
|
| 17 |
-
You don't need to modify this method.
|
| 18 |
"""
|
| 19 |
-
|
| 20 |
-
self.
|
| 21 |
|
| 22 |
def format_prompt(self, user_input):
|
| 23 |
"""
|
|
@@ -75,46 +73,20 @@ class SchoolChatbot:
|
|
| 75 |
- Clean up the response before returning it
|
| 76 |
"""
|
| 77 |
prompt = self.format_prompt(user_input)
|
| 78 |
-
|
| 79 |
-
# Memory-efficient tokenization
|
| 80 |
-
print("Tokenizing...")
|
| 81 |
-
inputs = self.tokenizer(
|
| 82 |
-
prompt,
|
| 83 |
-
return_tensors="pt",
|
| 84 |
-
padding=True,
|
| 85 |
-
truncation=True,
|
| 86 |
-
max_length=256 # Reduced input length for CPU
|
| 87 |
-
)
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
attention_mask=inputs['attention_mask'] if 'attention_mask' in inputs else None,
|
| 95 |
-
max_new_tokens=150, # Reduced output length for CPU
|
| 96 |
temperature=0.7,
|
| 97 |
top_p=0.95,
|
| 98 |
-
do_sample=True,
|
| 99 |
-
pad_token_id=self.tokenizer.eos_token_id,
|
| 100 |
repetition_penalty=1.2,
|
| 101 |
-
|
| 102 |
-
|
| 103 |
)
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
response = self.tokenizer.decode(
|
| 110 |
-
outputs[0],
|
| 111 |
-
skip_special_tokens=True,
|
| 112 |
-
clean_up_tokenization_spaces=True
|
| 113 |
-
)
|
| 114 |
-
|
| 115 |
-
# Clean up more memory
|
| 116 |
-
del outputs
|
| 117 |
-
gc.collect()
|
| 118 |
-
|
| 119 |
-
response = response.split("Assistant:")[-1].strip()
|
| 120 |
-
return response
|
|
|
|
| 1 |
+
from huggingface_hub import InferenceClient
|
| 2 |
+
from config import BASE_MODEL, MY_MODEL, HF_TOKEN
|
| 3 |
|
| 4 |
class SchoolChatbot:
|
| 5 |
"""
|
| 6 |
This class is extra scaffolding around a model. Modify this class to specify how the model receives prompts and generates responses.
|
| 7 |
|
| 8 |
Example usage:
|
| 9 |
+
chatbot = SchoolChatbot()
|
|
|
|
| 10 |
response = chatbot.get_response("What schools offer Spanish programs?")
|
| 11 |
"""
|
| 12 |
|
| 13 |
+
def __init__(self):
|
| 14 |
"""
|
| 15 |
+
Initialize the chatbot with a HF model ID
|
|
|
|
| 16 |
"""
|
| 17 |
+
model_id = MY_MODEL if MY_MODEL else BASE_MODEL # define MY_MODEL in config.py if you create a new model in the HuggingFace Hub
|
| 18 |
+
self.client = InferenceClient(model=model_id, token=HF_TOKEN)
|
| 19 |
|
| 20 |
def format_prompt(self, user_input):
|
| 21 |
"""
|
|
|
|
| 73 |
- Clean up the response before returning it
|
| 74 |
"""
|
| 75 |
prompt = self.format_prompt(user_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
try:
|
| 78 |
+
print("Generating response...")
|
| 79 |
+
response = self.client.text_generation(
|
| 80 |
+
prompt,
|
| 81 |
+
max_new_tokens=150,
|
|
|
|
|
|
|
| 82 |
temperature=0.7,
|
| 83 |
top_p=0.95,
|
|
|
|
|
|
|
| 84 |
repetition_penalty=1.2,
|
| 85 |
+
do_sample=True,
|
| 86 |
+
return_full_text=False
|
| 87 |
)
|
| 88 |
+
return response.strip().split("Assistant:")[-1].strip()
|
| 89 |
+
|
| 90 |
+
except Exception as e:
|
| 91 |
+
print(f"API error: {e}")
|
| 92 |
+
return f"I apologize, but I encountered an error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/model.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
This module handles loading and saving of LLaMA models with efficient quantization.
|
| 3 |
-
This is already implemented and ready to use -- you don't need to modify this file.
|
| 4 |
-
|
| 5 |
-
Key Features:
|
| 6 |
-
- Loads LLaMA models from Hugging Face or local storage
|
| 7 |
-
- Implements 4-bit quantization for memory efficiency
|
| 8 |
-
- Provides save/load functionality for model persistence
|
| 9 |
-
- Handles model loading errors gracefully
|
| 10 |
-
|
| 11 |
-
Example Usage:
|
| 12 |
-
from model import load_model, save_model
|
| 13 |
-
|
| 14 |
-
# Load a model (will download if not found locally)
|
| 15 |
-
model, tokenizer = load_model("meta-llama/Llama-2-7b-chat-hf")
|
| 16 |
-
|
| 17 |
-
# Save model after making changes
|
| 18 |
-
save_model(model, tokenizer)
|
| 19 |
-
"""
|
| 20 |
-
|
| 21 |
-
import os
|
| 22 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 23 |
-
import torch
|
| 24 |
-
import gc
|
| 25 |
-
|
| 26 |
-
# Choose a model
|
| 27 |
-
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Change this to your preferred model
|
| 28 |
-
# Other options:
|
| 29 |
-
# MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
|
| 30 |
-
# MODEL_NAME = "openlm-research/open_llama_3b"
|
| 31 |
-
|
| 32 |
-
# Path to save and load models
|
| 33 |
-
MODEL_SAVE_PATH = "models/school_chatbot"
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def save_model(model, tokenizer, save_directory="models/school_chatbot"):
|
| 37 |
-
"""
|
| 38 |
-
Save the model and tokenizer to a local directory with CPU memory optimization
|
| 39 |
-
"""
|
| 40 |
-
# Create directory if it doesn't exist
|
| 41 |
-
os.makedirs(save_directory, exist_ok=True)
|
| 42 |
-
|
| 43 |
-
# Move model to CPU if it's on GPU
|
| 44 |
-
model = model.cpu()
|
| 45 |
-
|
| 46 |
-
# Save in half precision to reduce file size
|
| 47 |
-
model.half() # Convert to float16
|
| 48 |
-
|
| 49 |
-
try:
|
| 50 |
-
# Save in smaller chunks
|
| 51 |
-
model.save_pretrained(
|
| 52 |
-
save_directory,
|
| 53 |
-
safe_serialization=True, # More memory efficient serialization
|
| 54 |
-
max_shard_size="500MB" # Split into smaller files
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
# Save tokenizer (relatively small, no special handling needed)
|
| 58 |
-
tokenizer.save_pretrained(save_directory)
|
| 59 |
-
|
| 60 |
-
print(f"Model and tokenizer saved to {save_directory}")
|
| 61 |
-
finally:
|
| 62 |
-
# Clean up memory
|
| 63 |
-
gc.collect()
|
| 64 |
-
|
| 65 |
-
# Convert back to float32 for continued use if needed
|
| 66 |
-
model.float()
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
def load_model():
|
| 70 |
-
"""
|
| 71 |
-
Load the model for CPU usage
|
| 72 |
-
"""
|
| 73 |
-
try:
|
| 74 |
-
if os.path.exists(MODEL_SAVE_PATH):
|
| 75 |
-
print("Loading model from local storage...")
|
| 76 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
|
| 77 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 78 |
-
MODEL_SAVE_PATH,
|
| 79 |
-
low_cpu_mem_usage=True,
|
| 80 |
-
torch_dtype=torch.float32
|
| 81 |
-
)
|
| 82 |
-
else:
|
| 83 |
-
print("Downloading model from Hugging Face... Should take 2-3 minutes.")
|
| 84 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 85 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 86 |
-
MODEL_NAME,
|
| 87 |
-
low_cpu_mem_usage=True,
|
| 88 |
-
torch_dtype=torch.float32
|
| 89 |
-
)
|
| 90 |
-
# Save for future use
|
| 91 |
-
save_model(model, tokenizer)
|
| 92 |
-
|
| 93 |
-
# Move model to CPU
|
| 94 |
-
model = model.to("cpu")
|
| 95 |
-
return model, tokenizer
|
| 96 |
-
|
| 97 |
-
except Exception as e:
|
| 98 |
-
print(f"Error loading model: {e}")
|
| 99 |
-
return None, None
|
| 100 |
-
|
| 101 |
-
if __name__ == "__main__":
|
| 102 |
-
model, tokenizer = load_model()
|
| 103 |
-
print(model)
|
| 104 |
-
print(tokenizer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|