Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- distilbert_finetuing.ipynb +18 -140
- t5_training.ipynb +11 -14
distilbert_finetuing.ipynb
CHANGED
|
@@ -298,7 +298,8 @@
|
|
| 298 |
"source": [
|
| 299 |
"from transformers import DistilBertTokenizer\n",
|
| 300 |
"import torch\n",
|
| 301 |
-
"\n",
|
|
|
|
| 302 |
"# Load the DistilBERT tokenizer\n",
|
| 303 |
"tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
|
| 304 |
"\n",
|
|
@@ -366,7 +367,8 @@
|
|
| 366 |
"from transformers import DistilBertForSequenceClassification\n",
|
| 367 |
"\n",
|
| 368 |
"# Load the model with a classification head\n",
|
| 369 |
-
"model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n"
|
|
|
|
| 370 |
]
|
| 371 |
},
|
| 372 |
{
|
|
@@ -424,12 +426,12 @@
|
|
| 424 |
"from torch.optim.lr_scheduler import StepLR\n",
|
| 425 |
"\n",
|
| 426 |
"# Set up the optimizer\n",
|
| 427 |
-
"optimizer = AdamW(
|
| 428 |
"\n",
|
| 429 |
"# Define the training loop\n",
|
| 430 |
"epochs = 1\n",
|
| 431 |
"device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
|
| 432 |
-
"
|
| 433 |
"\n",
|
| 434 |
"print(device)"
|
| 435 |
]
|
|
@@ -800,7 +802,7 @@
|
|
| 800 |
],
|
| 801 |
"source": [
|
| 802 |
"for epoch in range(epochs):\n",
|
| 803 |
-
"
|
| 804 |
" total_loss = 0\n",
|
| 805 |
" for batch in train_dataloader:\n",
|
| 806 |
" input_ids, labels = batch\n",
|
|
@@ -810,7 +812,7 @@
|
|
| 810 |
" optimizer.zero_grad()\n",
|
| 811 |
"\n",
|
| 812 |
" # Forward pass\n",
|
| 813 |
-
" outputs =
|
| 814 |
" loss = outputs.loss\n",
|
| 815 |
" total_loss += loss.item()\n",
|
| 816 |
"\n",
|
|
@@ -835,7 +837,7 @@
|
|
| 835 |
}
|
| 836 |
],
|
| 837 |
"source": [
|
| 838 |
-
"
|
| 839 |
"correct_predictions = 0\n",
|
| 840 |
"total_predictions = 0\n",
|
| 841 |
"\n",
|
|
@@ -844,7 +846,7 @@
|
|
| 844 |
" input_ids, labels = batch\n",
|
| 845 |
" input_ids, labels = input_ids.to(device), labels.to(device)\n",
|
| 846 |
" # Forward pass\n",
|
| 847 |
-
" outputs =
|
| 848 |
" predictions = torch.argmax(outputs.logits, dim=-1)\n",
|
| 849 |
"\n",
|
| 850 |
" correct_predictions += (predictions == labels).sum().item()\n",
|
|
@@ -872,9 +874,9 @@
|
|
| 872 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 873 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 874 |
" \n",
|
| 875 |
-
"
|
| 876 |
" with torch.no_grad():\n",
|
| 877 |
-
" outputs =
|
| 878 |
" prediction = torch.argmax(outputs.logits, dim=-1)\n",
|
| 879 |
" return prediction.item()\n",
|
| 880 |
"\n",
|
|
@@ -915,10 +917,10 @@
|
|
| 915 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 916 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 917 |
" \n",
|
| 918 |
-
"
|
| 919 |
" with torch.no_grad():\n",
|
| 920 |
" # Get the raw logits from the model\n",
|
| 921 |
-
" outputs =
|
| 922 |
" logits = outputs.logits\n",
|
| 923 |
" \n",
|
| 924 |
" # Apply softmax to get probabilities\n",
|
|
@@ -961,7 +963,7 @@
|
|
| 961 |
}
|
| 962 |
],
|
| 963 |
"source": [
|
| 964 |
-
"
|
| 965 |
"\n",
|
| 966 |
"# Save the tokenizer\n",
|
| 967 |
"tokenizer.save_pretrained('./fine_tuned_distilbert')"
|
|
@@ -976,7 +978,7 @@
|
|
| 976 |
"from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
|
| 977 |
"\n",
|
| 978 |
"# Load the saved model\n",
|
| 979 |
-
"
|
| 980 |
"\n",
|
| 981 |
"# Load the saved tokenizer\n",
|
| 982 |
"tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
|
|
@@ -1007,9 +1009,9 @@
|
|
| 1007 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 1008 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 1009 |
"\n",
|
| 1010 |
-
"
|
| 1011 |
" with torch.no_grad():\n",
|
| 1012 |
-
" outputs =
|
| 1013 |
" logits = outputs.logits\n",
|
| 1014 |
" probabilities = softmax(logits, dim=-1)\n",
|
| 1015 |
" \n",
|
|
@@ -1028,130 +1030,6 @@
|
|
| 1028 |
" print(f\"{class_label}: {prob:.4f}\")"
|
| 1029 |
]
|
| 1030 |
},
|
| 1031 |
-
{
|
| 1032 |
-
"cell_type": "code",
|
| 1033 |
-
"execution_count": 55,
|
| 1034 |
-
"metadata": {},
|
| 1035 |
-
"outputs": [],
|
| 1036 |
-
"source": [
|
| 1037 |
-
"e = ['@ What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem?',\n",
|
| 1038 |
-
" '@ How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?',\n",
|
| 1039 |
-
" '@ What are common evaluation metrics for classification models, and how do precision, recall, and F1-score relate to each other?',\n",
|
| 1040 |
-
" '@ How do convolutional neural networks (CNNs) and recurrent neural networks (RNNs) differ in their architecture and applications?',\n",
|
| 1041 |
-
" '@ What steps can be taken to identify and mitigate bias in machine learning models, and why is this an important consideration?']"
|
| 1042 |
-
]
|
| 1043 |
-
},
|
| 1044 |
-
{
|
| 1045 |
-
"cell_type": "code",
|
| 1046 |
-
"execution_count": 56,
|
| 1047 |
-
"metadata": {},
|
| 1048 |
-
"outputs": [
|
| 1049 |
-
{
|
| 1050 |
-
"name": "stdout",
|
| 1051 |
-
"output_type": "stream",
|
| 1052 |
-
"text": [
|
| 1053 |
-
"{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
|
| 1054 |
-
"{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
|
| 1055 |
-
"{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
|
| 1056 |
-
"{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
|
| 1057 |
-
"{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
|
| 1058 |
-
]
|
| 1059 |
-
}
|
| 1060 |
-
],
|
| 1061 |
-
"source": [
|
| 1062 |
-
"for i in e:\n",
|
| 1063 |
-
" class_probabilities = predict_with_loaded_model(i)\n",
|
| 1064 |
-
" print(class_probabilities)"
|
| 1065 |
-
]
|
| 1066 |
-
},
|
| 1067 |
-
{
|
| 1068 |
-
"cell_type": "code",
|
| 1069 |
-
"execution_count": 67,
|
| 1070 |
-
"metadata": {},
|
| 1071 |
-
"outputs": [],
|
| 1072 |
-
"source": [
|
| 1073 |
-
"weights = {\n",
|
| 1074 |
-
" 'Remembering': 0.5,\n",
|
| 1075 |
-
" 'Understanding': 0.5,\n",
|
| 1076 |
-
" 'Applying': 0.5,\n",
|
| 1077 |
-
" 'Analyzing': 0.5,\n",
|
| 1078 |
-
" 'Evaluating': 0.5,\n",
|
| 1079 |
-
" 'Creating':0.5,\n",
|
| 1080 |
-
"}"
|
| 1081 |
-
]
|
| 1082 |
-
},
|
| 1083 |
-
{
|
| 1084 |
-
"cell_type": "code",
|
| 1085 |
-
"execution_count": 68,
|
| 1086 |
-
"metadata": {},
|
| 1087 |
-
"outputs": [],
|
| 1088 |
-
"source": [
|
| 1089 |
-
"questions = [\n",
|
| 1090 |
-
" {'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277},\n",
|
| 1091 |
-
" {'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824},\n",
|
| 1092 |
-
" {'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678},\n",
|
| 1093 |
-
" {'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526},\n",
|
| 1094 |
-
" {'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n",
|
| 1095 |
-
"]"
|
| 1096 |
-
]
|
| 1097 |
-
},
|
| 1098 |
-
{
|
| 1099 |
-
"cell_type": "code",
|
| 1100 |
-
"execution_count": 69,
|
| 1101 |
-
"metadata": {},
|
| 1102 |
-
"outputs": [
|
| 1103 |
-
{
|
| 1104 |
-
"name": "stdout",
|
| 1105 |
-
"output_type": "stream",
|
| 1106 |
-
"text": [
|
| 1107 |
-
"2.49999998975 18.0 90.0\n",
|
| 1108 |
-
"Normalized Score of the Paper: 0.0278\n"
|
| 1109 |
-
]
|
| 1110 |
-
}
|
| 1111 |
-
],
|
| 1112 |
-
"source": [
|
| 1113 |
-
"def calculate_score(question, weights):\n",
|
| 1114 |
-
" score = sum(question[level] * weight for level, weight in weights.items())\n",
|
| 1115 |
-
" return score\n",
|
| 1116 |
-
"\n",
|
| 1117 |
-
"total_score = sum(calculate_score(q, weights) for q in questions)\n",
|
| 1118 |
-
"max_score_per_question = sum([weights[level] for level in weights]) * 6 \n",
|
| 1119 |
-
"max_total_score = max_score_per_question * len(questions) \n",
|
| 1120 |
-
"normalized_score = (total_score - 0) / (max_total_score - 0)\n",
|
| 1121 |
-
"print(total_score, max_score_per_question, max_total_score)\n",
|
| 1122 |
-
"print(f\"Normalized Score of the Paper: {normalized_score:.4f}\")"
|
| 1123 |
-
]
|
| 1124 |
-
},
|
| 1125 |
-
{
|
| 1126 |
-
"cell_type": "code",
|
| 1127 |
-
"execution_count": null,
|
| 1128 |
-
"metadata": {},
|
| 1129 |
-
"outputs": [],
|
| 1130 |
-
"source": []
|
| 1131 |
-
},
|
| 1132 |
-
{
|
| 1133 |
-
"cell_type": "code",
|
| 1134 |
-
"execution_count": 70,
|
| 1135 |
-
"metadata": {},
|
| 1136 |
-
"outputs": [
|
| 1137 |
-
{
|
| 1138 |
-
"name": "stdout",
|
| 1139 |
-
"output_type": "stream",
|
| 1140 |
-
"text": [
|
| 1141 |
-
"{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
|
| 1142 |
-
"{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
|
| 1143 |
-
"{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
|
| 1144 |
-
"{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
|
| 1145 |
-
"{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
|
| 1146 |
-
]
|
| 1147 |
-
}
|
| 1148 |
-
],
|
| 1149 |
-
"source": [
|
| 1150 |
-
"for i in e:\n",
|
| 1151 |
-
" class_probabilities = predict_with_loaded_model(i)\n",
|
| 1152 |
-
" print(class_probabilities)"
|
| 1153 |
-
]
|
| 1154 |
-
},
|
| 1155 |
{
|
| 1156 |
"cell_type": "code",
|
| 1157 |
"execution_count": null,
|
|
|
|
| 298 |
"source": [
|
| 299 |
"from transformers import DistilBertTokenizer\n",
|
| 300 |
"import torch\n",
|
| 301 |
+
"from torch.utils.data import DataLoader\n",
|
| 302 |
+
"import intel_extension_for_pytorch as ipex\n",
|
| 303 |
"# Load the DistilBERT tokenizer\n",
|
| 304 |
"tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
|
| 305 |
"\n",
|
|
|
|
| 367 |
"from transformers import DistilBertForSequenceClassification\n",
|
| 368 |
"\n",
|
| 369 |
"# Load the model with a classification head\n",
|
| 370 |
+
"model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n",
|
| 371 |
+
"optimized_model = ipex.optimize(model, dtype=torch.float32)"
|
| 372 |
]
|
| 373 |
},
|
| 374 |
{
|
|
|
|
| 426 |
"from torch.optim.lr_scheduler import StepLR\n",
|
| 427 |
"\n",
|
| 428 |
"# Set up the optimizer\n",
|
| 429 |
+
"optimizer = AdamW(optimized_model.parameters(), lr=0.0001)\n",
|
| 430 |
"\n",
|
| 431 |
"# Define the training loop\n",
|
| 432 |
"epochs = 1\n",
|
| 433 |
"device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
|
| 434 |
+
"optimized_model.to(device)\n",
|
| 435 |
"\n",
|
| 436 |
"print(device)"
|
| 437 |
]
|
|
|
|
| 802 |
],
|
| 803 |
"source": [
|
| 804 |
"for epoch in range(epochs):\n",
|
| 805 |
+
" optimized_model.train()\n",
|
| 806 |
" total_loss = 0\n",
|
| 807 |
" for batch in train_dataloader:\n",
|
| 808 |
" input_ids, labels = batch\n",
|
|
|
|
| 812 |
" optimizer.zero_grad()\n",
|
| 813 |
"\n",
|
| 814 |
" # Forward pass\n",
|
| 815 |
+
" outputs = optimized_model(input_ids, labels=labels)\n",
|
| 816 |
" loss = outputs.loss\n",
|
| 817 |
" total_loss += loss.item()\n",
|
| 818 |
"\n",
|
|
|
|
| 837 |
}
|
| 838 |
],
|
| 839 |
"source": [
|
| 840 |
+
"optimized_model.eval()\n",
|
| 841 |
"correct_predictions = 0\n",
|
| 842 |
"total_predictions = 0\n",
|
| 843 |
"\n",
|
|
|
|
| 846 |
" input_ids, labels = batch\n",
|
| 847 |
" input_ids, labels = input_ids.to(device), labels.to(device)\n",
|
| 848 |
" # Forward pass\n",
|
| 849 |
+
" outputs = optimized_model(input_ids)\n",
|
| 850 |
" predictions = torch.argmax(outputs.logits, dim=-1)\n",
|
| 851 |
"\n",
|
| 852 |
" correct_predictions += (predictions == labels).sum().item()\n",
|
|
|
|
| 874 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 875 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 876 |
" \n",
|
| 877 |
+
" optimized_model.eval()\n",
|
| 878 |
" with torch.no_grad():\n",
|
| 879 |
+
" outputs = optimized_model(input_ids)\n",
|
| 880 |
" prediction = torch.argmax(outputs.logits, dim=-1)\n",
|
| 881 |
" return prediction.item()\n",
|
| 882 |
"\n",
|
|
|
|
| 917 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 918 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 919 |
" \n",
|
| 920 |
+
" optimized_model.eval()\n",
|
| 921 |
" with torch.no_grad():\n",
|
| 922 |
" # Get the raw logits from the model\n",
|
| 923 |
+
" outputs = optimized_model(input_ids)\n",
|
| 924 |
" logits = outputs.logits\n",
|
| 925 |
" \n",
|
| 926 |
" # Apply softmax to get probabilities\n",
|
|
|
|
| 963 |
}
|
| 964 |
],
|
| 965 |
"source": [
|
| 966 |
+
"optimized_model.save_pretrained('./fine_tuned_distilbert')\n",
|
| 967 |
"\n",
|
| 968 |
"# Save the tokenizer\n",
|
| 969 |
"tokenizer.save_pretrained('./fine_tuned_distilbert')"
|
|
|
|
| 978 |
"from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
|
| 979 |
"\n",
|
| 980 |
"# Load the saved model\n",
|
| 981 |
+
"optimized_model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')\n",
|
| 982 |
"\n",
|
| 983 |
"# Load the saved tokenizer\n",
|
| 984 |
"tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
|
|
|
|
| 1009 |
" inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
|
| 1010 |
" input_ids = inputs['input_ids'].to(device)\n",
|
| 1011 |
"\n",
|
| 1012 |
+
" optimized_model.eval()\n",
|
| 1013 |
" with torch.no_grad():\n",
|
| 1014 |
+
" outputs = optimized_model(input_ids)\n",
|
| 1015 |
" logits = outputs.logits\n",
|
| 1016 |
" probabilities = softmax(logits, dim=-1)\n",
|
| 1017 |
" \n",
|
|
|
|
| 1030 |
" print(f\"{class_label}: {prob:.4f}\")"
|
| 1031 |
]
|
| 1032 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
{
|
| 1034 |
"cell_type": "code",
|
| 1035 |
"execution_count": null,
|
t5_training.ipynb
CHANGED
|
@@ -25,19 +25,22 @@
|
|
| 25 |
"from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
|
| 26 |
"from datasets import Dataset\n",
|
| 27 |
"from transformers import Trainer, TrainingArguments\n",
|
|
|
|
|
|
|
|
|
|
| 28 |
"import json\n",
|
| 29 |
"\n",
|
| 30 |
"# Load pre-trained FLAN-T5 model and tokenizer\n",
|
| 31 |
"model_name = \"google/flan-t5-large\" # FLAN-T5 Base Model\n",
|
| 32 |
"tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
|
| 33 |
"model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
|
| 34 |
-
"\n",
|
| 35 |
"# Example input-output pair for fine-tuning\n",
|
| 36 |
-
"data = json.load(
|
| 37 |
"\n",
|
| 38 |
"# Convert the data to a Hugging Face dataset\n",
|
| 39 |
"dataset = Dataset.from_dict(data)\n",
|
| 40 |
-
"\n",
|
| 41 |
"# Tokenize the data\n",
|
| 42 |
"def preprocess_function(examples):\n",
|
| 43 |
" model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
|
|
@@ -71,7 +74,7 @@
|
|
| 71 |
"\n",
|
| 72 |
"# Initialize the Trainer class\n",
|
| 73 |
"trainer = Trainer(\n",
|
| 74 |
-
" model=
|
| 75 |
" args=training_args,\n",
|
| 76 |
" train_dataset=tokenized_datasets,\n",
|
| 77 |
" eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
|
|
@@ -82,17 +85,11 @@
|
|
| 82 |
"\n",
|
| 83 |
"# Save the fine-tuned model\n",
|
| 84 |
"#trainer.save_model(\"./flan_t5_finetuned\")\n",
|
| 85 |
-
"
|
| 86 |
"tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
|
| 87 |
"\n",
|
| 88 |
"# Evaluate the model on the training data (for a single example)\n",
|
| 89 |
-
"
|
| 90 |
-
"inputs = tokenizer(\"What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem? e How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?\", return_tensors=\"pt\", padding=True)\n",
|
| 91 |
-
"outputs = model.generate(inputs['input_ids'], max_length=1024)\n",
|
| 92 |
-
"\n",
|
| 93 |
-
"# Decode the generated output\n",
|
| 94 |
-
"generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
| 95 |
-
"print(generated_output)"
|
| 96 |
]
|
| 97 |
},
|
| 98 |
{
|
|
@@ -110,14 +107,14 @@
|
|
| 110 |
"\n",
|
| 111 |
"# Load your FP32 model\n",
|
| 112 |
"model_path = \"./flan_t5_finetuned\"\n",
|
| 113 |
-
"
|
| 114 |
"tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
|
| 115 |
"\n",
|
| 116 |
"# Define the quantization configuration\n",
|
| 117 |
"quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
|
| 118 |
"\n",
|
| 119 |
"# Quantize the model\n",
|
| 120 |
-
"q_model = fit(model=
|
| 121 |
"\n",
|
| 122 |
"# Save the quantized model\n",
|
| 123 |
"quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
|
|
|
|
| 25 |
"from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
|
| 26 |
"from datasets import Dataset\n",
|
| 27 |
"from transformers import Trainer, TrainingArguments\n",
|
| 28 |
+
"import torch\n",
|
| 29 |
+
"from torch.utils.data import DataLoader\n",
|
| 30 |
+
"import intel_extension_for_pytorch as ipex\n",
|
| 31 |
"import json\n",
|
| 32 |
"\n",
|
| 33 |
"# Load pre-trained FLAN-T5 model and tokenizer\n",
|
| 34 |
"model_name = \"google/flan-t5-large\" # FLAN-T5 Large checkpoint\n",
|
| 35 |
"tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
|
| 36 |
"model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
|
| 37 |
+
"optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
|
| 38 |
"# Example input-output pair for fine-tuning\n",
|
| 39 |
+
"# json.load() expects an open file object, not a path string, so open the file first\n",
"with open(\"t5train.json\", encoding=\"utf-8\") as train_file:\n",
"    data = json.load(train_file)\n",
|
| 40 |
"\n",
|
| 41 |
"# Convert the data to a Hugging Face dataset\n",
|
| 42 |
"dataset = Dataset.from_dict(data)\n",
|
| 43 |
+
"dataloader = DataLoader(dataset, num_workers=4, pin_memory=True)\n",
|
| 44 |
"# Tokenize the data\n",
|
| 45 |
"def preprocess_function(examples):\n",
|
| 46 |
" model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
|
|
|
|
| 74 |
"\n",
|
| 75 |
"# Initialize the Trainer class\n",
|
| 76 |
"trainer = Trainer(\n",
|
| 77 |
+
" model=optimized_model,\n",
|
| 78 |
" args=training_args,\n",
|
| 79 |
" train_dataset=tokenized_datasets,\n",
|
| 80 |
" eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
|
|
|
|
| 85 |
"\n",
|
| 86 |
"# Save the fine-tuned model\n",
|
| 87 |
"#trainer.save_model(\"./flan_t5_finetuned\")\n",
|
| 88 |
+
"optimized_model.save_pretrained(\"./flan_t5_finetuned\")\n",
|
| 89 |
"tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
|
| 90 |
"\n",
|
| 91 |
"# Put the model in evaluation mode (disables dropout)\n",
|
| 92 |
+
"optimized_model.eval()"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
]
|
| 94 |
},
|
| 95 |
{
|
|
|
|
| 107 |
"\n",
|
| 108 |
"# Load your FP32 model\n",
|
| 109 |
"model_path = \"./flan_t5_finetuned\"\n",
|
| 110 |
+
"optimized_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
|
| 111 |
"tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
|
| 112 |
"\n",
|
| 113 |
"# Define the quantization configuration\n",
|
| 114 |
"quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
|
| 115 |
"\n",
|
| 116 |
"# Quantize the model\n",
|
| 117 |
+
"q_model = fit(model=optimized_model, conf=quant_config)\n",
|
| 118 |
"\n",
|
| 119 |
"# Save the quantized model\n",
|
| 120 |
"quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
|