Updating to include NLI LoRA model
app.py
CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModel, BertForSequenceClassification
+from transformers import pipeline, AutoTokenizer, AutoModel, BertForSequenceClassification, AlbertForSequenceClassification, DebertaForSequenceClassification, AutoModelForSequenceClassification
 from peft.auto import AutoPeftModelForSequenceClassification
 from tensorboard.backend.event_processing import event_accumulator
 from peft import PeftModel
@@ -13,10 +13,6 @@ loraModel = AutoPeftModelForSequenceClassification.from_pretrained("Intradiction
 #tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
 tokenizer1 = AutoTokenizer.from_pretrained("albert-base-v2")
 tokenizer2 = AutoTokenizer.from_pretrained("microsoft/deberta-v3-xsmall")
-# base_model = AutoModel.from_pretrained("microsoft/deberta-v3-xsmall")
-# peft_model_id = "rajevan123/STS-Lora-Fine-Tuning-Capstone-Deberta-small"
-# model = PeftModel.from_pretrained(base_model, peft_model_id)
-# #merged_model = model.merge_and_unload()


 # Handle calls to DistilBERT------------------------------------------
@@ -42,32 +38,44 @@ def distilBERTUntrained_fn(text):


 # Handle calls to ALBERT---------------------------------------------
-
-
-
+base_model1 = AlbertForSequenceClassification.from_pretrained("Alireza1044/albert-base-v2-mnli")
+peft_model_id1 = "m4faisal/NLI-Lora-Fine-Tuning-10K-ALBERT"
+model1 = PeftModel.from_pretrained(model=base_model1, model_id=peft_model_id1)
+sa_merged_model1 = model1.merge_and_unload()
+bbu_tokenizer1 = AutoTokenizer.from_pretrained("Alireza1044/albert-base-v2-mnli")
+
+ALbertUntrained_pipe = pipeline("text-classification", model="Alireza1044/albert-base-v2-mnli")
+AlbertnoLORA_pipe = pipeline(model="m4faisal/NLI-Conventional-Fine-Tuning")
+AlbertwithLORA_pipe = pipeline("text-classification",model=sa_merged_model1, tokenizer=bbu_tokenizer1)

 #NLI models
 def AlbertnoLORA_fn(text1, text2):
     return AlbertnoLORA_pipe({'text': text1, 'text_pair': text2})

 def AlbertwithLORA_fn(text1, text2):
-    return (
+    return AlbertwithLORA_pipe({'text': text1, 'text_pair': text2})

 def AlbertUntrained_fn(text1, text2):
     return ALbertUntrained_pipe({'text': text1, 'text_pair': text2})


 # Handle calls to Deberta--------------------------------------------
+# base_model2 = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-xsmall", ignore_mismatched_sizes=True)
+# peft_model_id2 = "rajevan123/STS-Lora-Fine-Tuning-Capstone-Deberta-old-model-pipe-test_augmentation"
+# model2 = PeftModel.from_pretrained(model=base_model2, model_id=peft_model_id2)
+# sa_merged_model2 = model2.merge_and_unload()
+# bbu_tokenizer2 = AutoTokenizer.from_pretrained("microsoft/deberta-v3-xsmall")
+
 DebertaUntrained_pipe = pipeline("text-classification", model="microsoft/deberta-v3-xsmall")
 DebertanoLORA_pipe = pipeline("text-classification", model="rajevan123/STS-Conventional-Fine-Tuning")
-#DebertawithLORA_pipe = pipeline("text-classification",model=
+# DebertawithLORA_pipe = pipeline("text-classification",model=sa_merged_model2, tokenizer=bbu_tokenizer2)

 #STS models
 def DebertanoLORA_fn(text1, text2):
     return DebertanoLORA_pipe({'text': text1, 'text_pair': text2})

 def DebertawithLORA_fn(text1, text2):
-    #return DebertawithLORA_pipe({'text': text1, 'text_pair': text2})
+    # return DebertawithLORA_pipe({'text': text1, 'text_pair': text2})
     return ("working2")

 def DebertaUntrained_fn(text1, text2):
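The ALBERT block added above is the standard PEFT recipe: load the fine-tuned base model, attach the LoRA adapter, fold the adapter weights into the base with merge_and_unload(), and hand the merged model to a regular pipeline. A minimal self-contained sketch of that pattern, using the repo ids from this commit (the example sentence pair and the printed output shape are illustrative, not taken from the Space):

from transformers import AlbertForSequenceClassification, AutoTokenizer, pipeline
from peft import PeftModel

# Base NLI model and the LoRA adapter trained on top of it
base = AlbertForSequenceClassification.from_pretrained("Alireza1044/albert-base-v2-mnli")
lora = PeftModel.from_pretrained(base, "m4faisal/NLI-Lora-Fine-Tuning-10K-ALBERT")
merged = lora.merge_and_unload()  # bakes the LoRA deltas into the base weights

tokenizer = AutoTokenizer.from_pretrained("Alireza1044/albert-base-v2-mnli")
nli_pipe = pipeline("text-classification", model=merged, tokenizer=tokenizer)

# Sentence pairs go in as {'text', 'text_pair'} dicts, as in the handler functions
result = nli_pipe({"text": "A man is playing guitar.", "text_pair": "Someone is making music."})
print(result)  # label/score output; label names come from the model's id2label mapping

Merging up front means the Space pays the adapter overhead once at startup rather than on every request, and the pipeline sees an ordinary AlbertForSequenceClassification.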
@@ -94,7 +102,16 @@ def displayMetricStatsText():
     event_acc.Reload()
     accuracy_data = event_acc.Scalars('eval/accuracy')
     loss_data = event_acc.Scalars('eval/loss')
-
+
+    #code to pull time data (very inaccurate)
+    # time_data = event_acc.Scalars('eval/runtime')
+    # Ttime = 0
+    # for time in time_data:
+    #     Ttime+=time.value
+    # Ttime = str(round(Ttime/60,2))
+    # print(Ttime)
+
+    metrics = ("Active Training Time: mins \n\n")
     for i in range(0, len(loss_data)):
         metrics = metrics + 'Epoch Number: ' + str(i) + '\n'
         metrics = metrics + 'Accuracy (%): ' + str(round(accuracy_data[i].value * 100, 3)) + '\n'
@@ -117,7 +134,16 @@ def displayMetricStatsTextTCLora():
     event_acc.Reload()
     accuracy_data = event_acc.Scalars('eval/accuracy')
     loss_data = event_acc.Scalars('eval/loss')
-
+
+    #code to pull time data (very inaccurate)
+    # time_data = event_acc.Scalars('eval/runtime')
+    # Ttime = 0
+    # for time in time_data:
+    #     Ttime+=time.value
+    # Ttime = str(round(Ttime/60,2))
+    # print(event_acc.Tags())
+
+    metrics = ("Active Training Time: mins \n\n")
     for i in range(0, len(loss_data)):
         metrics = metrics + 'Epoch Number: ' + str(i) + '\n'
         metrics = metrics + 'Accuracy (%): ' + str(round(accuracy_data[i].value * 100, 3)) + '\n'
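Both metric helpers lean on TensorBoard's EventAccumulator: Reload() parses the event file, and each Scalars(tag) call returns a list of ScalarEvent records with wall_time, step, and value fields, which the loops above format epoch by epoch. A compact sketch of that API, with a placeholder event-file path:

from tensorboard.backend.event_processing import event_accumulator

# Placeholder path -- any local tfevents file written during training
ea = event_accumulator.EventAccumulator(
    "events.out.tfevents.example.0",
    size_guidance={event_accumulator.SCALARS: 0},  # 0 keeps every scalar event
)
ea.Reload()  # nothing is read until Reload() is called

print(ea.Tags()["scalars"])  # e.g. ['eval/accuracy', 'eval/loss', ...]
for ev in ea.Scalars("eval/accuracy"):
    print(ev.step, round(ev.value * 100, 3))  # ScalarEvent(wall_time, step, value)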
@@ -127,7 +153,7 @@ def displayMetricStatsTextTCLora():

 def displayMetricStatsTextNLINoLora():
     #file_name = 'events.out.tfevents.NLI-Conventional.1'
-    file_name = hf_hub_download(repo_id="
+    file_name = hf_hub_download(repo_id="m4faisal/NLI-Conventional-Fine-Tuning", filename="runs/Mar20_23-18-22_a7cbf6b28344/events.out.tfevents.1710976706.a7cbf6b28344.5071.0")
     event_acc = event_accumulator.EventAccumulator(file_name,
         size_guidance={
         event_accumulator.COMPRESSED_HISTOGRAMS: 500,
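hf_hub_download resolves a single file inside a Hub repo, caches it locally, and returns the cached path, so the Space can read training logs straight from the model repos instead of bundling event files. A short sketch of that flow using the repo and filename from the hunk above (this assumes app.py imports hf_hub_download from huggingface_hub somewhere in lines 6-12, which are outside this diff):

from huggingface_hub import hf_hub_download
from tensorboard.backend.event_processing import event_accumulator

# Downloads the file on first call, then serves it from the local cache
path = hf_hub_download(
    repo_id="m4faisal/NLI-Conventional-Fine-Tuning",
    filename="runs/Mar20_23-18-22_a7cbf6b28344/events.out.tfevents.1710976706.a7cbf6b28344.5071.0",
)

ea = event_accumulator.EventAccumulator(path)
ea.Reload()
acc = ea.Scalars("eval/accuracy")
print(f"epochs logged: {len(acc)}, final accuracy: {acc[-1].value:.4f}")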
@@ -149,7 +175,8 @@ def displayMetricStatsTextNLINoLora():
     return metrics

 def displayMetricStatsTextNLILora():
-    file_name = 'events.out.tfevents.NLI-Lora.0'
+    #file_name = 'events.out.tfevents.NLI-Lora.0'
+    file_name = hf_hub_download(repo_id="m4faisal/NLI-Lora-Fine-Tuning-10K", filename="runs/Mar20_18-07-52_87caf1b1d04f/events.out.tfevents.1710958080.87caf1b1d04f.7531.0")
     event_acc = event_accumulator.EventAccumulator(file_name,
         size_guidance={
         event_accumulator.COMPRESSED_HISTOGRAMS: 500,