Spaces:
Sleeping
Sleeping
Add threads
Browse files
app.py
CHANGED
|
@@ -49,8 +49,8 @@ st.write('Loading the pretrained model ...')
|
|
| 49 |
model_name = "CarolXia/pii-kd-deberta-v2"
|
| 50 |
# config = PeftConfig.from_pretrained(model_name)
|
| 51 |
model = DebertaV2ForTokenClassification.from_pretrained(model_name, token=st.secrets["HUGGINGFACE_TOKEN"])
|
| 52 |
-
|
| 53 |
-
|
| 54 |
# Try quantization instead
|
| 55 |
# model = AutoModelForTokenClassification.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
| 56 |
tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/mdeberta-v3-base", token=st.secrets["HUGGINGFACE_TOKEN"])
|
|
@@ -141,18 +141,27 @@ entity_set=dict()
|
|
| 141 |
dataset = load_dataset("Isotonic/pii-masking-200k", split="train")
|
| 142 |
unmasked_text = dataset['unmasked_text'] # This will load the entire column in memory. Must do this to avoid I/O delay later
|
| 143 |
|
| 144 |
-
st.write('
|
| 145 |
-
sizes = [0] *
|
| 146 |
start = time.time()
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
end = time.time()
|
| 158 |
length = end - start
|
|
|
|
| 49 |
model_name = "CarolXia/pii-kd-deberta-v2"
|
| 50 |
# config = PeftConfig.from_pretrained(model_name)
|
| 51 |
model = DebertaV2ForTokenClassification.from_pretrained(model_name, token=st.secrets["HUGGINGFACE_TOKEN"])
|
| 52 |
+
if torch.cuda.is_available():
|
| 53 |
+
model = model.to("cuda")
|
| 54 |
# Try quantization instead
|
| 55 |
# model = AutoModelForTokenClassification.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
| 56 |
tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/mdeberta-v3-base", token=st.secrets["HUGGINGFACE_TOKEN"])
|
|
|
|
| 141 |
dataset = load_dataset("Isotonic/pii-masking-200k", split="train")
|
| 142 |
unmasked_text = dataset['unmasked_text'] # This will load the entire column in memory. Must do this to avoid I/O delay later
|
| 143 |
|
| 144 |
+
st.write('Number of rows in the dataset ', dataset.num_rows)
|
| 145 |
+
sizes = [0] * 5
|
| 146 |
start = time.time()
|
| 147 |
+
# t0 = threading.Thread(target=process_datasets, args=(0, 50, unmasked_text, sizes, 0, entity_set, []))
|
| 148 |
+
# t1 = threading.Thread(target=process_datasets, args=(25, 50, unmasked_text, sizes, 1, entity_set, []))
|
| 149 |
+
# t2 = threading.Thread(target=process_datasets, args=(20, 30, unmasked_text, sizes, 2, entity_set, []))
|
| 150 |
+
# t3 = threading.Thread(target=process_datasets, args=(30, 40, unmasked_text, sizes, 3, entity_set, []))
|
| 151 |
+
# t4 = threading.Thread(target=process_datasets, args=(40, 50, unmasked_text, sizes, 4, entity_set, []))
|
| 152 |
+
# with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True) as prof:
|
| 153 |
+
process_datasets(0, 50, unmasked_text, sizes, 0, entity_set, [])
|
| 154 |
+
# t0.start()
|
| 155 |
+
# t1.start()
|
| 156 |
+
# t2.start()
|
| 157 |
+
# t3.start()
|
| 158 |
+
# t4.start()
|
| 159 |
+
|
| 160 |
+
# t0.join()
|
| 161 |
+
# t1.join()
|
| 162 |
+
# t2.join()
|
| 163 |
+
# t3.join()
|
| 164 |
+
# t4.join()
|
| 165 |
|
| 166 |
end = time.time()
|
| 167 |
length = end - start
|