rishikasrinivas commited on
Commit
258ab84
·
verified ·
1 Parent(s): 66d9552

Delete formula_masks

Browse files
formula_masks/lottery_ticket/Run0.25_3/formula_masks_0.0.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:61e0974685a29bbd7f7b22ab6d0f46e0341a5c8c10722774a1ea4842c489f42e
3
- size 97444932
 
 
 
 
formula_masks/lottery_ticket/Run0.25_3/formula_masks_25.0.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7f0c0a93ec3acd700f4082850b3faf1b82f69181359a9af71a1d08e8f91a66
3
- size 97891374
 
 
 
 
formula_masks/lottery_ticket/Run0.25_3/formula_masks_43.75.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e67d3b8c2221b246606c08c517ce5570f808e1bb678aec90924325b9d4529241
3
- size 99399484
 
 
 
 
formula_masks/lottery_ticket/Run0.25_3/formula_masks_57.812.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c9bbd9383a3019825b83ab4dbdeaedcced5ee109452588cab4cd473a0c23668
3
- size 98651040
 
 
 
 
formula_masks/lottery_ticket/Run0.25_3/formula_masks_68.359.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c1378b30f8829398e025000a064d1a9008e4649843964db33cb02035ba14c9
3
- size 100166878
 
 
 
 
formula_masks/lottery_ticket/Run0.25_3/formula_masks_76.27.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e252a18b1f34ceb08c92524a5c8451e22bd476af839b214a0dd8570f18a8055c
3
- size 98406377
 
 
 
 
formula_masks/lottery_ticket/Run0.25_4/formula_masks_25.0.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65c5c38ca23368cf27a8dd5d819a3bad667e61073b7dddf861b90d5d8e3595b
3
- size 97642165
 
 
 
 
formula_masks/lottery_ticket/Run0.25_4/formula_masks_43.75.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36de0ada01a878706762929235ad76749a048ad9228ca630d65c5e75387e8451
3
- size 100377460
 
 
 
 
formula_masks/lottery_ticket/Run0.25_4/formula_masks_57.812.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7b15a51b2577716a5fb4e7508d530954a2d299bf752cd8b42bb577967a0132c
3
- size 100068063
 
 
 
 
formula_masks/lottery_ticket/Run0.25_4/formula_masks_68.359.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca4b9af1201edd1dfd80f7943c220d1e4ecf226aa432329b028dbd3678ae7949
3
- size 100252082
 
 
 
 
formula_masks/lottery_ticket/Run0.25_4/formula_masks_76.27.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:35bc235efe134333625c8688bf11400934f6430b1772e1a3b828fa0f5bb3a358
3
- size 94123479
 
 
 
 
formula_masks/lottery_ticket/Run2Full/explanation_for_mask_align.ipynb DELETED
@@ -1,639 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 141,
6
- "id": "224d774c",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import json\n",
11
- "with open(\"formula_masks_0.0.json\", 'r') as f:\n",
12
- " j_0 = json.load(f)"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": 142,
18
- "id": "65f79ebb",
19
- "metadata": {},
20
- "outputs": [],
21
- "source": [
22
- "with open(\"formula_masks_36.0.json\", 'r') as f:\n",
23
- " j_36 = json.load(f)"
24
- ]
25
- },
26
- {
27
- "cell_type": "code",
28
- "execution_count": 170,
29
- "id": "c81895e0",
30
- "metadata": {},
31
- "outputs": [
32
- {
33
- "data": {
34
- "text/plain": [
35
- "tensor(1276.1914)"
36
- ]
37
- },
38
- "execution_count": 170,
39
- "metadata": {},
40
- "output_type": "execute_result"
41
- }
42
- ],
43
- "source": [
44
- "a=np.where(np.array(j_0['3']['0'])==1)\n",
45
- "b=np.where(np.array(j_36['3']['0'])==1)\n",
46
- "a=0\n",
47
- "for i in range(1024):\n",
48
- " try:\n",
49
- " a+=torch.tensor(j_0['3'][str(i)]).sum()\n",
50
- " except:\n",
51
- " continue\n",
52
- "a/1024"
53
- ]
54
- },
55
- {
56
- "cell_type": "code",
57
- "execution_count": null,
58
- "id": "3103c94b",
59
- "metadata": {},
60
- "outputs": [],
61
- "source": [
62
- "def fmask_overlap(cluster, percent):\n",
63
- " with open(\"formula_masks_0.0.json\", 'r') as f:\n",
64
- " j_0 = json.load(f)\n",
65
- " with open(f\"formula_masks_{percent}.json\", 'r') as f:\n",
66
- " j_p = json.load(f)\n",
67
- " \n",
68
- " activ_mask_iou = 0\n",
69
- " ravg=0\n",
70
- " ct_=0\n",
71
- " for neuron in range(len(orig)):\n",
72
- " percent_ofsamplesinprunedthatalsoactivateinog, ct, num_of_samples_wherepruned_activatesneuronexcludingtheonesthat_alsoactivateforog = iou_mock(pruned[neuron], orig[neuron])\n",
73
- " activ_mask_iou += percent_ofsamplesinprunedthatalsoactivateinog\n",
74
- " ravg += num_of_samples_wherepruned_activatesneuronexcludingtheonesthat_alsoactivateforog\n",
75
- " ct_ += ct\n",
76
- " return activ_mask_iou/len(orig), ravg/len(orig), ct_/len(orig)\n"
77
- ]
78
- },
79
- {
80
- "cell_type": "code",
81
- "execution_count": 131,
82
- "id": "7a7aeefe",
83
- "metadata": {},
84
- "outputs": [
85
- {
86
- "data": {
87
- "text/plain": [
88
- "(tensor(0.8514), 1024)"
89
- ]
90
- },
91
- "execution_count": 131,
92
- "metadata": {},
93
- "output_type": "execute_result"
94
- }
95
- ],
96
- "source": [
97
- "import numpy as np\n",
98
- "np.where(np.array(j_0['3']['12'])==1), np.where(np.array(j_36['3']['12'])==1)\n",
99
- "\n",
100
- "\n",
101
- "def iou(t1,t2):\n",
102
- " t1 = torch.tensor(t1) \n",
103
- " t2 = torch.tensor(t2)\n",
104
- " return (t1&t2).sum()/(t1|t2).sum()\n",
105
- "\n",
106
- "res = 0\n",
107
- "t=0\n",
108
- "for neuron in range(1024):\n",
109
- " try:\n",
110
- " ious = iou(j_36['1'][str(neuron)], j_0['1'][str(neuron)] )\n",
111
- " res += ious\n",
112
- " \n",
113
- " t+=1\n",
114
- " except:\n",
115
- " continue\n",
116
- " \n",
117
- "res/t, t"
118
- ]
119
- },
120
- {
121
- "cell_type": "code",
122
- "execution_count": 17,
123
- "id": "c6eda8fb",
124
- "metadata": {},
125
- "outputs": [],
126
- "source": [
127
- "import pickle\n",
128
- "\n",
129
- "# Replace with your actual file path\n",
130
- "with open('/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/ActivationRanges.pkl', 'rb') as file:\n",
131
- " data_0 = pickle.load(file)\n",
132
- "with open('/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/36.0%Pruned/ActivationRanges.pkl', 'rb') as file:\n",
133
- " data_36 = pickle.load(file)\n"
134
- ]
135
- },
136
- {
137
- "cell_type": "code",
138
- "execution_count": 108,
139
- "id": "be9a2665",
140
- "metadata": {},
141
- "outputs": [],
142
- "source": [
143
- "import torch\n",
144
- "clus1 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/Cluster1masks.pt\")\n",
145
- "clus2 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/Cluster2masks.pt\")\n",
146
- "\n",
147
- "clus3 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/Cluster3masks.pt\")"
148
- ]
149
- },
150
- {
151
- "cell_type": "code",
152
- "execution_count": 109,
153
- "id": "3ae3d4aa",
154
- "metadata": {},
155
- "outputs": [
156
- {
157
- "data": {
158
- "text/plain": [
159
- "tensor(2425.6191)"
160
- ]
161
- },
162
- "execution_count": 109,
163
- "metadata": {},
164
- "output_type": "execute_result"
165
- }
166
- ],
167
- "source": [
168
- "avg=0\n",
169
- "for i in clus1:\n",
170
- " avg += i.sum()\n",
171
- "avg/clus1.shape[0] "
172
- ]
173
- },
174
- {
175
- "cell_type": "code",
176
- "execution_count": 110,
177
- "id": "dafa1bb6",
178
- "metadata": {},
179
- "outputs": [
180
- {
181
- "data": {
182
- "text/plain": [
183
- "1024"
184
- ]
185
- },
186
- "execution_count": 110,
187
- "metadata": {},
188
- "output_type": "execute_result"
189
- }
190
- ],
191
- "source": [
192
- "clus3.shape[0] "
193
- ]
194
- },
195
- {
196
- "cell_type": "code",
197
- "execution_count": 111,
198
- "id": "f545c30a",
199
- "metadata": {},
200
- "outputs": [
201
- {
202
- "data": {
203
- "text/plain": [
204
- "tensor(721.9316)"
205
- ]
206
- },
207
- "execution_count": 111,
208
- "metadata": {},
209
- "output_type": "execute_result"
210
- }
211
- ],
212
- "source": [
213
- "avg=0\n",
214
- "for i in clus3:\n",
215
- " avg += i.sum()\n",
216
- "avg/clus3.shape[0] "
217
- ]
218
- },
219
- {
220
- "cell_type": "code",
221
- "execution_count": 112,
222
- "id": "91b6f22d",
223
- "metadata": {},
224
- "outputs": [
225
- {
226
- "ename": "SyntaxError",
227
- "evalue": "invalid syntax (786032204.py, line 2)",
228
- "output_type": "error",
229
- "traceback": [
230
- "\u001b[0;36m Cell \u001b[0;32mIn[112], line 2\u001b[0;36m\u001b[0m\n\u001b[0;31m so fmask has to maximize 1s to maximize iou\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
231
- ]
232
- }
233
- ],
234
- "source": [
235
- "#on avg cluster1 has more samples activating at each neuron than clusters 2 and 3\n",
236
- "so fmask has to maximize 1s to maximize iou \n",
237
- "#so at the lowest cluster, the best formula converges to a formula that covers most of the samples (i.e. not actually explaining concepts learned by the neuron; it is just trying to maximize the range of sentences covered)\n",
238
- "# at higher activations fewer samples are active per neuron and fmask overlap with the original fmask is very low, indicating that at higher activations the fmasks are more specialized\n",
239
- "#at lower activations the fmask is similar throughout, meaning it's finding formulas covering the same-ish samples despite pruning, but at higher activations it's finding formulas that end up covering different samples\n",
240
- "\n",
241
- "\n",
242
- "# does that mean that pruning leaves the same samples with low/high activations for a given neuron?? "
243
- ]
244
- },
245
- {
246
- "cell_type": "code",
247
- "execution_count": 113,
248
- "id": "99847b74",
249
- "metadata": {},
250
- "outputs": [],
251
- "source": [
252
- "clus1_36 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/36.0%Pruned/Cluster1masks.pt\")\n",
253
- "clus2_36 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/36.0%Pruned/Cluster2masks.pt\")\n",
254
- "clus3_36 = torch.load(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/36.0%Pruned/Cluster3masks.pt\")"
255
- ]
256
- },
257
- {
258
- "cell_type": "code",
259
- "execution_count": 147,
260
- "id": "b52b7482",
261
- "metadata": {},
262
- "outputs": [
263
- {
264
- "data": {
265
- "text/plain": [
266
- "torch.Size([1024, 10000])"
267
- ]
268
- },
269
- "execution_count": 147,
270
- "metadata": {},
271
- "output_type": "execute_result"
272
- }
273
- ],
274
- "source": [
275
- "clus3_36.shape"
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 115,
281
- "id": "487e508b",
282
- "metadata": {},
283
- "outputs": [
284
- {
285
- "data": {
286
- "text/plain": [
287
- "(tensor(2409), tensor(1627), tensor(640))"
288
- ]
289
- },
290
- "execution_count": 115,
291
- "metadata": {},
292
- "output_type": "execute_result"
293
- }
294
- ],
295
- "source": [
296
- "clus1[102].sum(),clus2[12].sum(),clus3[12].sum()"
297
- ]
298
- },
299
- {
300
- "cell_type": "code",
301
- "execution_count": 137,
302
- "id": "9939de97",
303
- "metadata": {},
304
- "outputs": [
305
- {
306
- "data": {
307
- "text/plain": [
308
- "(tensor(868), tensor(3888))"
309
- ]
310
- },
311
- "execution_count": 137,
312
- "metadata": {},
313
- "output_type": "execute_result"
314
- }
315
- ],
316
- "source": [
317
- "(clus1_36[0] & clus1[0]).sum(), (clus1_36[0] | clus1[0]).sum()"
318
- ]
319
- },
320
- {
321
- "cell_type": "code",
322
- "execution_count": 145,
323
- "id": "0d2cc757",
324
- "metadata": {},
325
- "outputs": [
326
- {
327
- "data": {
328
- "text/plain": [
329
- "(567,\n",
330
- " 967,\n",
331
- " 0.36962190352020863,\n",
332
- " tensor(1045),\n",
333
- " tensor(1056),\n",
334
- " 0.41068854247521247)"
335
- ]
336
- },
337
- "execution_count": 145,
338
- "metadata": {},
339
- "output_type": "execute_result"
340
- }
341
- ],
342
- "source": [
343
- "#highest cluster: number of samples where og and pruned are both active\n",
344
- "\n",
345
- "aiou=0\n",
346
- "for neuron in range(1024):\n",
347
- " ct = 0\n",
348
- " un=0\n",
349
- " for i in range(len(clus3_36[neuron])): #10000\n",
350
- " if clus3[neuron][i] == clus3_36[neuron][i] and clus3[neuron][i] : #neuron2 activ at cluster 1 for og and pruned \n",
351
- " ct += 1\n",
352
- " elif clus3[neuron][i] != clus3_36[neuron][i]:#neuron2 NOT activ at cluster 1 for og and pruned \n",
353
- " un += 1\n",
354
- " aiou += ct/(ct+un)\n",
355
- "\n",
356
- "ct, un, ct/(ct +un), clus3_36[neuron].sum(), clus3[neuron].sum(), aiou/1024"
357
- ]
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": 72,
362
- "id": "921bf1c8",
363
- "metadata": {},
364
- "outputs": [
365
- {
366
- "data": {
367
- "text/plain": [
368
- "(tensor([False, False, False, ..., False, False, False]),\n",
369
- " tensor([False, False, False, ..., False, False, False]))"
370
- ]
371
- },
372
- "execution_count": 72,
373
- "metadata": {},
374
- "output_type": "execute_result"
375
- }
376
- ],
377
- "source": [
378
- "clus3_36[2], clus3[2]"
379
- ]
380
- },
381
- {
382
- "cell_type": "code",
383
- "execution_count": 136,
384
- "id": "2bac7da1",
385
- "metadata": {},
386
- "outputs": [
387
- {
388
- "data": {
389
- "text/plain": [
390
- "(868,\n",
391
- " 3020,\n",
392
- " 0.22325102880658437,\n",
393
- " tensor(2313),\n",
394
- " tensor(2443),\n",
395
- " 0.22325102880658437)"
396
- ]
397
- },
398
- "execution_count": 136,
399
- "metadata": {},
400
- "output_type": "execute_result"
401
- }
402
- ],
403
- "source": [
404
- "#lowest cluster: number of samples where og and pruned are both active\n",
405
- "aiou=0\n",
406
- "for neuron in range(1024):\n",
407
- " ct = 0\n",
408
- " un=0\n",
409
- " for i in range(len(clus1_36[neuron])): #10000\n",
410
- " if clus1[neuron][i] == clus1_36[neuron][i] and clus1[neuron][i] : #neuron2 activ at cluster 1 for og and pruned \n",
411
- " ct += 1\n",
412
- " elif clus1[neuron][i] != clus1_36[neuron][i]:#neuron2 NOT activ at cluster 1 for og and pruned \n",
413
- " un += 1\n",
414
- " aiou += ct/(ct+un)\n",
415
- "\n",
416
- "ct, un, ct/(ct +un), clus1_36[neuron].sum(), clus1[neuron].sum(), aiou/1024"
417
- ]
418
- },
419
- {
420
- "cell_type": "code",
421
- "execution_count": null,
422
- "id": "5a7b95f0",
423
- "metadata": {},
424
- "outputs": [],
425
- "source": [
426
- "#cluster masks hold, for each neuron, a 1 if the neuron activated for that sample in that activation range \n",
427
- "# lowest cluster: low overlap means the same neuron is switching activation ranges so if for sent 1 in og \n",
428
- " #neuron 3 activated at the lowest range, once you prune, that neuron 3 now activates for sentence 1 at a higher range \n",
429
- "#higher overlap in highest cluster, meaning more of the strongly activating samples remain strongly activating with pruning at that neuron\n",
430
- " #can i graph this (per neuron do the iou like in the cell above for each cluster and avg)\n",
431
- " \n",
432
- " #for neuron in common between 36% and 0%\n",
433
- " #avg += (clus3_36[2] & clus3[2]).sum()/(clus3_36[2] | clus3[2]).sum()\n",
434
- " #avg/(num neurons in common between 36% and 0%)\n",
435
- "\n",
436
- " "
437
- ]
438
- },
439
- {
440
- "cell_type": "code",
441
- "execution_count": null,
442
- "id": "cf072a2a",
443
- "metadata": {},
444
- "outputs": [],
445
- "source": [
446
- "#at higher clusters the alignment across pruning is higher, so that means for each neuron there's more similarity in the samples that it activates for, and despite this the formula masks are vastly different\n",
447
- " #why. well fewer samples activate at high clusters at a given neuron \n",
448
- " # and pruning doesnt change by much (comp to low clusters) which samples activate at the high value at the neuron\n",
449
- " \n",
450
- " #CORRECTED::::::\n",
451
- " #at the low clusters there a large overlap where og[neuron]==1 and prune[neuron]=1 for each sample. like neuron 2 is active at the lowest range for the same-ish neurons in og as it is in prune\n",
452
- " # at high clusters theres a small overlap (421 vs 1032) meaning formulas that cover more samples need to be different but at low clusters since theres\n",
453
- " # a lot more samples where its active for the neuron despite pruning the formula mask need to cover more of the same samples\n",
454
- " # so if you look at the formula mask at lower activations it's going to be more similar and at higher it's going to be more different \n",
455
- " \n",
456
- " #can graph this by running 2 cells above on all neurons instead of just neuron 3 and plotting the ct values (or avg ct value per neuron and running this across all pruning%s)\n",
457
- " \n",
458
- "#but at low clusters the alignment is so low, meaning each neuron activates at the low range for completely different samples, yet fmask is so similar\n",
459
- " #means that fmask is favoring formulas that just entail as many concepts that are common?"
460
- ]
461
- },
462
- {
463
- "cell_type": "code",
464
- "execution_count": null,
465
- "id": "d8e915de",
466
- "metadata": {},
467
- "outputs": [],
468
- "source": [
469
- "at low clusters more samples activate a given neuron\n",
470
- "\n",
471
- "neuron2 has high activation for fewer samples\n",
472
- "when you prune it still has a high activation for a good amount of THOSE samples (some new some removed)\n",
473
- "\n",
474
- "neuron2 has a low activation for a lot of samples; when you prune there are still a lot, of which half are the same \n",
475
- "\n",
476
- "goal of formula mask is to maximize the iou between the formula mask and activation mask\n",
477
- "if the activation mask has a lot of 1's the formula mask should also have a lot of 1's to maximize the overlap\n",
478
- "if the activation mask has fewer 1's the formula mask should cover the samples that are marked 1 and there are fewer so the mask is more specialized \n",
479
- "\n",
480
- "so lower activations' fmask tries to cover as many of the samples as possible and since there are more it's probably just converging to the common words across the dataset \n",
481
- "high activations' fmask needs to cover fewer samples so the formula is more specialized to those samples\n",
482
- "\n",
483
- "with pruning still a lot of samples activate at lowest cluster so masks are more similar since a lot of 1s. at lowest: fmask overlap high, activation mask overlap low\n",
484
- "with pruning at highest cluster fmask overlap low, activation mask overlap higher"
485
- ]
486
- },
487
- {
488
- "cell_type": "code",
489
- "execution_count": 167,
490
- "id": "4a877473",
491
- "metadata": {},
492
- "outputs": [
493
- {
494
- "name": "stdout",
495
- "output_type": "stream",
496
- "text": [
497
- "0.0%Pruned 1.0 tensor(721.9316) 0.0 721.931640625\n",
498
- "20.0%Pruned 0.63353277652473 tensor(723.8770) 269.625 454.251953125\n",
499
- "36.0%Pruned 0.5870197241561632 tensor(737.8115) 317.6953125 420.1162109375\n",
500
- "48.8%Pruned 0.542155193362132 tensor(732.0205) 344.4111328125 387.609375\n",
501
- "59.04%Pruned 0.5104976031473197 tensor(738.7979) 373.6806640625 365.1171875\n",
502
- "67.232%Pruned 0.4775875996729925 tensor(734.4824) 393.365234375 341.1171875\n",
503
- "73.786%Pruned 0.45344319001085037 tensor(736.4424) 412.8427734375 323.599609375\n",
504
- "79.028%Pruned 0.42633524043605053 tensor(731.7539) 427.384765625 304.369140625\n",
505
- "83.223%Pruned 0.40614504470322543 tensor(728.4932) 438.2900390625 290.203125\n",
506
- "86.578%Pruned 0.38869179570912715 tensor(724.6758) 446.884765625 277.791015625\n",
507
- "89.263%Pruned 0.37574400087483406 tensor(723.2080) 455.0087890625 268.19921875\n",
508
- "91.41%Pruned 0.3588653807151315 tensor(721.8672) 464.357421875 257.509765625\n",
509
- "93.128%Pruned 0.351142350258654 tensor(724.8066) 473.1220703125 251.6845703125\n",
510
- "94.502%Pruned 0.3385971351927113 tensor(715.9004) 473.0078125 242.892578125\n"
511
- ]
512
- }
513
- ],
514
- "source": [
515
- "import os\n",
516
- "def iou(t1,t2):\n",
517
- " t1 = torch.tensor(t1) \n",
518
- " t2 = torch.tensor(t2)\n",
519
- " return (t1&t2).sum()/(t1|t2).sum()\n",
520
- "\n",
521
- "def iou_mock(p,o):\n",
522
- " p = np.array(p) \n",
523
- " o = np.array(o)\n",
524
- " ct=0\n",
525
- " for i in np.where(o==1)[0]:\n",
526
- " if p[i] == 1:\n",
527
- " ct += 1\n",
528
- " return ct/len(np.where(o==1)[0]), ct, len(np.where(p==1)[0]) - ct\n",
529
- "\n",
530
- "\n",
531
- "def activation_overlap(cluster, percent):\n",
532
- " pruned = torch.load(f\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/{percent}%Pruned/Cluster{cluster}masks.pt\")\n",
533
- " orig = torch.load(f\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/Cluster{cluster}masks.pt\")\n",
534
- " activ_mask_iou = 0\n",
535
- " ravg=0\n",
536
- " ct_=0\n",
537
- " for neuron in range(len(orig)):\n",
538
- " percent_ofsamplesinprunedthatalsoactivateinog, ct, num_of_samples_wherepruned_activatesneuronexcludingtheonesthat_alsoactivateforog = iou_mock(pruned[neuron], orig[neuron])\n",
539
- " activ_mask_iou += percent_ofsamplesinprunedthatalsoactivateinog\n",
540
- " ravg += num_of_samples_wherepruned_activatesneuronexcludingtheonesthat_alsoactivateforog\n",
541
- " ct_ += ct\n",
542
- " return activ_mask_iou/len(orig), ravg/len(orig), ct_/len(orig)\n",
543
- "\n",
544
- "def active_samples(cluster, percent):\n",
545
- " pruned = torch.load(f\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/{percent}%Pruned/Cluster{cluster}masks.pt\")\n",
546
- " orig = torch.load(f\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks/0.0%Pruned/Cluster{cluster}masks.pt\")\n",
547
- " num_active = 0\n",
548
- " for neuron in range(len(orig)):\n",
549
- " num_active += torch.where(pruned[neuron]==1,1,0).sum()\n",
550
- " return num_active/len(orig)\n",
551
- " \n",
552
- "for o in sorted(os.listdir(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks\")):\n",
553
- " if \".ipy\" in o: continue\n",
554
- " percent = o.split(\"%\")[0]\n",
555
- " aiou,avg_num_samples_where_pruned_alone_activates, avg_num_samples_where_pruned_and_og_activate = activation_overlap(cluster=3, percent=percent)\n",
556
- " na = active_samples(cluster=3, percent=percent) \n",
557
- " print(o, aiou, na, avg_num_samples_where_pruned_alone_activates, avg_num_samples_where_pruned_and_og_activate)\n",
558
- "#per neuron (on avg) 63% of samples that activate the neuron in the og at cluster3 activate it at 20% pruning\n",
559
- "\n",
560
- "#so the only way for the fmask overlap to be low is if the fmask is picking 1s in the areas corresponding to where pruned alone activates mostly?\n",
561
- "if fmask covered samples where both og and pruned activate the neuron the overlap would be high between the fmasks right bc there are more samples where og and pruned activate than pruned alone\n",
562
- "that means \n",
563
- "\n",
564
- "\n",
565
- "1 reason why formula masks are different at higher activations is the formula masks are covering different samples with pruning\n",
566
- "and why would they be covering different samples\n",
567
- " 1. the activation mask is such that different samples activate the neuron at that cluster so that means activation overlap should be low but this is not the case\n",
568
- " 2. the formula mask maximizes its iou by explaining also the new samples that activate the neuron after pruning so it's a different set of sentences it's trying to cover and the best formula is the one that \n",
569
- " covers a different subset of these than that of the original --> more likely because that means that the large overlap in activation masks doesn't really matter "
570
- ]
571
- },
572
- {
573
- "cell_type": "code",
574
- "execution_count": 168,
575
- "id": "cb84e5b3",
576
- "metadata": {},
577
- "outputs": [
578
- {
579
- "name": "stdout",
580
- "output_type": "stream",
581
- "text": [
582
- "0.0%Pruned 1.0 tensor(2425.6191) 0.0 2425.619140625\n",
583
- "20.0%Pruned 0.4795691539585096 tensor(2436.1260) 1273.3076171875 1162.818359375\n",
584
- "36.0%Pruned 0.4364710762552435 tensor(2466.3604) 1408.0849609375 1058.275390625\n",
585
- "48.8%Pruned 0.4063179449590072 tensor(2446.1436) 1460.8974609375 985.24609375\n",
586
- "59.04%Pruned 0.3880185934396624 tensor(2464.2178) 1523.1806640625 941.037109375\n",
587
- "67.232%Pruned 0.3696877555808726 tensor(2471.5029) 1574.873046875 896.6298828125\n",
588
- "73.786%Pruned 0.3586499949952349 tensor(2486.4053) 1616.423828125 869.9814453125\n",
589
- "79.028%Pruned 0.34606640613625494 tensor(2488.3691) 1649.0048828125 839.3642578125\n",
590
- "83.223%Pruned 0.3372108222952837 tensor(2486.8066) 1668.763671875 818.04296875\n",
591
- "86.578%Pruned 0.3295070241143398 tensor(2487.6650) 1688.1005859375 799.564453125\n",
592
- "89.263%Pruned 0.3258526359064324 tensor(2510.9932) 1720.5595703125 790.43359375\n",
593
- "91.41%Pruned 0.3180118670466814 tensor(2492.7900) 1721.0126953125 771.77734375\n",
594
- "93.128%Pruned 0.3151076345771013 tensor(2509.9775) 1745.3173828125 764.66015625\n",
595
- "94.502%Pruned 0.31089489518156466 tensor(2502.5498) 1747.9677734375 754.58203125\n"
596
- ]
597
- }
598
- ],
599
- "source": [
600
- "for o in sorted(os.listdir(\"/workspace/CCE_NLI/BERT/exp/lottery_ticket/Run2Full/Masks\")):\n",
601
- " if \".ipy\" in o: continue\n",
602
- " percent = o.split(\"%\")[0]\n",
603
- " aiou,avg_num_samples_where_pruned_alone_activates, avg_num_samples_where_pruned_and_og_activate = activation_overlap(cluster=1, percent=percent)\n",
604
- " na = active_samples(cluster=1, percent=percent) \n",
605
- " print(o, aiou, na, avg_num_samples_where_pruned_alone_activates,avg_num_samples_where_pruned_and_og_activate)\n",
606
- "#per neuron (on avg) 48% of samples that activate the neuron in the og at cluster1 activate it at 20% pruning"
607
- ]
608
- },
609
- {
610
- "cell_type": "code",
611
- "execution_count": null,
612
- "id": "2e0cd17e",
613
- "metadata": {},
614
- "outputs": [],
615
- "source": []
616
- }
617
- ],
618
- "metadata": {
619
- "kernelspec": {
620
- "display_name": "Python 3 (ipykernel)",
621
- "language": "python",
622
- "name": "python3"
623
- },
624
- "language_info": {
625
- "codemirror_mode": {
626
- "name": "ipython",
627
- "version": 3
628
- },
629
- "file_extension": ".py",
630
- "mimetype": "text/x-python",
631
- "name": "python",
632
- "nbconvert_exporter": "python",
633
- "pygments_lexer": "ipython3",
634
- "version": "3.8.10"
635
- }
636
- },
637
- "nbformat": 4,
638
- "nbformat_minor": 5
639
- }