Spaces:
Sleeping
Sleeping
clean basic LTT
Browse files
clean_selection/analyze_clean_hierarchical_loss_protein_vec.ipynb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4d4e54f9a6d53d0fa29ae72291e719a394f18bae83a55408ad867934a19657b
|
| 3 |
+
size 49522
|
clean_selection/process_clean_ec.ipynb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6a7b8853fa3df8fe380cea2391bdace7b76605c3e80770b397c1ba5776aee6c
|
| 3 |
+
size 5860
|
protein_conformal/scope_utils.py
CHANGED
|
@@ -287,14 +287,17 @@ def get_hierarchical_loss(data_, lambda_):
|
|
| 287 |
return np.mean(losses)
|
| 288 |
|
| 289 |
|
| 290 |
-
def get_thresh_max_hierarchical(data, lambdas, alpha):
|
| 291 |
# get the worst case loss
|
| 292 |
wc_loss = 4 # in the max case, the max_loss is simply retrieving a protein with different class
|
| 293 |
loss_thresh = alpha - (wc_loss - alpha)/len(data) # normalize by size of calib set
|
| 294 |
losses = []
|
| 295 |
best_lam = None
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
| 298 |
if per_lam_loss > loss_thresh:
|
| 299 |
break
|
| 300 |
best_lam = lam
|
|
@@ -306,10 +309,13 @@ def get_thresh_max_hierarchical(data, lambdas, alpha):
|
|
| 306 |
|
| 307 |
return best_lam, losses
|
| 308 |
|
| 309 |
-
def get_hierarchical_max_loss(data_, lambda_):
|
| 310 |
losses = []
|
| 311 |
for query in data_:
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
| 313 |
if np.sum(thresh_idx) == 0:
|
| 314 |
loss = 0
|
| 315 |
else:
|
|
|
|
| 287 |
return np.mean(losses)
|
| 288 |
|
| 289 |
|
| 290 |
+
def get_thresh_max_hierarchical(data, lambdas, alpha, sim="cosine"):
|
| 291 |
# get the worst case loss
|
| 292 |
wc_loss = 4 # in the max case, the max_loss is simply retrieving a protein with different class
|
| 293 |
loss_thresh = alpha - (wc_loss - alpha)/len(data) # normalize by size of calib set
|
| 294 |
losses = []
|
| 295 |
best_lam = None
|
| 296 |
+
if sim == "cosine":
|
| 297 |
+
## reverse lambdas and return list
|
| 298 |
+
lambdas = list(reversed(lambdas))
|
| 299 |
+
for lam in lambdas: # start from the largest lambda since we are dealing with raw similarity scores
|
| 300 |
+
per_lam_loss = get_hierarchical_max_loss(data, lam, sim=sim)
|
| 301 |
if per_lam_loss > loss_thresh:
|
| 302 |
break
|
| 303 |
best_lam = lam
|
|
|
|
| 309 |
|
| 310 |
return best_lam, losses
|
| 311 |
|
| 312 |
+
def get_hierarchical_max_loss(data_, lambda_, sim="cosine"):
|
| 313 |
losses = []
|
| 314 |
for query in data_:
|
| 315 |
+
if sim == "cosine":
|
| 316 |
+
thresh_idx = query['S_i'] >= lambda_
|
| 317 |
+
else:
|
| 318 |
+
thresh_idx = query['S_i'] <= lambda_
|
| 319 |
if np.sum(thresh_idx) == 0:
|
| 320 |
loss = 0
|
| 321 |
else:
|