Spaces:
Sleeping
Sleeping
ngocminhta
committed on
Commit
·
2fffdc8
1
Parent(s):
617c3f7
update model search
Browse files
app.py
CHANGED
|
@@ -75,7 +75,8 @@ async def predict(request: Request):
|
|
| 75 |
is_mixed_dict=is_mixed_dict,
|
| 76 |
write_model_dict=write_model_dict,
|
| 77 |
text_list=text_list,
|
| 78 |
-
K=
|
|
|
|
| 79 |
return JSONResponse(content={"results": results})
|
| 80 |
|
| 81 |
app.mount("/", StaticFiles(directory="static", html=True), name="static")
|
|
|
|
| 75 |
is_mixed_dict=is_mixed_dict,
|
| 76 |
write_model_dict=write_model_dict,
|
| 77 |
text_list=text_list,
|
| 78 |
+
K=21,
|
| 79 |
+
K_model=9)
|
| 80 |
return JSONResponse(content={"results": results})
|
| 81 |
|
| 82 |
app.mount("/", StaticFiles(directory="static", html=True), name="static")
|
infer.py
CHANGED
|
@@ -81,7 +81,7 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
|
|
| 81 |
pred.append(final)
|
| 82 |
return pred
|
| 83 |
|
| 84 |
-
def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K):
|
| 85 |
encoded_text = tokenizer.batch_encode_plus(
|
| 86 |
text_list,
|
| 87 |
return_tensors="pt",
|
|
@@ -91,36 +91,47 @@ def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, wri
|
|
| 91 |
)
|
| 92 |
encoded_text = {k: v for k, v in encoded_text.items()}
|
| 93 |
embeddings = model(encoded_text).cpu().detach().numpy()
|
|
|
|
|
|
|
| 94 |
top_ids_and_scores = index.search_knn(embeddings, K)
|
| 95 |
pred = []
|
| 96 |
|
| 97 |
for i, (ids, scores) in enumerate(top_ids_and_scores):
|
| 98 |
-
sorted_scores = np.argsort(scores)
|
| 99 |
-
sorted_scores = sorted_scores[::-1]
|
| 100 |
|
|
|
|
| 101 |
topk_ids = [ids[j] for j in sorted_scores]
|
| 102 |
topk_scores = [scores[j] for j in sorted_scores]
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
|
| 105 |
initial_pred = Counter(candidate_models).most_common(1)[0][0]
|
| 106 |
|
| 107 |
-
# Initialize fuzzy counts for both 3-class and model-specific predictions
|
| 108 |
fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
fuzzy_cnt_model = {
|
| 110 |
(1, 0, 0): 0.0, # Human
|
| 111 |
(0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
|
| 112 |
(1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
|
| 113 |
}
|
| 114 |
|
| 115 |
-
for id, weight in zip(
|
| 116 |
-
# Update 3-class fuzzy counts
|
| 117 |
-
label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
|
| 118 |
-
boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
|
| 119 |
-
fuzzy_cnt_3class[label_3class] += weight * boost_3class
|
| 120 |
-
|
| 121 |
-
# Update model-specific fuzzy counts
|
| 122 |
label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
|
| 123 |
-
boost_model = class_type_boost(is_mixed_dict[int(id)],
|
| 124 |
fuzzy_cnt_model[label_model] += weight * boost_model
|
| 125 |
|
| 126 |
# Calculate 3-class probabilities
|
|
|
|
| 81 |
pred.append(final)
|
| 82 |
return pred
|
| 83 |
|
| 84 |
+
def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K, K_model):
|
| 85 |
encoded_text = tokenizer.batch_encode_plus(
|
| 86 |
text_list,
|
| 87 |
return_tensors="pt",
|
|
|
|
| 91 |
)
|
| 92 |
encoded_text = {k: v for k, v in encoded_text.items()}
|
| 93 |
embeddings = model(encoded_text).cpu().detach().numpy()
|
| 94 |
+
|
| 95 |
+
# Get predictions using K=21
|
| 96 |
top_ids_and_scores = index.search_knn(embeddings, K)
|
| 97 |
pred = []
|
| 98 |
|
| 99 |
for i, (ids, scores) in enumerate(top_ids_and_scores):
|
| 100 |
+
sorted_scores = np.argsort(scores)[::-1]
|
|
|
|
| 101 |
|
| 102 |
+
# Get all 21 results for 3-class prediction
|
| 103 |
topk_ids = [ids[j] for j in sorted_scores]
|
| 104 |
topk_scores = [scores[j] for j in sorted_scores]
|
| 105 |
+
|
| 106 |
+
# Get top 9 results for model-specific prediction
|
| 107 |
+
topk_ids_model = topk_ids[:K_model]
|
| 108 |
+
topk_scores_model = topk_scores[:K_model]
|
| 109 |
+
|
| 110 |
+
# Process 3-class prediction (using all 21)
|
| 111 |
+
weights_3class = softmax_weights(topk_scores, temperature=0.1)
|
| 112 |
candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
|
| 113 |
initial_pred = Counter(candidate_models).most_common(1)[0][0]
|
| 114 |
|
|
|
|
| 115 |
fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
|
| 116 |
+
for id, weight in zip(topk_ids, weights_3class):
|
| 117 |
+
label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
|
| 118 |
+
boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
|
| 119 |
+
fuzzy_cnt_3class[label_3class] += weight * boost_3class
|
| 120 |
+
|
| 121 |
+
# Process model-specific prediction (using top 9)
|
| 122 |
+
weights_model = softmax_weights(topk_scores_model, temperature=0.4)
|
| 123 |
+
candidate_models_model = [is_mixed_dict[int(_id)] for _id in topk_ids_model]
|
| 124 |
+
initial_pred_model = Counter(candidate_models_model).most_common(1)[0][0]
|
| 125 |
+
|
| 126 |
fuzzy_cnt_model = {
|
| 127 |
(1, 0, 0): 0.0, # Human
|
| 128 |
(0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
|
| 129 |
(1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
|
| 130 |
}
|
| 131 |
|
| 132 |
+
for id, weight in zip(topk_ids_model, weights_model):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
|
| 134 |
+
boost_model = class_type_boost(is_mixed_dict[int(id)], initial_pred_model)
|
| 135 |
fuzzy_cnt_model[label_model] += weight * boost_model
|
| 136 |
|
| 137 |
# Calculate 3-class probabilities
|