ngocminhta committed on
Commit
2fffdc8
·
1 Parent(s): 617c3f7

update model search

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. infer.py +24 -13
app.py CHANGED
@@ -75,7 +75,8 @@ async def predict(request: Request):
75
  is_mixed_dict=is_mixed_dict,
76
  write_model_dict=write_model_dict,
77
  text_list=text_list,
78
- K=9)
 
79
  return JSONResponse(content={"results": results})
80
 
81
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
75
  is_mixed_dict=is_mixed_dict,
76
  write_model_dict=write_model_dict,
77
  text_list=text_list,
78
+ K=21,
79
+ K_model=9)
80
  return JSONResponse(content={"results": results})
81
 
82
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
infer.py CHANGED
@@ -81,7 +81,7 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
81
  pred.append(final)
82
  return pred
83
 
84
- def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K):
85
  encoded_text = tokenizer.batch_encode_plus(
86
  text_list,
87
  return_tensors="pt",
@@ -91,36 +91,47 @@ def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, wri
91
  )
92
  encoded_text = {k: v for k, v in encoded_text.items()}
93
  embeddings = model(encoded_text).cpu().detach().numpy()
 
 
94
  top_ids_and_scores = index.search_knn(embeddings, K)
95
  pred = []
96
 
97
  for i, (ids, scores) in enumerate(top_ids_and_scores):
98
- sorted_scores = np.argsort(scores)
99
- sorted_scores = sorted_scores[::-1]
100
 
 
101
  topk_ids = [ids[j] for j in sorted_scores]
102
  topk_scores = [scores[j] for j in sorted_scores]
103
- weights = softmax_weights(topk_scores, temperature=0.4)
 
 
 
 
 
 
104
  candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
105
  initial_pred = Counter(candidate_models).most_common(1)[0][0]
106
 
107
- # Initialize fuzzy counts for both 3-class and model-specific predictions
108
  fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
 
 
 
 
 
 
 
 
 
 
109
  fuzzy_cnt_model = {
110
  (1, 0, 0): 0.0, # Human
111
  (0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
112
  (1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
113
  }
114
 
115
- for id, weight in zip(topk_ids, weights):
116
- # Update 3-class fuzzy counts
117
- label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
118
- boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
119
- fuzzy_cnt_3class[label_3class] += weight * boost_3class
120
-
121
- # Update model-specific fuzzy counts
122
  label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
123
- boost_model = class_type_boost(is_mixed_dict[int(id)], initial_pred)
124
  fuzzy_cnt_model[label_model] += weight * boost_model
125
 
126
  # Calculate 3-class probabilities
 
81
  pred.append(final)
82
  return pred
83
 
84
+ def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K, K_model):
85
  encoded_text = tokenizer.batch_encode_plus(
86
  text_list,
87
  return_tensors="pt",
 
91
  )
92
  encoded_text = {k: v for k, v in encoded_text.items()}
93
  embeddings = model(encoded_text).cpu().detach().numpy()
94
+
95
+ # Get predictions using K=21
96
  top_ids_and_scores = index.search_knn(embeddings, K)
97
  pred = []
98
 
99
  for i, (ids, scores) in enumerate(top_ids_and_scores):
100
+ sorted_scores = np.argsort(scores)[::-1]
 
101
 
102
+ # Get all 21 results for 3-class prediction
103
  topk_ids = [ids[j] for j in sorted_scores]
104
  topk_scores = [scores[j] for j in sorted_scores]
105
+
106
+ # Get top 9 results for model-specific prediction
107
+ topk_ids_model = topk_ids[:K_model]
108
+ topk_scores_model = topk_scores[:K_model]
109
+
110
+ # Process 3-class prediction (using all 21)
111
+ weights_3class = softmax_weights(topk_scores, temperature=0.1)
112
  candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
113
  initial_pred = Counter(candidate_models).most_common(1)[0][0]
114
 
 
115
  fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
116
+ for id, weight in zip(topk_ids, weights_3class):
117
+ label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
118
+ boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
119
+ fuzzy_cnt_3class[label_3class] += weight * boost_3class
120
+
121
+ # Process model-specific prediction (using top 9)
122
+ weights_model = softmax_weights(topk_scores_model, temperature=0.4)
123
+ candidate_models_model = [is_mixed_dict[int(_id)] for _id in topk_ids_model]
124
+ initial_pred_model = Counter(candidate_models_model).most_common(1)[0][0]
125
+
126
  fuzzy_cnt_model = {
127
  (1, 0, 0): 0.0, # Human
128
  (0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
129
  (1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
130
  }
131
 
132
+ for id, weight in zip(topk_ids_model, weights_model):
 
 
 
 
 
 
133
  label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
134
+ boost_model = class_type_boost(is_mixed_dict[int(id)], initial_pred_model)
135
  fuzzy_cnt_model[label_model] += weight * boost_model
136
 
137
  # Calculate 3-class probabilities