ngocminhta committed on
Commit
2fffdc8
·
1 Parent(s): 617c3f7

update model search

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. infer.py +24 -13
app.py CHANGED
@@ -75,7 +75,8 @@ async def predict(request: Request):
75
  is_mixed_dict=is_mixed_dict,
76
  write_model_dict=write_model_dict,
77
  text_list=text_list,
78
- K=9)
 
79
  return JSONResponse(content={"results": results})
80
 
81
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
75
  is_mixed_dict=is_mixed_dict,
76
  write_model_dict=write_model_dict,
77
  text_list=text_list,
78
+ K=21,
79
+ K_model=9)
80
  return JSONResponse(content={"results": results})
81
 
82
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
infer.py CHANGED
@@ -81,7 +81,7 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
81
  pred.append(final)
82
  return pred
83
 
84
- def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K):
85
  encoded_text = tokenizer.batch_encode_plus(
86
  text_list,
87
  return_tensors="pt",
@@ -91,36 +91,47 @@ def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, wri
91
  )
92
  encoded_text = {k: v for k, v in encoded_text.items()}
93
  embeddings = model(encoded_text).cpu().detach().numpy()
 
 
94
  top_ids_and_scores = index.search_knn(embeddings, K)
95
  pred = []
96
 
97
  for i, (ids, scores) in enumerate(top_ids_and_scores):
98
- sorted_scores = np.argsort(scores)
99
- sorted_scores = sorted_scores[::-1]
100
 
 
101
  topk_ids = [ids[j] for j in sorted_scores]
102
  topk_scores = [scores[j] for j in sorted_scores]
103
- weights = softmax_weights(topk_scores, temperature=0.4)
 
 
 
 
 
 
104
  candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
105
  initial_pred = Counter(candidate_models).most_common(1)[0][0]
106
 
107
- # Initialize fuzzy counts for both 3-class and model-specific predictions
108
  fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
 
 
 
 
 
 
 
 
 
 
109
  fuzzy_cnt_model = {
110
  (1, 0, 0): 0.0, # Human
111
  (0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
112
  (1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
113
  }
114
 
115
- for id, weight in zip(topk_ids, weights):
116
- # Update 3-class fuzzy counts
117
- label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
118
- boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
119
- fuzzy_cnt_3class[label_3class] += weight * boost_3class
120
-
121
- # Update model-specific fuzzy counts
122
  label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
123
- boost_model = class_type_boost(is_mixed_dict[int(id)], initial_pred)
124
  fuzzy_cnt_model[label_model] += weight * boost_model
125
 
126
  # Calculate 3-class probabilities
 
81
  pred.append(final)
82
  return pred
83
 
84
+ def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K, K_model):
85
  encoded_text = tokenizer.batch_encode_plus(
86
  text_list,
87
  return_tensors="pt",
 
91
  )
92
  encoded_text = {k: v for k, v in encoded_text.items()}
93
  embeddings = model(encoded_text).cpu().detach().numpy()
94
+
95
+ # Get predictions using K=21
96
  top_ids_and_scores = index.search_knn(embeddings, K)
97
  pred = []
98
 
99
  for i, (ids, scores) in enumerate(top_ids_and_scores):
100
+ sorted_scores = np.argsort(scores)[::-1]
 
101
 
102
+ # Get all 21 results for 3-class prediction
103
  topk_ids = [ids[j] for j in sorted_scores]
104
  topk_scores = [scores[j] for j in sorted_scores]
105
+
106
+ # Get top 9 results for model-specific prediction
107
+ topk_ids_model = topk_ids[:K_model]
108
+ topk_scores_model = topk_scores[:K_model]
109
+
110
+ # Process 3-class prediction (using all 21)
111
+ weights_3class = softmax_weights(topk_scores, temperature=0.1)
112
  candidate_models = [is_mixed_dict[int(_id)] for _id in topk_ids]
113
  initial_pred = Counter(candidate_models).most_common(1)[0][0]
114
 
 
115
  fuzzy_cnt_3class = {(1,0): 0.0, (0,10^3): 0.0, (1,1): 0.0}
116
+ for id, weight in zip(topk_ids, weights_3class):
117
+ label_3class = (label_dict[int(id)], is_mixed_dict[int(id)])
118
+ boost_3class = class_type_boost(is_mixed_dict[int(id)], initial_pred)
119
+ fuzzy_cnt_3class[label_3class] += weight * boost_3class
120
+
121
+ # Process model-specific prediction (using top 9)
122
+ weights_model = softmax_weights(topk_scores_model, temperature=0.4)
123
+ candidate_models_model = [is_mixed_dict[int(_id)] for _id in topk_ids_model]
124
+ initial_pred_model = Counter(candidate_models_model).most_common(1)[0][0]
125
+
126
  fuzzy_cnt_model = {
127
  (1, 0, 0): 0.0, # Human
128
  (0, 10^3, 1): 0.0, (0, 10^3, 2): 0.0, (0, 10^3, 3): 0.0, (0, 10^3, 4): 0.0, # AI
129
  (1, 1, 1): 0.0, (1, 1, 2): 0.0, (1, 1, 3): 0.0, (1, 1, 4): 0.0 # Human+AI
130
  }
131
 
132
+ for id, weight in zip(topk_ids_model, weights_model):
 
 
 
 
 
 
133
  label_model = (label_dict[int(id)], is_mixed_dict[int(id)], write_model_dict[int(id)])
134
+ boost_model = class_type_boost(is_mixed_dict[int(id)], initial_pred_model)
135
  fuzzy_cnt_model[label_model] += weight * boost_model
136
 
137
  # Calculate 3-class probabilities