Spaces:

ngocminhta
/

falcon-api

Sleeping

App Files Files Community

ngocminhta committed on Jun 10, 2025

Commit

617c3f7

1 Parent(s): 667fbf3

update model specific pred

Browse files

Files changed (2) hide show

app.py +12 -3
infer.py +62 -0

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from src.text_embedding import TextEmbeddingModel
 from src.index import Indexer
 import os
 import pickle
-from infer import infer_3_class
 import uvicorn
 from datasets import disable_caching
 disable_caching()
@@ -40,7 +40,7 @@ def load_pkl(path):
 @app.on_event("startup")
 def load_model_resources():
-    global model, tokenizer, index, label_dict, is_mixed_dict
     model = TextEmbeddingModel(opt.model_name)
     tokenizer=model.tokenizer
@@ -49,6 +49,7 @@ def load_model_resources():
     index.deserialize_from(opt.database_path)
     label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
     is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
 @app.route('/predict', methods=['POST'])
@@ -67,7 +68,15 @@ async def predict(request: Request):
             K=21)
         return JSONResponse(content={"results": results})
     elif mode == "advanced":
-        return 0
 app.mount("/", StaticFiles(directory="static", html=True), name="static")

 from src.index import Indexer
 import os
 import pickle
+from infer import infer_3_class, infer_model_specific
 import uvicorn
 from datasets import disable_caching
 disable_caching()
 @app.on_event("startup")
 def load_model_resources():
+    global model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict
     model = TextEmbeddingModel(opt.model_name)
     tokenizer=model.tokenizer
     index.deserialize_from(opt.database_path)
     label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
     is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
+    write_model_dict=load_pkl(os.path.join(opt.database_path,'write_model_dict.pkl'))
 @app.route('/predict', methods=['POST'])
             K=21)
         return JSONResponse(content={"results": results})
     elif mode == "advanced":
+        results = infer_model_specific(model=model,
+            tokenizer=tokenizer,
+            index=index,
+            label_dict=label_dict,
+            is_mixed_dict=is_mixed_dict,
+            write_model_dict=write_model_dict,
+            text_list=text_list,
+            K=9)
+        return JSONResponse(content={"results": results})
 app.mount("/", StaticFiles(directory="static", html=True), name="static")

infer.py CHANGED Viewed

@@ -79,4 +79,66 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
         final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
         final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
         pred.append(final)
     return pred

         final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
         final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
         pred.append(final)
+    return pred
def infer_model_specific(model, tokenizer, index, label_dict, is_mixed_dict, write_model_dict, text_list, K):
    """Score each text over three classes and pick its most likely writer label.

    Every text is tokenized (padded/truncated to 512 tokens), embedded with
    ``model`` and matched against ``index`` for its ``K`` nearest neighbours.
    Each neighbour casts a vote worth its softmax weight (temperature 0.4)
    multiplied by ``class_type_boost``; votes are accumulated twice, once per
    ``(label, is_mixed)`` pair and once per ``(label, is_mixed, write_model)``
    triple.

    Args:
        model: Callable that maps the encoded batch to an embedding tensor.
        tokenizer: Tokenizer exposing ``batch_encode_plus``.
        index: Object exposing ``search_knn(embeddings, K)``, which yields one
            ``(ids, scores)`` pair per input text.
        label_dict: Maps neighbour id to the first component of its label.
        is_mixed_dict: Maps neighbour id to the second component of its label.
        write_model_dict: Maps neighbour id to the third component of its
            label (the writer-model slot, 0-4 in the predefined keys).
        text_list: Texts to classify.
        K: Number of neighbours to retrieve per text.

    Returns:
        One dict per input text with two entries:
        ``"score"`` maps 0/1/2 to the percentage (rounded to 2 decimals) of
        the vote mass on ``(1, 0)`` / ``(0, 10 ^ 3)`` / ``(1, 1)`` -- Human,
        AI and Human+AI respectively; ``"model"`` is the
        ``(label, is_mixed, write_model)`` key with the largest vote mass.

    Raises:
        KeyError: If a neighbour id is missing from one of the dictionaries,
            or its label combination is not one of the predefined keys.
    """
    encoded_text = tokenizer.batch_encode_plus(
        text_list,
        return_tensors="pt",
        max_length=512,
        padding="max_length",
        truncation=True,
    )
    # Hand the model a plain dict rather than the tokenizer's own container.
    encoded_text = dict(encoded_text.items())
    embeddings = model(encoded_text).cpu().detach().numpy()
    top_ids_and_scores = index.search_knn(embeddings, K)

    # NOTE(review): in Python `10 ^ 3` is bitwise XOR and evaluates to 9, not
    # 1000. The value is kept unchanged because these keys have to match what
    # is stored in is_mixed_dict -- confirm against the database builder.
    ai_flag = 10 ^ 3

    pred = []
    for ids, scores in top_ids_and_scores:
        # Visit neighbours from the highest score to the lowest.
        order = np.argsort(scores)[::-1]
        topk_ids = [int(ids[j]) for j in order]
        topk_scores = [scores[j] for j in order]
        weights = softmax_weights(topk_scores, temperature=0.4)

        # The most frequent is_mixed value among the neighbours is the
        # preliminary guess that class_type_boost is given for every vote.
        mixed_flags = [is_mixed_dict[neighbour_id] for neighbour_id in topk_ids]
        initial_pred = Counter(mixed_flags).most_common(1)[0][0]

        # Accumulators for the 3-class and the model-specific votes. Key order
        # matters for the model-specific one: max() below keeps the first key
        # on ties.
        fuzzy_cnt_3class = {(1, 0): 0.0, (0, ai_flag): 0.0, (1, 1): 0.0}
        fuzzy_cnt_model = {(1, 0, 0): 0.0}  # Human
        fuzzy_cnt_model.update({(0, ai_flag, m): 0.0 for m in range(1, 5)})  # AI
        fuzzy_cnt_model.update({(1, 1, m): 0.0 for m in range(1, 5)})  # Human+AI

        for neighbour_id, mixed, weight in zip(topk_ids, mixed_flags, weights):
            label = label_dict[neighbour_id]
            # Both tallies use the same boosted weight, so compute it once.
            vote = weight * class_type_boost(mixed, initial_pred)
            fuzzy_cnt_3class[(label, mixed)] += vote
            fuzzy_cnt_model[(label, mixed, write_model_dict[neighbour_id])] += vote

        # Turn the 3-class vote mass into percentages.
        total_score_3class = sum(fuzzy_cnt_3class.values())
        final_3class = {
            0: round(fuzzy_cnt_3class[(1, 0)] / total_score_3class * 100, 2),
            1: round(fuzzy_cnt_3class[(0, ai_flag)] / total_score_3class * 100, 2),
            2: round(fuzzy_cnt_3class[(1, 1)] / total_score_3class * 100, 2),
        }

        pred.append({
            "score": final_3class,
            "model": max(fuzzy_cnt_model, key=fuzzy_cnt_model.get),
        })
    return pred