Spaces:

ngocminhta
/

falcon-api

Sleeping

App Files Community

ngocminhta committed on Jun 6, 2025

Commit

667fbf3

1 Parent(s): 9770ff8

update faidsetv2

Browse files

Files changed (7) hide show

app.py +1 -1
core/seen_db/index.faiss +3 -0
core/seen_db/index_meta.faiss +3 -0
core/seen_db/is_mixed_dict.pkl +3 -0
core/seen_db/label_dict.pkl +3 -0
core/seen_db/write_model_dict.pkl +3 -0
infer.py +1 -37

app.py CHANGED Viewed

@@ -64,7 +64,7 @@ async def predict(request: Request):
             label_dict=label_dict,
             is_mixed_dict=is_mixed_dict,
             text_list=text_list,
-            K=20)
         return JSONResponse(content={"results": results})
     elif mode == "advanced":
         return 0

             label_dict=label_dict,
             is_mixed_dict=is_mixed_dict,
             text_list=text_list,
+            K=21)
         return JSONResponse(content={"results": results})
     elif mode == "advanced":
         return 0

core/seen_db/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afedda60bfd82c2579bbc7bf15a7ab59a0cb3f885377f28f1e9c5f06c756ca1e
+size 398736429

core/seen_db/index_meta.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9346766d599b9d87e3521d77ea908d21ac9263b02bf46a0ede282f29a92ac8
+size 1297873

core/seen_db/is_mixed_dict.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5879f5bbcec0881436a54bcbed6be999269017cc8aa4542eb55f8a8689590fa
+size 1555703

core/seen_db/label_dict.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce731039f21189f4a7911ce9eb30566a5b03ae4912d5ba938a22ac9e350ee128
+size 1555703

core/seen_db/write_model_dict.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f18b37b64ef2556406fa303a31db1d17e64b220859fe2fc48e09c8d46311497e
+size 1555703

infer.py CHANGED Viewed

@@ -45,21 +45,6 @@ def load_pkl(path):
         return pickle.load(f)
 def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
-    # model = TextEmbeddingModel(opt.model_name).cuda()
-    # state_dict = torch.load(opt.model_path, map_location=model.model.device)
-    # new_state_dict={}
-    # for key in state_dict.keys():
-    #     if key.startswith('model.'):
-    #         new_state_dict[key[6:]]=state_dict[key]
-    # model.load_state_dict(state_dict)
-    # tokenizer=model.tokenizer
-    # index = Indexer(opt.embedding_dim)
-    # index.deserialize_from(opt.database_path)
-    # label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
-    # is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
-    # text = opt.text
     encoded_text = tokenizer.batch_encode_plus(
                         text_list,
                         return_tensors="pt",
@@ -72,7 +57,6 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
     top_ids_and_scores = index.search_knn(embeddings, K)
     pred = []
     for i, (ids, scores) in enumerate(top_ids_and_scores):
-        # print(f"Top {K} results for text:")
         sorted_scores = np.argsort(scores)
         sorted_scores = sorted_scores[::-1]
@@ -94,25 +78,5 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
         final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
         final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
         final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
-        # print(f"Final prediction: {final}")
         pred.append(final)
-    return pred
-# if __name__ == "__main__":
-#     parser = argparse.ArgumentParser()
-#     parser.add_argument('--embedding_dim', type=int, default=768)
-#     parser.add_argument('--database_path', type=str, default="database", help="Path to the index file")
-#     parser.add_argument("--model_path", type=str, default="core/model.pth",\
-#                          help="Path to the embedding model checkpoint")
-#     parser.add_argument('--model_name', type=str, default="ZurichNLPZurichNLP/unsup-simcse-xlm-roberta-base", help="Model name")
-#     parser.add_argument('--K', type=int, default=20, help="Search [1,K] nearest neighbors,choose the best K")
-#     parser.add_argument('--pooling', type=str, default="average", help="Pooling method, average or cls")
-#     parser.add_argument('--text', type=str, default="")
-#     parser.add_argument('--seed', type=int, default=0)
-#     opt = parser.parse_args()
-#     set_seed(opt.seed)
-#     infer(opt)

         return pickle.load(f)
 def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
     encoded_text = tokenizer.batch_encode_plus(
                         text_list,
                         return_tensors="pt",
     top_ids_and_scores = index.search_knn(embeddings, K)
     pred = []
     for i, (ids, scores) in enumerate(top_ids_and_scores):
         sorted_scores = np.argsort(scores)
         sorted_scores = sorted_scores[::-1]
         final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
         final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
         final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
         pred.append(final)
+    return pred