Spaces:
Sleeping
Sleeping
ngocminhta
committed on
Commit
·
667fbf3
1
Parent(s):
9770ff8
update faidsetv2
Browse files
- app.py +1 -1
- core/seen_db/index.faiss +3 -0
- core/seen_db/index_meta.faiss +3 -0
- core/seen_db/is_mixed_dict.pkl +3 -0
- core/seen_db/label_dict.pkl +3 -0
- core/seen_db/write_model_dict.pkl +3 -0
- infer.py +1 -37
app.py
CHANGED
|
@@ -64,7 +64,7 @@ async def predict(request: Request):
|
|
| 64 |
label_dict=label_dict,
|
| 65 |
is_mixed_dict=is_mixed_dict,
|
| 66 |
text_list=text_list,
|
| 67 |
-
K=
|
| 68 |
return JSONResponse(content={"results": results})
|
| 69 |
elif mode == "advanced":
|
| 70 |
return 0
|
|
|
|
| 64 |
label_dict=label_dict,
|
| 65 |
is_mixed_dict=is_mixed_dict,
|
| 66 |
text_list=text_list,
|
| 67 |
+
K=21)
|
| 68 |
return JSONResponse(content={"results": results})
|
| 69 |
elif mode == "advanced":
|
| 70 |
return 0
|
core/seen_db/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afedda60bfd82c2579bbc7bf15a7ab59a0cb3f885377f28f1e9c5f06c756ca1e
|
| 3 |
+
size 398736429
|
core/seen_db/index_meta.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc9346766d599b9d87e3521d77ea908d21ac9263b02bf46a0ede282f29a92ac8
|
| 3 |
+
size 1297873
|
core/seen_db/is_mixed_dict.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5879f5bbcec0881436a54bcbed6be999269017cc8aa4542eb55f8a8689590fa
|
| 3 |
+
size 1555703
|
core/seen_db/label_dict.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce731039f21189f4a7911ce9eb30566a5b03ae4912d5ba938a22ac9e350ee128
|
| 3 |
+
size 1555703
|
core/seen_db/write_model_dict.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f18b37b64ef2556406fa303a31db1d17e64b220859fe2fc48e09c8d46311497e
|
| 3 |
+
size 1555703
|
infer.py
CHANGED
|
@@ -45,21 +45,6 @@ def load_pkl(path):
|
|
| 45 |
return pickle.load(f)
|
| 46 |
|
| 47 |
def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
|
| 48 |
-
# model = TextEmbeddingModel(opt.model_name).cuda()
|
| 49 |
-
# state_dict = torch.load(opt.model_path, map_location=model.model.device)
|
| 50 |
-
# new_state_dict={}
|
| 51 |
-
# for key in state_dict.keys():
|
| 52 |
-
# if key.startswith('model.'):
|
| 53 |
-
# new_state_dict[key[6:]]=state_dict[key]
|
| 54 |
-
# model.load_state_dict(state_dict)
|
| 55 |
-
# tokenizer=model.tokenizer
|
| 56 |
-
|
| 57 |
-
# index = Indexer(opt.embedding_dim)
|
| 58 |
-
# index.deserialize_from(opt.database_path)
|
| 59 |
-
# label_dict=load_pkl(os.path.join(opt.database_path,'label_dict.pkl'))
|
| 60 |
-
# is_mixed_dict=load_pkl(os.path.join(opt.database_path,'is_mixed_dict.pkl'))
|
| 61 |
-
|
| 62 |
-
# text = opt.text
|
| 63 |
encoded_text = tokenizer.batch_encode_plus(
|
| 64 |
text_list,
|
| 65 |
return_tensors="pt",
|
|
@@ -72,7 +57,6 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
|
|
| 72 |
top_ids_and_scores = index.search_knn(embeddings, K)
|
| 73 |
pred = []
|
| 74 |
for i, (ids, scores) in enumerate(top_ids_and_scores):
|
| 75 |
-
# print(f"Top {K} results for text:")
|
| 76 |
sorted_scores = np.argsort(scores)
|
| 77 |
sorted_scores = sorted_scores[::-1]
|
| 78 |
|
|
@@ -94,25 +78,5 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list,
|
|
| 94 |
final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
|
| 95 |
final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
|
| 96 |
final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
|
| 97 |
-
# print(f"Final prediction: {final}")
|
| 98 |
pred.append(final)
|
| 99 |
-
return pred
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
# if __name__ == "__main__":
|
| 103 |
-
# parser = argparse.ArgumentParser()
|
| 104 |
-
# parser.add_argument('--embedding_dim', type=int, default=768)
|
| 105 |
-
# parser.add_argument('--database_path', type=str, default="database", help="Path to the index file")
|
| 106 |
-
|
| 107 |
-
# parser.add_argument("--model_path", type=str, default="core/model.pth",\
|
| 108 |
-
# help="Path to the embedding model checkpoint")
|
| 109 |
-
# parser.add_argument('--model_name', type=str, default="ZurichNLPZurichNLP/unsup-simcse-xlm-roberta-base", help="Model name")
|
| 110 |
-
|
| 111 |
-
# parser.add_argument('--K', type=int, default=20, help="Search [1,K] nearest neighbors,choose the best K")
|
| 112 |
-
# parser.add_argument('--pooling', type=str, default="average", help="Pooling method, average or cls")
|
| 113 |
-
# parser.add_argument('--text', type=str, default="")
|
| 114 |
-
# parser.add_argument('--seed', type=int, default=0)
|
| 115 |
-
|
| 116 |
-
# opt = parser.parse_args()
|
| 117 |
-
# set_seed(opt.seed)
|
| 118 |
-
# infer(opt)
|
|
|
|
| 45 |
return pickle.load(f)
|
| 46 |
|
| 47 |
def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
encoded_text = tokenizer.batch_encode_plus(
|
| 49 |
text_list,
|
| 50 |
return_tensors="pt",
|
|
|
|
| 57 |
top_ids_and_scores = index.search_knn(embeddings, K)
|
| 58 |
pred = []
|
| 59 |
for i, (ids, scores) in enumerate(top_ids_and_scores):
|
|
|
|
| 60 |
sorted_scores = np.argsort(scores)
|
| 61 |
sorted_scores = sorted_scores[::-1]
|
| 62 |
|
|
|
|
| 78 |
final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
|
| 79 |
final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
|
| 80 |
final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
|
|
|
|
| 81 |
pred.append(final)
|
| 82 |
+
return pred
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|