ngocminhta committed on
Commit
5b2f797
·
1 Parent(s): 6b4e43b

update batch process

Browse files
app.py CHANGED
@@ -43,12 +43,6 @@ def load_model_resources():
43
  global model, tokenizer, index, label_dict, is_mixed_dict
44
 
45
  model = TextEmbeddingModel(opt.model_name)
46
- # state_dict = torch.load(opt.model_path, map_location=model.model.device)
47
- # new_state_dict={}
48
- # for key in state_dict.keys():
49
- # if key.startswith('model.'):
50
- # new_state_dict[key[6:]]=state_dict[key]
51
- # model.load_state_dict(state_dict)
52
  tokenizer=model.tokenizer
53
 
54
  index = Indexer(opt.embedding_dim)
@@ -64,16 +58,13 @@ async def predict(request: Request):
64
  text_list = data.get("text", [])
65
 
66
  if mode == "normal":
67
- results = []
68
- for text in text_list:
69
- result = infer_3_class(model=model,
70
- tokenizer=tokenizer,
71
- index=index,
72
- label_dict=label_dict,
73
- is_mixed_dict=is_mixed_dict,
74
- text=text,
75
- K=20)
76
- results.append(result)
77
  return JSONResponse(content={"results": results})
78
  elif mode == "advanced":
79
  return 0
 
43
  global model, tokenizer, index, label_dict, is_mixed_dict
44
 
45
  model = TextEmbeddingModel(opt.model_name)
 
 
 
 
 
 
46
  tokenizer=model.tokenizer
47
 
48
  index = Indexer(opt.embedding_dim)
 
58
  text_list = data.get("text", [])
59
 
60
  if mode == "normal":
61
+ results = infer_3_class(model=model,
62
+ tokenizer=tokenizer,
63
+ index=index,
64
+ label_dict=label_dict,
65
+ is_mixed_dict=is_mixed_dict,
66
+ text_list=text_list,
67
+ K=20)
 
 
 
68
  return JSONResponse(content={"results": results})
69
  elif mode == "advanced":
70
  return 0
infer.py CHANGED
@@ -44,7 +44,7 @@ def load_pkl(path):
44
  with open(path, 'rb') as f:
45
  return pickle.load(f)
46
 
47
- def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text, K):
48
  # model = TextEmbeddingModel(opt.model_name).cuda()
49
  # state_dict = torch.load(opt.model_path, map_location=model.model.device)
50
  # new_state_dict={}
@@ -61,7 +61,7 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text, K):
61
 
62
  # text = opt.text
63
  encoded_text = tokenizer.batch_encode_plus(
64
- [text],
65
  return_tensors="pt",
66
  max_length=512,
67
  padding="max_length",
@@ -89,32 +89,14 @@ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text, K):
89
  boost = class_type_boost(is_mixed_dict[int(id)],initial_pred)
90
  fuzzy_cnt[label] += weight * boost
91
 
92
- # final = max(fuzzy_cnt, key=fuzzy_cnt.get)
93
-
94
- # print(f"Top {opt.K} results for text:")
95
- # cnt = {(1,0):0,(0,10^3):0,(1,1):0}
96
- # for j, (id, score) in enumerate(zip(ids, scores)):
97
- # print(f"{j+1}. ID {id} Label {label_dict[int(id)]} Is_mixed {is_mixed_dict[int(id)]} Score {score}")
98
- # cnt[(label_dict[int(id)], is_mixed_dict[int(id)])]+=1
99
- # final = max(cnt, key=cnt.get)
100
- # pred.append(final)
101
- # if final==(1,0):
102
- # print("Human")
103
- # return 0
104
- # elif final==(0,10^3):
105
- # print("AI")
106
- # return 1
107
- # else:
108
- # print("Mixed")
109
- # return 2
110
- # pred.append(final)
111
- total_score = sum(fuzzy_cnt.values())
112
- final = dict()
113
- final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
114
- final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
115
- final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
116
- print(f"Final prediction: {final}")
117
- return final
118
 
119
 
120
  if __name__ == "__main__":
 
44
  with open(path, 'rb') as f:
45
  return pickle.load(f)
46
 
47
+ def infer_3_class(model, tokenizer, index, label_dict, is_mixed_dict, text_list, K):
48
  # model = TextEmbeddingModel(opt.model_name).cuda()
49
  # state_dict = torch.load(opt.model_path, map_location=model.model.device)
50
  # new_state_dict={}
 
61
 
62
  # text = opt.text
63
  encoded_text = tokenizer.batch_encode_plus(
64
+ text_list,
65
  return_tensors="pt",
66
  max_length=512,
67
  padding="max_length",
 
89
  boost = class_type_boost(is_mixed_dict[int(id)],initial_pred)
90
  fuzzy_cnt[label] += weight * boost
91
 
92
+ total_score = sum(fuzzy_cnt.values())
93
+ final = dict()
94
+ final[0] = round(fuzzy_cnt[(1,0)] / total_score*100,2)
95
+ final[1] = round(fuzzy_cnt[(0,10^3)] / total_score*100,2)
96
+ final[2] = round(fuzzy_cnt[(1,1)] / total_score*100,2)
97
+ print(f"Final prediction: {final}")
98
+ pred.append(final)
99
+ return pred
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  if __name__ == "__main__":
unsup-simcse-xlm-roberta-base/.DS_Store DELETED
Binary file (6.15 kB)
 
unsup-simcse-xlm-roberta-base/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "xlm-roberta-base",
3
- "architectures": [
4
- "RobertaForCL"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "eos_token_id": 2,
9
- "gradient_checkpointing": false,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "xlm-roberta",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
- "output_past": true,
21
- "pad_token_id": 1,
22
- "position_embedding_type": "absolute",
23
- "transformers_version": "4.2.1",
24
- "type_vocab_size": 1,
25
- "use_cache": true,
26
- "vocab_size": 250002
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
unsup-simcse-xlm-roberta-base/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
unsup-simcse-xlm-roberta-base/trainer_state.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.9999040030719017,
5
- "global_step": 1953,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.26,
12
- "learning_rate": 7.43983614951357e-06,
13
- "loss": 0.1187,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.51,
18
- "learning_rate": 4.8796722990271386e-06,
19
- "loss": 0.0011,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.77,
24
- "learning_rate": 2.319508448540707e-06,
25
- "loss": 0.0006,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 1.0,
30
- "step": 1953,
31
- "train_runtime": 2937.1499,
32
- "train_samples_per_second": 0.665
33
- }
34
- ],
35
- "max_steps": 1953,
36
- "num_train_epochs": 1,
37
- "total_flos": 262568325479694336,
38
- "trial_name": null,
39
- "trial_params": null
40
- }