Update model to V.0.5.3
Browse files
- {Model-V0.5.1 → Model-V0.5.3}/config.json +5 -5
- {Model-V0.5.1 → Model-V0.5.3}/merges.txt +0 -0
- {Model-V0.5.1 → Model-V0.5.3}/special_tokens_map.json +0 -0
- {Model-V0.5.1 → Model-V0.5.3}/tokenizer.json +0 -0
- {Model-V0.5.1 → Model-V0.5.3}/tokenizer_config.json +0 -0
- Model-V0.5.3/training_args.bin +3 -0
- {Model-V0.5.1 → Model-V0.5.3}/vocab.json +0 -0
- Model_Evaluate.py +8 -5
{Model-V0.5.1 → Model-V0.5.3}/config.json
RENAMED
|
@@ -10,14 +10,14 @@
|
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 768,
|
| 12 |
"id2label": {
|
| 13 |
-
"0": "
|
| 14 |
-
"1": "
|
| 15 |
},
|
| 16 |
"initializer_range": 0.02,
|
| 17 |
"intermediate_size": 3072,
|
| 18 |
"label2id": {
|
| 19 |
-
"AI":
|
| 20 |
-
"Human":
|
| 21 |
},
|
| 22 |
"layer_norm_eps": 1e-05,
|
| 23 |
"max_position_embeddings": 514,
|
|
@@ -29,7 +29,7 @@
|
|
| 29 |
"position_embedding_type": "absolute",
|
| 30 |
"problem_type": "single_label_classification",
|
| 31 |
"torch_dtype": "float32",
|
| 32 |
-
"transformers_version": "4.52.
|
| 33 |
"type_vocab_size": 1,
|
| 34 |
"use_cache": true,
|
| 35 |
"vocab_size": 50265
|
|
|
|
| 10 |
"hidden_dropout_prob": 0.1,
|
| 11 |
"hidden_size": 768,
|
| 12 |
"id2label": {
|
| 13 |
+
"0": "AI",
|
| 14 |
+
"1": "Human"
|
| 15 |
},
|
| 16 |
"initializer_range": 0.02,
|
| 17 |
"intermediate_size": 3072,
|
| 18 |
"label2id": {
|
| 19 |
+
"AI": 0,
|
| 20 |
+
"Human": 1
|
| 21 |
},
|
| 22 |
"layer_norm_eps": 1e-05,
|
| 23 |
"max_position_embeddings": 514,
|
|
|
|
| 29 |
"position_embedding_type": "absolute",
|
| 30 |
"problem_type": "single_label_classification",
|
| 31 |
"torch_dtype": "float32",
|
| 32 |
+
"transformers_version": "4.52.3",
|
| 33 |
"type_vocab_size": 1,
|
| 34 |
"use_cache": true,
|
| 35 |
"vocab_size": 50265
|
{Model-V0.5.1 → Model-V0.5.3}/merges.txt
RENAMED
|
File without changes
|
{Model-V0.5.1 → Model-V0.5.3}/special_tokens_map.json
RENAMED
|
File without changes
|
{Model-V0.5.1 → Model-V0.5.3}/tokenizer.json
RENAMED
|
File without changes
|
{Model-V0.5.1 → Model-V0.5.3}/tokenizer_config.json
RENAMED
|
File without changes
|
Model-V0.5.3/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aa5a076c150d5c02aac3d0571d3ace496160b6ab102fbec8f48ae9b4ccc0cf1
|
| 3 |
+
size 5304
|
{Model-V0.5.1 → Model-V0.5.3}/vocab.json
RENAMED
|
File without changes
|
Model_Evaluate.py
CHANGED
|
@@ -6,7 +6,7 @@ import subprocess
|
|
| 6 |
import shutil
|
| 7 |
import torch
|
| 8 |
|
| 9 |
-
model_path = "Model-V0.5.
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 11 |
config = AutoConfig.from_pretrained(model_path)
|
| 12 |
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
|
|
@@ -16,7 +16,8 @@ label2id = model.config.label2id
|
|
| 16 |
import re
|
| 17 |
preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
|
| 18 |
block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
|
| 19 |
-
|
|
|
|
| 20 |
line_comment_pattern = re.compile(r'//.*')
|
| 21 |
|
| 22 |
def remove_comments(code):
|
|
@@ -26,7 +27,9 @@ def remove_comments(code):
|
|
| 26 |
|
| 27 |
def replace_preprocessor(code):
|
| 28 |
code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
| 31 |
def strip_lines(text, max_blank_lines=0):
|
| 32 |
text += '\n'
|
|
@@ -42,12 +45,12 @@ def strip_lines(text, max_blank_lines=0):
|
|
| 42 |
kept.append(line)
|
| 43 |
return '\n'.join(kept)
|
| 44 |
|
| 45 |
-
|
| 46 |
multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
|
| 47 |
|
| 48 |
def normalize_braces(code):
|
| 49 |
code = multiline_function_pattern.sub(r'\1{', code)
|
| 50 |
-
|
| 51 |
return code
|
| 52 |
|
| 53 |
def format_cpp(code: str, style: str = "Google") -> str:
|
|
|
|
| 6 |
import shutil
|
| 7 |
import torch
|
| 8 |
|
| 9 |
+
model_path = "Model-V0.5.3"
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 11 |
config = AutoConfig.from_pretrained(model_path)
|
| 12 |
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
|
|
|
|
| 16 |
import re
|
| 17 |
preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
|
| 18 |
block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
|
| 19 |
+
using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)
|
| 20 |
+
typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)
|
| 21 |
line_comment_pattern = re.compile(r'//.*')
|
| 22 |
|
| 23 |
def remove_comments(code):
|
|
|
|
| 27 |
|
| 28 |
def replace_preprocessor(code):
|
| 29 |
code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
|
| 30 |
+
code = using_pattern.sub('<PREPROCESSOR>', code)
|
| 31 |
+
code = typedef_pattern.sub('<PREPROCESSOR>',code)
|
| 32 |
+
return code
|
| 33 |
|
| 34 |
def strip_lines(text, max_blank_lines=0):
|
| 35 |
text += '\n'
|
|
|
|
| 45 |
kept.append(line)
|
| 46 |
return '\n'.join(kept)
|
| 47 |
|
| 48 |
+
space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
|
| 49 |
multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
|
| 50 |
|
| 51 |
def normalize_braces(code):
|
| 52 |
code = multiline_function_pattern.sub(r'\1{', code)
|
| 53 |
+
code = space_braces_function_pattern.sub(r'\1{',code)
|
| 54 |
return code
|
| 55 |
|
| 56 |
def format_cpp(code: str, style: str = "Google") -> str:
|