a0ms1n committed on
Commit
cd68abb
·
1 Parent(s): e6e78f1

Update model to V.0.5.3

Browse files
{Model-V0.5.1 β†’ Model-V0.5.3}/config.json RENAMED
@@ -10,14 +10,14 @@
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
- "0": "Human",
14
- "1": "AI"
15
  },
16
  "initializer_range": 0.02,
17
  "intermediate_size": 3072,
18
  "label2id": {
19
- "AI": 1,
20
- "Human": 0
21
  },
22
  "layer_norm_eps": 1e-05,
23
  "max_position_embeddings": 514,
@@ -29,7 +29,7 @@
29
  "position_embedding_type": "absolute",
30
  "problem_type": "single_label_classification",
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.52.2",
33
  "type_vocab_size": 1,
34
  "use_cache": true,
35
  "vocab_size": 50265
 
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
+ "0": "AI",
14
+ "1": "Human"
15
  },
16
  "initializer_range": 0.02,
17
  "intermediate_size": 3072,
18
  "label2id": {
19
+ "AI": 0,
20
+ "Human": 1
21
  },
22
  "layer_norm_eps": 1e-05,
23
  "max_position_embeddings": 514,
 
29
  "position_embedding_type": "absolute",
30
  "problem_type": "single_label_classification",
31
  "torch_dtype": "float32",
32
+ "transformers_version": "4.52.3",
33
  "type_vocab_size": 1,
34
  "use_cache": true,
35
  "vocab_size": 50265
{Model-V0.5.1 β†’ Model-V0.5.3}/merges.txt RENAMED
File without changes
{Model-V0.5.1 β†’ Model-V0.5.3}/special_tokens_map.json RENAMED
File without changes
{Model-V0.5.1 β†’ Model-V0.5.3}/tokenizer.json RENAMED
File without changes
{Model-V0.5.1 β†’ Model-V0.5.3}/tokenizer_config.json RENAMED
File without changes
Model-V0.5.3/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa5a076c150d5c02aac3d0571d3ace496160b6ab102fbec8f48ae9b4ccc0cf1
3
+ size 5304
{Model-V0.5.1 β†’ Model-V0.5.3}/vocab.json RENAMED
File without changes
Model_Evaluate.py CHANGED
@@ -6,7 +6,7 @@ import subprocess
6
  import shutil
7
  import torch
8
 
9
- model_path = "Model-V0.5.1"
10
  tokenizer = AutoTokenizer.from_pretrained(model_path)
11
  config = AutoConfig.from_pretrained(model_path)
12
  model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
@@ -16,7 +16,8 @@ label2id = model.config.label2id
16
  import re
17
  preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
18
  block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
19
- using_namespace_pattern = re.compile(r'^\s*using\s+namespace\s+[a-zA-Z_][a-zA-Z0-9_:]*\s*;', re.MULTILINE)
 
20
  line_comment_pattern = re.compile(r'//.*')
21
 
22
  def remove_comments(code):
@@ -26,7 +27,9 @@ def remove_comments(code):
26
 
27
  def replace_preprocessor(code):
28
  code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
29
- return using_namespace_pattern.sub('<NAMESPACE>', code)
 
 
30
 
31
  def strip_lines(text, max_blank_lines=0):
32
  text += '\n'
@@ -42,12 +45,12 @@ def strip_lines(text, max_blank_lines=0):
42
  kept.append(line)
43
  return '\n'.join(kept)
44
 
45
- paren_brace_space_pattern = re.compile(r'\([^\)]*\)\s+\{')
46
  multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
47
 
48
  def normalize_braces(code):
49
  code = multiline_function_pattern.sub(r'\1{', code)
50
- # code = paren_brace_space_pattern.sub('(){', code)
51
  return code
52
 
53
  def format_cpp(code: str, style: str = "Google") -> str:
 
6
  import shutil
7
  import torch
8
 
9
+ model_path = "Model-V0.5.3"
10
  tokenizer = AutoTokenizer.from_pretrained(model_path)
11
  config = AutoConfig.from_pretrained(model_path)
12
  model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
 
16
  import re
17
  preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
18
  block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
19
+ using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)
20
+ typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)
21
  line_comment_pattern = re.compile(r'//.*')
22
 
23
  def remove_comments(code):
 
27
 
28
  def replace_preprocessor(code):
29
  code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
30
+ code = using_pattern.sub('<PREPROCESSOR>', code)
31
+ code = typedef_pattern.sub('<PREPROCESSOR>',code)
32
+ return code
33
 
34
  def strip_lines(text, max_blank_lines=0):
35
  text += '\n'
 
45
  kept.append(line)
46
  return '\n'.join(kept)
47
 
48
+ space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
49
  multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
50
 
51
  def normalize_braces(code):
52
  code = multiline_function_pattern.sub(r'\1{', code)
53
+ code = space_braces_function_pattern.sub(r'\1{',code)
54
  return code
55
 
56
  def format_cpp(code: str, style: str = "Google") -> str: