a0ms1n commited on
Commit
ef679ef
·
1 Parent(s): da06399

Add 2 evaluators

Browse files
Files changed (4) hide show
  1. Evaluate1.py +44 -0
  2. Evaluate2.py +44 -0
  3. Model_Evaluate.py → Preprocess.py +56 -99
  4. app.py +20 -6
Evaluate1.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
import pandas as pd
import re
import torch
from Preprocess import *

# Checkpoint directory for this evaluator variant (its preprocess() skips clang-format).
model_path = "Model-V0.5.3"
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
# Label mappings baked into the checkpoint config:
# labels: id -> label name; label2id: label name -> id (includes the 'AI' class).
labels = model.config.id2label
label2id = model.config.label2id
15
def preprocess(code):
    """Normalize raw C++ source before tokenization (step order matters)."""
    # The format_cpp step is skipped in this variant of the pipeline.
    for step in (remove_comments, replace_preprocessor, normalize_braces, strip_lines):
        code = step(code)
    return code
22
+
23
+
24
def eval(source):
    """Classify a C++ source string as human- or AI-written.

    Returns a (predicted_label, "NN.NN %") tuple, where the percentage is the
    probability of the 'AI' class — the two values feed two Gradio text boxes.
    NOTE(review): this shadows the builtin `eval`; callers import it by this
    name, so renaming would break them.
    """
    source = preprocess(source)
    inputs = tokenizer(
        source,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='pt'
    )

    # Force inference onto the CPU and switch off dropout/batch-norm updates.
    model.cpu()
    model.eval()
    inputs = {k: v.cpu() for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over class logits; [0] drops the batch dimension.
    probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
    pred_id = probs.argmax()
    # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
    return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"
Evaluate2.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
import pandas as pd
import re
import torch
from Preprocess import *

# Checkpoint directory for this evaluator variant (its preprocess() runs clang-format).
model_path = "Model-V0.5.2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
# Label mappings baked into the checkpoint config:
# labels: id -> label name; label2id: label name -> id (includes the 'AI' class).
labels = model.config.id2label
label2id = model.config.label2id
14
+
15
def preprocess(code):
    """Normalize raw C++ source before tokenization (step order matters)."""
    # The normalize_braces step is skipped in this variant of the pipeline.
    for step in (format_cpp, remove_comments, replace_preprocessor, strip_lines):
        code = step(code)
    return code
22
+
23
+
24
def eval(source):
    """Classify a C++ source string as human- or AI-written.

    Returns a (predicted_label, "NN.NN %") tuple, where the percentage is the
    probability of the 'AI' class — the two values feed two Gradio text boxes.
    NOTE(review): this shadows the builtin `eval`; callers import it by this
    name, so renaming would break them.
    """
    source = preprocess(source)
    inputs = tokenizer(
        source,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='pt'
    )

    # Force inference onto the CPU and switch off dropout/batch-norm updates.
    model.cpu()
    model.eval()
    inputs = {k: v.cpu() for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over class logits; [0] drops the batch dimension.
    probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
    pred_id = probs.argmax()
    # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
    return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"
Model_Evaluate.py → Preprocess.py RENAMED
@@ -1,99 +1,56 @@
1
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
2
- from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
3
- import pandas as pd
4
- import re
5
- import subprocess
6
- import shutil
7
- import torch
8
-
9
- model_path = "Model-V0.5.3"
10
- tokenizer = AutoTokenizer.from_pretrained(model_path)
11
- config = AutoConfig.from_pretrained(model_path)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
13
- labels = model.config.id2label
14
- label2id = model.config.label2id
15
-
16
- import re
17
- preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
18
- block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
19
- using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)
20
- typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)
21
- line_comment_pattern = re.compile(r'//.*')
22
-
23
- def remove_comments(code):
24
- code = block_comment_pattern.sub('', code)
25
- code = line_comment_pattern.sub('', code)
26
- return code
27
-
28
- def replace_preprocessor(code):
29
- code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
30
- code = using_pattern.sub('<PREPROCESSOR>', code)
31
- code = typedef_pattern.sub('<PREPROCESSOR>',code)
32
- return code
33
-
34
- def strip_lines(text, max_blank_lines=0):
35
- text += '\n'
36
- lines = text.splitlines()
37
- kept = []
38
- consec = 0
39
- for line in lines:
40
- if line.strip() == "":
41
- consec +=1
42
- else:
43
- consec = 0
44
- if consec <= max_blank_lines:
45
- kept.append(line)
46
- return '\n'.join(kept)
47
-
48
- space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
49
- multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
50
-
51
- def normalize_braces(code):
52
- code = multiline_function_pattern.sub(r'\1{', code)
53
- code = space_braces_function_pattern.sub(r'\1{',code)
54
- return code
55
-
56
- def format_cpp(code: str, style: str = "Google") -> str:
57
- if not shutil.which("clang-format"):
58
- raise EnvironmentError("clang-format is not installed or not in PATH.")
59
-
60
- result = subprocess.run(
61
- ["clang-format", f"--style={style}"],
62
- input=code.encode(),
63
- stdout=subprocess.PIPE,
64
- stderr=subprocess.PIPE,
65
- check=True
66
- )
67
-
68
- return result.stdout.decode()
69
-
70
- def preprocess(code):
71
- # code = format_cpp(code)
72
- code = remove_comments(code)
73
- code = replace_preprocessor(code)
74
- code = normalize_braces(code)
75
- code = strip_lines(code)
76
- return code
77
-
78
-
79
- def eval(source):
80
- source = preprocess(source)
81
- inputs = tokenizer(
82
- source,
83
- truncation=True,
84
- padding='max_length',
85
- max_length=512,
86
- return_tensors='pt'
87
- )
88
-
89
- model.cpu()
90
- model.eval()
91
- inputs = {k: v.cpu() for k, v in inputs.items()}
92
-
93
- with torch.no_grad():
94
- outputs = model(**inputs)
95
-
96
- probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
97
- pred_id = probs.argmax()
98
- # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
99
- return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"
 
import re
import subprocess
import shutil

# Pre-compiled patterns for the C/C++ normalization helpers below.
preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)  # whole #directive lines
block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)   # /* ... */, may span lines
using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)    # using-declarations
typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)  # typedefs
line_comment_pattern = re.compile(r'//.*')  # // to end of line (also matches inside string literals)
10
+
11
def remove_comments(code):
    """Strip C/C++ comments: /* block */ comments first, then // line comments."""
    without_blocks = block_comment_pattern.sub('', code)
    return line_comment_pattern.sub('', without_blocks)
15
+
16
def replace_preprocessor(code):
    """Collapse #directives, using-declarations and typedefs to a <PREPROCESSOR> token."""
    # Application order matches the original: directives, then using, then typedef.
    for pattern in (preprocessor_pattern, using_pattern, typedef_pattern):
        code = pattern.sub('<PREPROCESSOR>', code)
    return code
21
+
22
+ def strip_lines(text, max_blank_lines=0):
23
+ text += '\n'
24
+ lines = text.splitlines()
25
+ kept = []
26
+ consec = 0
27
+ for line in lines:
28
+ if line.strip() == "":
29
+ consec +=1
30
+ else:
31
+ consec = 0
32
+ if consec <= max_blank_lines:
33
+ kept.append(line)
34
+ return '\n'.join(kept)
35
+
36
# Patterns used by normalize_braces: a parenthesized parameter list followed by
# an opening brace, either on the same line or on the next line.
space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
38
+
39
def normalize_braces(code):
    """Glue an opening brace onto its parameter list: ') {' / ')\\n{' -> '){'."""
    # Next-line braces are collapsed first, then same-line spacing.
    for pattern in (multiline_function_pattern, space_braces_function_pattern):
        code = pattern.sub(r'\1{', code)
    return code
43
+
44
def format_cpp(code: str, style: str = "Google") -> str:
    """Reformat C++ source by piping it through clang-format.

    Raises:
        EnvironmentError: clang-format is not available on PATH.
        subprocess.CalledProcessError: clang-format exits non-zero (check=True).
    """
    if shutil.which("clang-format") is None:
        raise EnvironmentError("clang-format is not installed or not in PATH.")

    completed = subprocess.run(
        ["clang-format", f"--style={style}"],
        input=code.encode(),
        capture_output=True,
        check=True,
    )
    return completed.stdout.decode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
- from Model_Evaluate import *
 
3
  import re
4
 
5
  def load_cpp_file(file):
@@ -8,7 +9,7 @@ def load_cpp_file(file):
8
  content = f.read()
9
  return content
10
 
11
- with gr.Blocks() as demo:
12
  with gr.Row():
13
  with gr.Column(scale=1):
14
  code_box = gr.Textbox(lines=20, label="C++ Code")
@@ -22,12 +23,25 @@ with gr.Blocks() as demo:
22
 
23
  with gr.Row():
24
  with gr.Column(scale=1):
25
- label_box = gr.Textbox(label="Label", interactive=False)
26
  with gr.Column(scale=1):
27
- confidence_box = gr.Textbox(label="AI Percentage", interactive=False)
 
 
 
 
 
 
 
 
 
28
 
29
  # Bind functions
30
  cpp_file.change(fn=load_cpp_file, inputs=cpp_file, outputs=code_box)
31
- check_btn.click(fn=eval, inputs=[code_box], outputs=[label_box, confidence_box])
 
 
 
 
32
 
33
- demo.launch()
 
1
  import gradio as gr
2
+ import Evaluate1
3
+ import Evaluate2
4
  import re
5
 
6
  def load_cpp_file(file):
 
9
  content = f.read()
10
  return content
11
 
12
+ with gr.Blocks() as web:
13
  with gr.Row():
14
  with gr.Column(scale=1):
15
  code_box = gr.Textbox(lines=20, label="C++ Code")
 
23
 
24
  with gr.Row():
25
  with gr.Column(scale=1):
26
+ label_box1 = gr.Textbox(label="Label", interactive=False)
27
  with gr.Column(scale=1):
28
+ confidence_box1 = gr.Textbox(label="AI Percentage", interactive=False)
29
+
30
+ with gr.Row():
31
+ gr.Markdown("### Result (Code Format) :")
32
+
33
+ with gr.Row():
34
+ with gr.Column(scale=1):
35
+ label_box2 = gr.Textbox(label="Label", interactive=False)
36
+ with gr.Column(scale=1):
37
+ confidence_box2 = gr.Textbox(label="AI Percentage", interactive=False)
38
 
39
  # Bind functions
40
  cpp_file.change(fn=load_cpp_file, inputs=cpp_file, outputs=code_box)
41
+ check_btn.click(
42
+ fn=lambda code: Evaluate1.eval(code) + Evaluate2.eval(code),
43
+ inputs=[code_box],
44
+ outputs=[label_box1, confidence_box1, label_box2, confidence_box2]
45
+ )
46
 
47
+ web.launch()