Spaces:

a0ms1n
/

AI-Code-Detector_for-Competitive-Programming

Running

App Files Files Community

a0ms1n committed on Jun 7, 2025

Commit

ef679ef

1 Parent(s): da06399

Add 2 evaluates

Browse files

Files changed (4) hide show

Evaluate1.py +44 -0
Evaluate2.py +44 -0
Model_Evaluate.py → Preprocess.py +56 -99
app.py +20 -6

Evaluate1.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
+from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
+import pandas as pd
+import re
+import torch
+from Preprocess import *
+# Checkpoint name/path handed to every from_pretrained() call below.
+model_path = "Model-V0.5.3"
+# The tokenizer, config and sequence-classification model are loaded once,
+# when this module is imported.
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+config = AutoConfig.from_pretrained(model_path)
+model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
+# Class-id -> label-name and label-name -> class-id mappings from the model config.
+labels = model.config.id2label
+label2id = model.config.label2id
+def preprocess(code):
+    # code = format_cpp(code)
+    code = remove_comments(code)
+    code = replace_preprocessor(code)
+    code = normalize_braces(code)
+    code = strip_lines(code)
+    return code
+def eval(source):
+    source = preprocess(source)
+    inputs = tokenizer(
+        source,
+        truncation=True,
+        padding='max_length',
+        max_length=512,
+        return_tensors='pt'
+    )
+    model.cpu()
+    model.eval()
+    inputs = {k: v.cpu() for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
+    pred_id = probs.argmax()
+    # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
+    return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"

Evaluate2.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
+from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
+import pandas as pd
+import re
+import torch
+from Preprocess import *
+# Checkpoint name/path handed to every from_pretrained() call below.
+model_path = "Model-V0.5.2"
+# The tokenizer, config and sequence-classification model are loaded once,
+# when this module is imported.
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+config = AutoConfig.from_pretrained(model_path)
+model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
+# Class-id -> label-name and label-name -> class-id mappings from the model config.
+labels = model.config.id2label
+label2id = model.config.label2id
+def preprocess(code):
+    code = format_cpp(code)
+    code = remove_comments(code)
+    code = replace_preprocessor(code)
+    # code = normalize_braces(code)
+    code = strip_lines(code)
+    return code
+def eval(source):
+    source = preprocess(source)
+    inputs = tokenizer(
+        source,
+        truncation=True,
+        padding='max_length',
+        max_length=512,
+        return_tensors='pt'
+    )
+    model.cpu()
+    model.eval()
+    inputs = {k: v.cpu() for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model(**inputs)
+    probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
+    pred_id = probs.argmax()
+    # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
+    return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"

Model_Evaluate.py → Preprocess.py RENAMED Viewed

@@ -1,99 +1,56 @@
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel
-from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value
-import pandas as pd
-import re
-import subprocess
-import shutil
-import torch
-model_path = "Model-V0.5.3"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-config = AutoConfig.from_pretrained(model_path)
-model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
-labels = model.config.id2label
-label2id = model.config.label2id
-import re
-preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
-block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
-using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)
-typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)
-line_comment_pattern = re.compile(r'//.*')
-def remove_comments(code):
-    code = block_comment_pattern.sub('', code)
-    code = line_comment_pattern.sub('', code)
-    return code
-def replace_preprocessor(code):
-    code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
-    code = using_pattern.sub('<PREPROCESSOR>', code)
-    code = typedef_pattern.sub('<PREPROCESSOR>',code)
-    return code
-def strip_lines(text, max_blank_lines=0):
-    text += '\n'
-    lines = text.splitlines()
-    kept = []
-    consec = 0
-    for line in lines:
-        if line.strip() == "":
-            consec +=1
-        else:
-            consec = 0
-        if consec <= max_blank_lines:
-            kept.append(line)
-    return '\n'.join(kept)
-space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
-multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
-def normalize_braces(code):
-    code = multiline_function_pattern.sub(r'\1{', code)
-    code = space_braces_function_pattern.sub(r'\1{',code)
-    return code
-def format_cpp(code: str, style: str = "Google") -> str:
-    if not shutil.which("clang-format"):
-        raise EnvironmentError("clang-format is not installed or not in PATH.")
-    result = subprocess.run(
-        ["clang-format", f"--style={style}"],
-        input=code.encode(),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        check=True
-    )
-    return result.stdout.decode()
-def preprocess(code):
-    # code = format_cpp(code)
-    code = remove_comments(code)
-    code = replace_preprocessor(code)
-    code = normalize_braces(code)
-    code = strip_lines(code)
-    return code
-def eval(source):
-    source = preprocess(source)
-    inputs = tokenizer(
-        source,
-        truncation=True,
-        padding='max_length',
-        max_length=512,
-        return_tensors='pt'
-    )
-    model.cpu()
-    model.eval()
-    inputs = {k: v.cpu() for k, v in inputs.items()}
-    with torch.no_grad():
-        outputs = model(**inputs)
-    probs = torch.softmax(outputs.logits, dim=-1).detach().cpu().numpy()[0]
-    pred_id = probs.argmax()
-    # print("Label:", labels[pred_id], " | Score:", probs[pred_id])
-    return labels[pred_id], f"{probs[label2id['AI']]*100:.2f} %"

+import re
+import subprocess
+import shutil
+# Lines whose first non-blank character is '#' (preprocessor directives).
+# Because \s also matches newlines, blank lines directly before such a line
+# are part of the match too.
+preprocessor_pattern = re.compile(r'^\s*#.*$', re.MULTILINE)
+# /* ... */ comments; DOTALL lets one comment span several lines, and the
+# non-greedy body stops at the first closing "*/".
+block_comment_pattern = re.compile(r'/\*.*?\*/', re.DOTALL)
+# `using ...;` statements at the start of a line (leading whitespace allowed).
+using_pattern = re.compile(r'^\s*using\s+[^\n;]+;', re.MULTILINE)
+# `typedef ...;` statements at the start of a line (leading whitespace allowed).
+typedef_pattern = re.compile(r'^\s*typedef\s+[^\n;]+;', re.MULTILINE)
+# Everything from "//" to the end of the line.
+line_comment_pattern = re.compile(r'//.*')
+def remove_comments(code):
+    code = block_comment_pattern.sub('', code)
+    code = line_comment_pattern.sub('', code)
+    return code
+def replace_preprocessor(code):
+    code = preprocessor_pattern.sub('<PREPROCESSOR>', code)
+    code = using_pattern.sub('<PREPROCESSOR>', code)
+    code = typedef_pattern.sub('<PREPROCESSOR>',code)
+    return code
+def strip_lines(text, max_blank_lines=0):
+    text += '\n'
+    lines = text.splitlines()
+    kept = []
+    consec = 0
+    for line in lines:
+        if line.strip() == "":
+            consec +=1
+        else:
+            consec = 0
+        if consec <= max_blank_lines:
+            kept.append(line)
+    return '\n'.join(kept)
+# A parenthesised group followed by optional whitespace and '{', e.g. `main() {`.
+space_braces_function_pattern = re.compile(r'(\([^\)]*\))\s*\{')
+# Same shape, but the whitespace before '{' must contain a newline
+# (opening brace placed on a following line).
+multiline_function_pattern = re.compile(r'(\([^\)]*\))\s*\n\s*\{')
+def normalize_braces(code):
+    code = multiline_function_pattern.sub(r'\1{', code)
+    code = space_braces_function_pattern.sub(r'\1{',code)
+    return code
+def format_cpp(code: str, style: str = "Google") -> str:
+    if not shutil.which("clang-format"):
+        raise EnvironmentError("clang-format is not installed or not in PATH.")
+    result = subprocess.run(
+        ["clang-format", f"--style={style}"],
+        input=code.encode(),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=True
+    )
+    return result.stdout.decode()

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
-from Model_Evaluate import *
 import re
 def load_cpp_file(file):
@@ -8,7 +9,7 @@ def load_cpp_file(file):
         content = f.read()
     return content
-with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
             code_box = gr.Textbox(lines=20, label="C++ Code")
@@ -22,12 +23,25 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            label_box = gr.Textbox(label="Label", interactive=False)
         with gr.Column(scale=1):
-            confidence_box = gr.Textbox(label="AI Percentage", interactive=False)
     # Bind functions
     cpp_file.change(fn=load_cpp_file, inputs=cpp_file, outputs=code_box)
-    check_btn.click(fn=eval, inputs=[code_box], outputs=[label_box, confidence_box])
-demo.launch()

 import gradio as gr
+import Evaluate1
+import Evaluate2
 import re
 def load_cpp_file(file):
         content = f.read()
     return content
+with gr.Blocks() as web:
     with gr.Row():
         with gr.Column(scale=1):
             code_box = gr.Textbox(lines=20, label="C++ Code")
     with gr.Row():
         with gr.Column(scale=1):
+            label_box1 = gr.Textbox(label="Label", interactive=False)
         with gr.Column(scale=1):
+            confidence_box1 = gr.Textbox(label="AI Percentage", interactive=False)
+    with gr.Row():
+        gr.Markdown("### Result (Code Format) :")
+    with gr.Row():
+        with gr.Column(scale=1):
+            label_box2 = gr.Textbox(label="Label", interactive=False)
+        with gr.Column(scale=1):
+            confidence_box2 = gr.Textbox(label="AI Percentage", interactive=False)
     # Bind functions
     cpp_file.change(fn=load_cpp_file, inputs=cpp_file, outputs=code_box)
+    check_btn.click(
+    fn=lambda code: Evaluate1.eval(code) + Evaluate2.eval(code),
+    inputs=[code_box],
+    outputs=[label_box1, confidence_box1, label_box2, confidence_box2]
+    )
+web.launch()