joshnavip committed on
Commit
b144cb7
·
0 Parent(s):

Initial commit: AI code detection project (without binary files)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +30 -0
  2. .states/ae923ba0906f0144843dfd2720081370.pkl +0 -0
  3. README.md +43 -0
  4. __pycache__/app.cpython-312.pyc +0 -0
  5. app.py +83 -0
  6. basemodel/README.md +3 -0
  7. basemodel/evaluate_randomforest.py +39 -0
  8. basemodel/test_features.csv +75 -0
  9. basemodel/token_level_feature_extraction.py +81 -0
  10. basemodel/train_features.csv +345 -0
  11. basemodel/train_randomforest.py +44 -0
  12. basemodel/val_features.csv +75 -0
  13. classifier/__init__.py +0 -0
  14. classifier/__pycache__/__init__.cpython-310.pyc +0 -0
  15. classifier/__pycache__/__init__.cpython-312.pyc +0 -0
  16. classifier/__pycache__/inference.cpython-310.pyc +0 -0
  17. classifier/__pycache__/inference.cpython-312.pyc +0 -0
  18. classifier/inference.py +130 -0
  19. classifier/models/xgboost_final_model.json +0 -0
  20. classifier/test_xgboost.py +28 -0
  21. classifier/train_xgboost.py +68 -0
  22. classifier/tune_xgboost.py +72 -0
  23. classifier/xgboost_final_model.json +0 -0
  24. dataset/processed/dataset_step1_length_normalized.csv +0 -0
  25. dataset/processed/dataset_step2_cleaned.csv +0 -0
  26. dataset/processed/dataset_test.csv +0 -0
  27. dataset/processed/dataset_train.csv +0 -0
  28. dataset/processed/dataset_val.csv +0 -0
  29. dataset/raw/raw_dataset.csv +0 -0
  30. featureextraction/__init__.py +0 -0
  31. featureextraction/__pycache__/__init__.cpython-310.pyc +0 -0
  32. featureextraction/__pycache__/__init__.cpython-312.pyc +0 -0
  33. featureextraction/__pycache__/__init__.cpython-313.pyc +0 -0
  34. featureextraction/feature_concatenation.py +56 -0
  35. featureextraction/semantic_features/__init__,py +0 -0
  36. featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-310.pyc +0 -0
  37. featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-312.pyc +0 -0
  38. featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-313.pyc +0 -0
  39. featureextraction/semantic_features/unixcoder_embedding.py +35 -0
  40. featureextraction/step1_statistical_extraction/__init__.py +0 -0
  41. featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-310.pyc +0 -0
  42. featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-312.pyc +0 -0
  43. featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-313.pyc +0 -0
  44. featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-310.pyc +0 -0
  45. featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-312.pyc +0 -0
  46. featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-313.pyc +0 -0
  47. featureextraction/step1_statistical_extraction/step1_statistical_extraction.py +97 -0
  48. featureextraction/step1_statistical_extraction/test_features.csv +75 -0
  49. featureextraction/step1_statistical_extraction/train_features.csv +345 -0
  50. featureextraction/step1_statistical_extraction/val_features.csv +75 -0
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Binary model files
2
+ *.pkl
3
+ *.npy
4
+ *.joblib
5
+
6
+ # Cache files
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ *.pyd
11
+ .Python
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Data (optional - comment out if you want to include)
27
+ dataset/raw/
28
+
29
+ # State files
30
+ .states/
.states/ae923ba0906f0144843dfd2720081370.pkl ADDED
Binary file (1.54 kB). View file
 
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI vs Human Code Classifier
3
+ emoji: 🤖
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: gradio
7
+ app_file: app.py
8
+ python_version: "3.11.9"
9
+ suggested_hardware: cpu-basic
10
+ startup_duration_timeout: 60m
11
+ preload_from_hub:
12
+ - microsoft/unixcoder-base
13
+ models:
14
+ - microsoft/unixcoder-base
15
+ short_description: Classify code as AI- or human-written (XGBoost + UnixCoder + SHAP).
16
+ tags:
17
+ - code-classification
18
+ - explainability
19
+ - xgboost
20
+ ---
21
+
22
+ # AI vs Human Code Classifier
23
+
24
+ Gradio demo for [Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-overview) using the same inference stack as the Reflex app (`reflex_ui`).
25
+
26
+ ## Deploy on Hugging Face
27
+
28
+ 1. Create a [new Space](https://huggingface.co/new-space), pick **Gradio**, then push this repository (or connect GitHub and set the repository).
29
+ 2. In **Settings → Hardware**, keep **CPU Basic** (free: 16 GB RAM) unless you need a GPU.
30
+ 3. Open the **App** tab. The first run may take a while as the model weights load.
31
+
32
+ Public URL shape: `https://huggingface.co/spaces/<your-username>/<space-name>` (and the `.hf.space` embed URL from the Space menu).
33
+
34
+ ## Local Gradio (optional)
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ python app.py
39
+ ```
40
+
41
+ ## Local Reflex UI
42
+
43
+ See `reflex_ui/README.md` for the full Reflex front end.
__pycache__/app.cpython-312.pyc ADDED
Binary file (4.79 kB). View file
 
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Spaces entrypoint (Gradio).
3
+ Uses the same backend as the Reflex UI via `reflex_ui.backend_bridge.run_prediction`.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ import gradio as gr
13
+ import pandas as pd
14
+
15
# Directory containing this file; it becomes the process working directory.
REPO_ROOT = Path(__file__).resolve().parent
os.chdir(REPO_ROOT)

# Prepend the repo root, then the `reflex_ui` directory, to sys.path (each only
# if absent) so the `reflex_ui.backend_bridge` import that follows can resolve.
_reflex_pkg_parent = REPO_ROOT / "reflex_ui"
for _entry in (str(REPO_ROOT), str(_reflex_pkg_parent)):
    if _entry not in sys.path:
        sys.path.insert(0, _entry)
del _entry
23
+
24
+ from reflex_ui.backend_bridge import run_prediction # noqa: E402
25
+
26
+
27
+ def _predict(code: str, language: str, top_k: float):
28
+ text = (code or "").strip()
29
+ if not text:
30
+ return (
31
+ "*Paste a non-empty code snippet.*",
32
+ "",
33
+ pd.DataFrame(),
34
+ "",
35
+ )
36
+ lang = (language or "python").lower()
37
+ if lang not in ("python", "java"):
38
+ lang = "python"
39
+ k = int(top_k) if top_k else 6
40
+ k = max(1, min(30, k))
41
+ try:
42
+ r = run_prediction(code=text, language=lang, top_k=k)
43
+ except Exception as e:
44
+ return (f"**{type(e).__name__}:** `{e}`", "", pd.DataFrame(), "")
45
+
46
+ label_md = f"## Prediction: **{r.label}**\n\nP(AI) = **{r.prob_ai:.4f}**"
47
+ grouped_lines = [
48
+ f"- **{name}:** {val:.4f}" for name, val in sorted(r.grouped_importance.items(), key=lambda x: -x[1])
49
+ ]
50
+ grouped_md = "### Group importance\n" + ("\n".join(grouped_lines) if grouped_lines else "_n/a_")
51
+ df = pd.DataFrame(r.shap_rows)
52
+ if not df.empty and "impact" in df.columns:
53
+ show = df.drop(columns=["impact"], errors="ignore")
54
+ else:
55
+ show = df
56
+ return label_md, r.explanation, show, grouped_md
57
+
58
+
59
# Build the Gradio page: a header, the input controls, a run button, and four
# output components that the click handler fills in.
with gr.Blocks(title="AI vs Human Code Classifier") as demo:
    gr.Markdown(
        "# AI vs Human Code Classifier\n"
        "Classifies snippets as **AI** or **Human** using XGBoost on statistical, AST, "
        "stylometry, language, and **UnixCoder** semantic features, with **SHAP** attributions."
    )
    # NOTE(review): indentation was lost in the rendered diff this code was
    # recovered from; the two controls below are assumed to be the only
    # children of the Row - confirm against the repository.
    with gr.Row():
        language = gr.Radio(choices=["python", "java"], value="python", label="Language")
        top_k = gr.Slider(1, 20, value=6, step=1, label="Top SHAP features")
    code = gr.Code(label="Code", language="python", lines=18)
    run_btn = gr.Button("Run prediction", variant="primary")
    out_head = gr.Markdown()
    out_grouped = gr.Markdown()
    out_expl = gr.Markdown(label="Narrative explanation")
    out_table = gr.Dataframe(label="Top feature attributions", wrap=True)

    # The outputs list follows the order of the tuple returned by _predict
    # (headline, explanation, table, grouped importance), which is not the
    # order in which the components are created above.
    run_btn.click(
        _predict,
        inputs=[code, language, top_k],
        outputs=[out_head, out_expl, out_table, out_grouped],
    )


if __name__ == "__main__":
    # Bind to all interfaces on port 7860 when run as a script.
    demo.launch(server_name="0.0.0.0", server_port=7860)
basemodel/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This module implements a token‑level statistical baseline for AI vs human code detection. It relies solely on distributional properties of code tokens, such as entropy, burstiness, and predictability, without using structural or stylistic features. The goal is to evaluate whether LLM‑generated code exhibits detectable statistical signatures prior to incorporating higher‑level program structure and stylometry.
2
+
3
+ The statistical baseline achieves 58.6% validation accuracy, indicating that token‑level regularities alone are insufficient for robust authorship discrimination, motivating the inclusion of structural and stylistic features.
basemodel/evaluate_randomforest.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import joblib
3
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
4
+
5
+
6
# Evaluate the saved random-forest statistical baseline on the test features.
test_df = pd.read_csv("basemodel/test_features.csv")

# Every column except the target (and the textual language tag, when present)
# is passed to the model as a feature.
drop_cols = ["Label"]
if "language" in test_df.columns:
    drop_cols.append("language")

X_test = test_df.drop(columns=drop_cols)
y_test = test_df["Label"]

# NOTE: joblib.load unpickles arbitrary objects; only load model files you trust.
rf = joblib.load("basemodel/random_forest_baseline.pkl")

test_preds = rf.predict(X_test)

accuracy = accuracy_score(y_test, test_preds)

# Pin the class order so that 0 is always reported as "Human" and 1 as "AI",
# and the confusion matrix is always 2x2, even if one class is absent from
# both the test labels and the predictions.
class_labels = [0, 1]

print("\n🧪 TEST SET EVALUATION (Statistical Baseline)\n")
print("Test Accuracy:", round(accuracy, 4))

print("\nClassification Report:\n")
print(
    classification_report(
        y_test,
        test_preds,
        labels=class_labels,
        target_names=["Human", "AI"],
        zero_division=0,
    )
)

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, test_preds, labels=class_labels))
basemodel/test_features.csv ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 4.83141993487849,0.781021897810219,0.583941605839416,3.153284671532847,0.41605839416058393,137,0.41605839416058393,30.09090909090909,python,1.0
3
+ 4.100105160750668,0.8333333333333334,0.6111111111111112,2.7962962962962963,0.3888888888888889,54,0.3888888888888889,33.5,python,0.0
4
+ 3.549523459597832,0.7857142857142857,0.5,6.5,0.5,28,0.5,27.818181818181817,java,1.0
5
+ 4.598877796136016,0.7543859649122807,0.49122807017543857,3.473684210526316,0.5087719298245614,57,0.5087719298245614,43.666666666666664,python,1.0
6
+ 4.008947073809811,0.7708333333333334,0.5625,2.5208333333333335,0.4375,48,0.4375,24.181818181818183,java,1.0
7
+ 3.9620439376607153,0.8448275862068966,0.6379310344827587,2.689655172413793,0.3620689655172414,58,0.3620689655172414,24.5,java,0.0
8
+ 4.496439344671016,0.65,0.375,4.325,0.625,40,0.625,41.0,python,1.0
9
+ 5.038474870244337,0.7261904761904762,0.5119047619047619,4.083333333333333,0.4880952380952381,84,0.4880952380952381,40.916666666666664,python,0.0
10
+ 3.828442584567261,0.9724770642201835,0.8256880733944953,2.8990825688073394,0.1743119266055046,109,0.1743119266055046,34.111111111111114,python,1.0
11
+ 5.158447453320608,0.7702702702702703,0.45945945945945943,4.581081081081081,0.5405405405405406,74,0.5405405405405406,19.52173913043478,java,0.0
12
+ 4.074700960493769,0.6470588235294118,0.5098039215686274,4.823529411764706,0.49019607843137253,51,0.49019607843137253,31.272727272727273,python,0.0
13
+ 5.216551170557516,0.32653061224489793,0.18367346938775508,6.489795918367347,0.8163265306122449,49,0.8163265306122449,31.466666666666665,java,1.0
14
+ 4.662553238462382,0.8235294117647058,0.5588235294117647,2.926470588235294,0.4411764705882353,68,0.4411764705882353,25.866666666666667,python,1.0
15
+ 4.718863069670588,0.8344827586206897,0.6827586206896552,4.703448275862069,0.31724137931034485,145,0.31724137931034485,28.0,java,0.0
16
+ 4.186133469144905,0.7878787878787878,0.6060606060606061,3.015151515151515,0.3939393939393939,66,0.3939393939393939,30.636363636363637,python,1.0
17
+ 4.201497374489525,0.9393939393939394,0.7474747474747474,1.9292929292929293,0.25252525252525254,99,0.25252525252525254,46.142857142857146,java,1.0
18
+ 5.555743206064364,0.7756410256410257,0.5769230769230769,4.397435897435898,0.4230769230769231,156,0.4230769230769231,33.875,java,0.0
19
+ 5.120767187212948,0.819672131147541,0.5819672131147541,4.122950819672131,0.4180327868852459,122,0.4180327868852459,27.517241379310345,java,0.0
20
+ 5.411695856960623,0.5733333333333334,0.3466666666666667,4.773333333333333,0.6533333333333333,75,0.6533333333333333,44.36363636363637,python,0.0
21
+ 5.9364666815595095,0.9323308270676691,0.7969924812030076,4.402255639097745,0.20300751879699247,532,0.20300751879699247,46.348314606741575,java,0.0
22
+ 5.921411473217016,0.8119266055045872,0.6146788990825688,5.041284403669724,0.3853211009174312,218,0.3853211009174312,43.567567567567565,python,0.0
23
+ 5.681975847739398,0.5217391304347826,0.3369565217391305,6.336956521739131,0.6630434782608695,92,0.6630434782608695,41.10526315789474,python,1.0
24
+ 4.9007043302212505,0.7849462365591398,0.5591397849462365,5.236559139784946,0.44086021505376344,93,0.44086021505376344,38.111111111111114,python,0.0
25
+ 5.3228285773092585,0.9466019417475728,0.7524271844660194,3.5194174757281553,0.24757281553398058,206,0.24757281553398058,33.48837209302326,java,0.0
26
+ 4.886224508551541,0.6811594202898551,0.4492753623188406,4.956521739130435,0.5507246376811594,69,0.5507246376811594,27.904761904761905,java,0.0
27
+ 3.7468422480983827,0.7317073170731707,0.5609756097560976,2.5853658536585367,0.43902439024390244,41,0.43902439024390244,23.1,python,1.0
28
+ 5.711349874129563,0.9204545454545454,0.7727272727272727,3.7017045454545454,0.22727272727272727,352,0.22727272727272727,33.15068493150685,java,0.0
29
+ 4.708644255945031,0.8160919540229885,0.6091954022988506,4.551724137931035,0.39080459770114945,87,0.39080459770114945,38.73684210526316,java,0.0
30
+ 4.366729296672175,0.46875,0.28125,6.21875,0.71875,32,0.71875,24.0,java,1.0
31
+ 4.819680984926536,0.7704918032786885,0.47540983606557374,3.19672131147541,0.5245901639344263,61,0.5245901639344263,33.18181818181818,python,0.0
32
+ 4.982067485400015,0.8495575221238938,0.6283185840707964,2.8849557522123894,0.37168141592920356,113,0.37168141592920356,37.5,python,0.0
33
+ 5.792349655949547,0.8646288209606987,0.6506550218340612,6.423580786026201,0.34934497816593885,229,0.34934497816593885,31.1,java,1.0
34
+ 5.172672355162728,0.8211382113821138,0.6016260162601625,3.2195121951219514,0.3983739837398374,123,0.3983739837398374,29.7,java,0.0
35
+ 4.509883902076781,0.5869565217391305,0.3913043478260869,3.9782608695652173,0.6086956521739131,46,0.6086956521739131,31.6,java,1.0
36
+ 5.0781577563714695,0.7582417582417582,0.5274725274725275,4.912087912087912,0.4725274725274725,91,0.4725274725274725,42.94736842105263,java,0.0
37
+ 4.752102792050065,0.8555555555555555,0.6222222222222222,2.6,0.37777777777777777,90,0.37777777777777777,30.875,python,1.0
38
+ 5.385167845762692,0.6842105263157895,0.4631578947368421,5.442105263157894,0.5368421052631579,95,0.5368421052631579,39.34782608695652,java,0.0
39
+ 4.698453748752097,0.8333333333333334,0.6111111111111112,2.3,0.3888888888888889,90,0.3888888888888889,30.75,python,1.0
40
+ 5.0936606896881855,0.48,0.26,4.5,0.74,50,0.74,36.44444444444444,python,1.0
41
+ 4.762058188070034,0.5689655172413793,0.3793103448275862,3.8793103448275863,0.6206896551724138,58,0.6206896551724138,35.875,python,0.0
42
+ 3.842216900031167,0.8703703703703703,0.6666666666666667,3.4444444444444446,0.3333333333333333,54,0.3333333333333333,26.571428571428573,python,1.0
43
+ 5.394236721618798,0.8340425531914893,0.6851063829787234,3.1659574468085108,0.3148936170212766,235,0.3148936170212766,45.16129032258065,java,0.0
44
+ 3.6368421881310113,0.5416666666666666,0.375,5.916666666666667,0.625,24,0.625,29.857142857142858,python,1.0
45
+ 4.371179616205941,0.821917808219178,0.6164383561643836,4.575342465753424,0.3835616438356164,73,0.3835616438356164,31.31578947368421,python,0.0
46
+ 5.292298716803247,0.6103896103896104,0.38961038961038963,4.597402597402597,0.6103896103896104,77,0.6103896103896104,43.63636363636363,python,0.0
47
+ 4.9019636334438,0.8170731707317073,0.5609756097560976,4.7926829268292686,0.43902439024390244,82,0.43902439024390244,34.21739130434783,java,0.0
48
+ 6.249267385727016,0.8602150537634409,0.6774193548387097,5.467741935483871,0.3225806451612903,372,0.3225806451612903,44.231884057971016,python,0.0
49
+ 5.022946255731872,0.905982905982906,0.6495726495726496,2.769230769230769,0.3504273504273504,117,0.3504273504273504,34.44,python,0.0
50
+ 5.183364648336087,0.578125,0.34375,6.140625,0.65625,64,0.65625,36.5,python,1.0
51
+ 3.8514475312208925,0.891566265060241,0.7469879518072289,2.5542168674698793,0.25301204819277107,83,0.25301204819277107,33.15384615384615,java,1.0
52
+ 4.76725052491479,0.8088235294117647,0.5294117647058824,4.382352941176471,0.47058823529411764,68,0.47058823529411764,29.857142857142858,java,0.0
53
+ 4.053149247727424,0.93,0.76,2.96,0.24,100,0.24,30.523809523809526,java,1.0
54
+ 5.508457113311519,0.8251748251748252,0.5944055944055944,5.1328671328671325,0.40559440559440557,143,0.40559440559440557,42.411764705882355,java,0.0
55
+ 4.041108871665959,0.7560975609756098,0.5121951219512195,5.048780487804878,0.4878048780487805,41,0.4878048780487805,26.75,java,1.0
56
+ 5.264706383768677,0.7769784172661871,0.5827338129496402,4.683453237410072,0.4172661870503597,139,0.4172661870503597,71.57142857142857,python,0.0
57
+ 4.888197350707755,0.9251700680272109,0.7210884353741497,2.5374149659863945,0.2789115646258503,147,0.2789115646258503,37.392857142857146,python,0.0
58
+ 4.246439344671016,0.65,0.42500000000000004,4.5,0.575,40,0.575,25.818181818181817,python,1.0
59
+ 5.3571067941182635,0.4838709677419355,0.27419354838709675,7.532258064516129,0.7258064516129032,62,0.7258064516129032,46.15384615384615,java,1.0
60
+ 4.882260151910624,0.8173076923076923,0.625,3.951923076923077,0.375,104,0.375,29.307692307692307,java,0.0
61
+ 4.413909765557392,0.40625,0.25,3.84375,0.75,32,0.75,28.666666666666668,python,0.0
62
+ 4.003044786734202,0.6666666666666666,0.4242424242424242,4.424242424242424,0.5757575757575758,33,0.5757575757575758,28.714285714285715,python,0.0
63
+ 4.138502179294739,0.8235294117647058,0.6176470588235294,3.8088235294117645,0.38235294117647056,68,0.38235294117647056,38.45454545454545,java,1.0
64
+ 3.490664945449586,0.8979591836734694,0.6938775510204082,4.571428571428571,0.30612244897959184,49,0.30612244897959184,26.526315789473685,java,0.0
65
+ 4.742155477497019,0.7049180327868853,0.47540983606557374,2.80327868852459,0.5245901639344263,61,0.5245901639344263,27.4,python,0.0
66
+ 4.907962016217061,0.4318181818181818,0.25,3.0454545454545454,0.75,44,0.75,62.0,python,1.0
67
+ 5.299012661837314,0.7024793388429752,0.5371900826446281,3.677685950413223,0.4628099173553719,121,0.4628099173553719,48.0,python,0.0
68
+ 5.643744827511846,0.8793774319066148,0.7081712062256809,7.603112840466926,0.2918287937743191,257,0.2918287937743191,36.523809523809526,java,1.0
69
+ 3.640223928941852,0.26666666666666666,0.1333333333333333,4.4,0.8666666666666667,15,0.8666666666666667,32.333333333333336,python,1.0
70
+ 4.220518125914522,0.9583333333333334,0.8194444444444444,2.0277777777777777,0.18055555555555555,144,0.18055555555555555,41.92307692307692,java,1.0
71
+ 5.706414205462762,0.7961783439490446,0.5796178343949044,7.503184713375796,0.42038216560509556,157,0.42038216560509556,38.61363636363637,java,1.0
72
+ 5.222228298430333,0.8230088495575221,0.5663716814159292,3.982300884955752,0.4336283185840708,113,0.4336283185840708,41.75,python,0.0
73
+ 4.277261292670126,0.8588235294117647,0.6705882352941177,3.0,0.32941176470588235,85,0.32941176470588235,34.35294117647059,python,0.0
74
+ 3.8035088547976783,0.5416666666666666,0.33333333333333337,4.208333333333333,0.6666666666666666,24,0.6666666666666666,24.571428571428573,python,1.0
75
+ 5.251158101143722,0.8026315789473685,0.618421052631579,4.894736842105263,0.3815789473684211,152,0.3815789473684211,44.06896551724138,java,0.0
basemodel/token_level_feature_extraction.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import re
4
+ import os
5
+ from collections import Counter
6
+ from math import log2
7
+
8
+
9
# Identifiers (ASCII letters/underscore, then letters/digits/underscores),
# digit runs, two-character comparison operators, and single arithmetic operators.
_TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*|\d+|==|!=|<=|>=|[+\-*/%]")


def tokenize(code):
    """Split source text into a flat list of lexical tokens.

    Recognised tokens are identifiers/keywords, runs of digits, the
    comparison operators ``==``, ``!=``, ``<=``, ``>=`` and the arithmetic
    operators ``+ - * / %``. All other characters (whitespace, brackets,
    quotes, a lone ``=``, ...) are skipped.

    Identifiers may contain digits after the first character, so ``x1`` is
    one token rather than ``x`` followed by ``1``.

    Args:
        code: Source text to scan.

    Returns:
        The matched tokens as strings, in order of appearance.
    """
    return _TOKEN_RE.findall(code)
12
+
13
+
14
def token_entropy(tokens):
    """Return the Shannon entropy, in bits, of the token frequency distribution.

    Args:
        tokens: Sequence of hashable tokens.

    Returns:
        ``sum(-p * log2(p))`` over the relative frequency ``p`` of each
        distinct token, or ``0.0`` for an empty sequence. The result is never
        negative zero: a sequence made of one repeated token yields ``0.0``.
    """
    if not tokens:
        return 0.0
    counts = Counter(tokens)
    total = len(tokens)
    # Negate each term instead of the final sum: negating a sum of 0.0 gives
    # -0.0, which would be written to the feature CSV as "-0.0".
    return sum(-(c / total) * log2(c / total) for c in counts.values())
21
+
22
def burstiness(tokens):
    """Return the fraction of token occurrences whose token appears more than once.

    Args:
        tokens: Sequence of hashable tokens.

    Returns:
        A value in ``[0, 1]``; ``0.0`` for an empty sequence.
    """
    if not tokens:
        return 0.0
    total = len(tokens)
    frequencies = Counter(tokens)
    # Occurrences of repeated tokens = all occurrences minus the tokens seen exactly once.
    singletons = sum(1 for n in frequencies.values() if n == 1)
    return (total - singletons) / total
28
+
29
def repetition_ratio(tokens):
    """Return one minus the ratio of distinct tokens to total tokens.

    Args:
        tokens: Sequence of hashable tokens.

    Returns:
        ``0.0`` for an empty sequence or when every token is distinct; the
        value approaches 1 as tokens repeat more.
    """
    if not tokens:
        return 0.0
    distinct = len(set(tokens))
    total = len(tokens)
    return 1 - (distinct / total)
33
+
34
def avg_token_length(tokens):
    """Return the mean number of characters per token.

    Args:
        tokens: Sequence of strings.

    Returns:
        The arithmetic mean of ``len(token)`` as a builtin ``float`` on every
        path; ``0.0`` for an empty sequence.
    """
    if not tokens:
        return 0.0
    # Integer sum followed by true division: same value as a NumPy mean,
    # but the result is a plain float rather than a NumPy scalar.
    return sum(len(t) for t in tokens) / len(tokens)
38
+
39
def vocab_richness(tokens):
    """Return the ratio of distinct tokens to total tokens.

    Args:
        tokens: Sequence of hashable tokens.

    Returns:
        A value in ``(0, 1]`` for non-empty input; ``0.0`` for an empty sequence.
    """
    if not tokens:
        return 0.0
    distinct = len(set(tokens))
    total = len(tokens)
    return distinct / total
43
+
44
+
45
def extract_features(df):
    """Compute the token-level statistical features for every row of *df*.

    Args:
        df: DataFrame with a ``normalized_code`` column holding source text
            and, optionally, a ``Language`` column.

    Returns:
        A new DataFrame with one row per input row, a fresh 0..n-1 index and
        the columns ``entropy``, ``burstiness``, ``repetition_ratio``,
        ``avg_token_length``, ``vocab_richness`` and ``language``. The
        ``language`` value is ``"unknown"`` when *df* has no ``Language``
        column. The columns are present even when *df* has no rows.

    Raises:
        KeyError: If *df* has no ``normalized_code`` column.
    """
    columns = [
        "entropy",
        "burstiness",
        "repetition_ratio",
        "avg_token_length",
        "vocab_richness",
        "language",
    ]

    if "Language" in df.columns:
        languages = df["Language"].tolist()
    else:
        languages = ["unknown"] * len(df)

    features = []
    for raw_code, language in zip(df["normalized_code"].tolist(), languages):
        # A missing cell is NaN/None; str() on it would produce the bogus
        # one-token snippet "nan"/"None", so treat it as empty source instead.
        code = "" if pd.isna(raw_code) else str(raw_code)
        tokens = tokenize(code)

        features.append({
            "entropy": token_entropy(tokens),
            "burstiness": burstiness(tokens),
            "repetition_ratio": repetition_ratio(tokens),
            "avg_token_length": avg_token_length(tokens),
            "vocab_richness": vocab_richness(tokens),
            "language": language,
        })

    return pd.DataFrame(features, columns=columns)
62
+
63
+
64
if __name__ == "__main__":
    # Script entry point: write one feature CSV per dataset split under basemodel/.
    os.makedirs("basemodel", exist_ok=True)

    for split in ("train", "val", "test"):
        frame = pd.read_csv(f"dataset/processed/dataset_{split}.csv")

        # Shorten the verbose label header when it is present; rename()
        # leaves the frame's columns unchanged when the key is absent.
        frame = frame.rename(columns={"Label (0- HUMAN, 1-AI)": "Label"})

        feature_table = extract_features(frame)
        feature_table["Label"] = frame["Label"]

        feature_table.to_csv(f"basemodel/{split}_features.csv", index=False)

        print(f"Statistical baseline features extracted for {split}")
basemodel/train_features.csv ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 5.207956079154271,0.8712121212121212,0.6515151515151515,3.9015151515151514,0.3484848484848485,132,0.3484848484848485,41.464285714285715,python,0.0
3
+ 4.844227218082805,0.8962962962962963,0.7037037037037037,3.1259259259259258,0.2962962962962963,135,0.2962962962962963,15.704545454545455,java,0.0
4
+ 5.110041502517433,0.916083916083916,0.6783216783216783,4.5174825174825175,0.32167832167832167,143,0.32167832167832167,25.784313725490197,java,0.0
5
+ 4.599986090068817,0.8839285714285714,0.7053571428571428,4.616071428571429,0.29464285714285715,112,0.29464285714285715,30.68421052631579,java,0.0
6
+ 4.469392725378344,0.8589743589743589,0.6153846153846154,3.230769230769231,0.38461538461538464,78,0.38461538461538464,23.923076923076923,java,0.0
7
+ 4.293744383186907,0.859375,0.625,2.953125,0.375,64,0.375,26.210526315789473,java,0.0
8
+ 4.549676205034737,0.860655737704918,0.6885245901639344,5.918032786885246,0.3114754098360656,122,0.3114754098360656,33.96666666666667,python,0.0
9
+ 4.328596578632476,0.8163265306122449,0.5306122448979591,3.2244897959183674,0.46938775510204084,49,0.46938775510204084,28.333333333333332,python,0.0
10
+ 4.597753650865813,0.7941176470588235,0.5588235294117647,3.0441176470588234,0.4411764705882353,68,0.4411764705882353,30.916666666666668,java,1.0
11
+ 4.0675361498812554,0.8333333333333334,0.6666666666666667,2.0384615384615383,0.3333333333333333,78,0.3333333333333333,28.0,python,0.0
12
+ 4.853956434293166,0.898989898989899,0.6464646464646464,4.101010101010101,0.35353535353535354,99,0.35353535353535354,31.40740740740741,java,0.0
13
+ 6.10213285294983,0.875,0.6736111111111112,7.423611111111111,0.3263888888888889,288,0.3263888888888889,37.31395348837209,java,1.0
14
+ 3.053791405126089,0.7777777777777778,0.6388888888888888,4.027777777777778,0.3611111111111111,36,0.3611111111111111,22.25,python,1.0
15
+ 4.379556754405944,0.8620689655172413,0.5689655172413793,4.120689655172414,0.43103448275862066,58,0.43103448275862066,35.36363636363637,java,1.0
16
+ 4.538844415918181,0.8026315789473685,0.5921052631578947,3.3026315789473686,0.40789473684210525,76,0.40789473684210525,15.043478260869565,java,0.0
17
+ 4.435455286899907,0.847457627118644,0.576271186440678,2.694915254237288,0.423728813559322,59,0.423728813559322,26.818181818181817,python,1.0
18
+ 4.1277054819279435,0.8333333333333334,0.5625,3.0,0.4375,48,0.4375,24.6,java,1.0
19
+ 4.498419401169,0.7058823529411765,0.47058823529411764,3.2941176470588234,0.5294117647058824,51,0.5294117647058824,25.866666666666667,java,0.0
20
+ 3.615922063835167,0.7333333333333333,0.5,5.433333333333334,0.5,30,0.5,30.125,python,0.0
21
+ 5.043983303761563,0.5833333333333334,0.3666666666666667,4.983333333333333,0.6333333333333333,60,0.6333333333333333,37.5,python,1.0
22
+ 3.6897037321995474,0.42857142857142855,0.2857142857142857,7.095238095238095,0.7142857142857143,21,0.7142857142857143,22.7,java,1.0
23
+ 3.791950834682379,0.7142857142857143,0.5934065934065934,2.769230769230769,0.4065934065934066,91,0.4065934065934066,34.0,python,1.0
24
+ 5.542412227395111,0.7934782608695652,0.6141304347826086,3.6793478260869565,0.3858695652173913,184,0.3858695652173913,61.75,java,0.0
25
+ 3.8463189626846366,0.8947368421052632,0.5526315789473684,3.0526315789473686,0.4473684210526316,38,0.4473684210526316,20.6,python,1.0
26
+ 4.7720552088742005,0.6666666666666666,0.375,6.229166666666667,0.625,48,0.625,29.214285714285715,java,1.0
27
+ 5.085703343051975,0.7099236641221374,0.549618320610687,3.931297709923664,0.45038167938931295,131,0.45038167938931295,32.40909090909091,python,0.0
28
+ 4.253863368422077,0.6578947368421053,0.42105263157894735,3.6052631578947367,0.5789473684210527,38,0.5789473684210527,34.833333333333336,java,1.0
29
+ 5.285942953673669,0.8157894736842105,0.5614035087719298,3.4035087719298245,0.43859649122807015,114,0.43859649122807015,45.666666666666664,java,0.0
30
+ 4.324534762707879,0.7111111111111111,0.4666666666666667,8.177777777777777,0.5333333333333333,45,0.5333333333333333,37.30769230769231,java,1.0
31
+ 4.895714600514266,0.8457943925233645,0.7383177570093458,2.6448598130841123,0.2616822429906542,214,0.2616822429906542,18.784313725490197,java,0.0
32
+ 3.327339900083509,0.6666666666666666,0.4285714285714286,3.0952380952380953,0.5714285714285714,21,0.5714285714285714,22.8,python,1.0
33
+ 5.581633377149314,0.5934065934065934,0.37362637362637363,6.318681318681318,0.6263736263736264,91,0.6263736263736264,40.78947368421053,python,1.0
34
+ 5.657995400316549,0.8997289972899729,0.7425474254742548,3.907859078590786,0.25745257452574527,369,0.25745257452574527,56.26315789473684,java,0.0
35
+ 4.029187712039474,0.8367346938775511,0.5918367346938775,2.9183673469387754,0.40816326530612246,49,0.40816326530612246,30.0,java,1.0
36
+ 4.8582227682726,0.9393939393939394,0.7348484848484849,2.6363636363636362,0.26515151515151514,132,0.26515151515151514,32.0,python,1.0
37
+ 4.42309119581308,0.7941176470588235,0.5735294117647058,2.5588235294117645,0.4264705882352941,68,0.4264705882352941,30.454545454545453,java,1.0
38
+ 3.7665853954588724,0.7704918032786885,0.639344262295082,2.8852459016393444,0.36065573770491804,61,0.36065573770491804,33.90909090909091,python,0.0
39
+ 5.197908393371423,0.8072289156626506,0.6325301204819277,5.0843373493975905,0.3674698795180723,166,0.3674698795180723,38.65625,java,0.0
40
+ 4.536286231168867,0.42857142857142855,0.2571428571428571,3.6,0.7428571428571429,35,0.7428571428571429,31.166666666666668,python,1.0
41
+ 4.709586788340617,0.8957055214723927,0.7300613496932515,3.361963190184049,0.26993865030674846,163,0.26993865030674846,17.816326530612244,java,0.0
42
+ 4.5049847535671015,0.9545454545454546,0.7897727272727273,3.3806818181818183,0.21022727272727273,176,0.21022727272727273,36.16129032258065,java,0.0
43
+ 4.9423908406293835,0.6538461538461539,0.4358974358974359,3.8333333333333335,0.5641025641025641,78,0.5641025641025641,45.125,python,0.0
44
+ 4.41120441892766,0.7777777777777778,0.4666666666666667,6.488888888888889,0.5333333333333333,45,0.5333333333333333,44.77777777777778,python,0.0
45
+ 5.262208612706845,0.7327586206896551,0.5431034482758621,4.146551724137931,0.45689655172413796,116,0.45689655172413796,28.666666666666668,python,1.0
46
+ 4.208410187268525,0.48148148148148145,0.2592592592592593,4.222222222222222,0.7407407407407407,27,0.7407407407407407,32.2,python,1.0
47
+ 5.793376775298043,0.8823529411764706,0.680672268907563,5.529411764705882,0.31932773109243695,238,0.31932773109243695,27.633333333333333,java,1.0
48
+ 5.691854216621956,0.7849462365591398,0.5967741935483871,4.973118279569892,0.4032258064516129,186,0.4032258064516129,40.333333333333336,python,0.0
49
+ 5.058984089445427,0.3333333333333333,0.16666666666666663,6.785714285714286,0.8333333333333334,42,0.8333333333333334,30.923076923076923,python,1.0
50
+ 4.433157434964864,0.9736842105263158,0.7543859649122807,2.4035087719298245,0.24561403508771928,114,0.24561403508771928,31.5,python,1.0
51
+ 5.268123017576671,0.6164383561643836,0.3835616438356164,4.917808219178082,0.6164383561643836,73,0.6164383561643836,44.09090909090909,python,0.0
52
+ 6.198108865484992,0.7208121827411168,0.5025380710659899,5.0456852791878175,0.49746192893401014,197,0.49746192893401014,41.638888888888886,python,1.0
53
+ 4.5615159206608356,0.8227848101265823,0.6075949367088608,3.5569620253164556,0.3924050632911392,79,0.3924050632911392,13.678571428571429,java,0.0
54
+ 5.04371468310881,0.6351351351351351,0.43243243243243246,4.1891891891891895,0.5675675675675675,74,0.5675675675675675,37.0,java,0.0
55
+ 3.995906598484245,0.6944444444444444,0.4722222222222222,4.333333333333333,0.5277777777777778,36,0.5277777777777778,27.5,python,1.0
56
+ 4.757950193468896,0.9148936170212766,0.648936170212766,2.893617021276596,0.35106382978723405,94,0.35106382978723405,34.0,python,1.0
57
+ 5.053560274699695,0.9136690647482014,0.7050359712230216,4.705035971223022,0.2949640287769784,139,0.2949640287769784,39.69565217391305,python,0.0
58
+ 3.833323296470577,0.8813559322033898,0.6779661016949152,2.288135593220339,0.3220338983050847,59,0.3220338983050847,27.5,java,1.0
59
+ 4.253212018409155,0.5882352941176471,0.3529411764705882,4.117647058823529,0.6470588235294118,34,0.6470588235294118,13.75,java,0.0
60
+ 4.611949334080443,0.5384615384615384,0.3076923076923077,4.717948717948718,0.6923076923076923,39,0.6923076923076923,34.42857142857143,python,0.0
61
+ 4.756182308560408,0.9186991869918699,0.7073170731707317,3.3089430894308944,0.2926829268292683,123,0.2926829268292683,28.09090909090909,java,0.0
62
+ 5.1256959966316895,0.9022988505747126,0.6954022988505747,3.8045977011494254,0.3045977011494253,174,0.3045977011494253,31.727272727272727,java,0.0
63
+ 4.274386660093705,0.8103448275862069,0.5862068965517242,2.2413793103448274,0.41379310344827586,58,0.41379310344827586,39.166666666666664,java,1.0
64
+ 5.649232493101554,0.8258064516129032,0.5870967741935484,5.961290322580645,0.4129032258064516,155,0.4129032258064516,29.104166666666668,java,0.0
65
+ 3.8005595576361078,0.875,0.6785714285714286,3.0892857142857144,0.32142857142857145,56,0.32142857142857145,24.357142857142858,python,1.0
66
+ 4.635290969626768,0.6666666666666666,0.43137254901960786,3.7254901960784315,0.5686274509803921,51,0.5686274509803921,25.46153846153846,java,0.0
67
+ 4.854346937661789,0.5740740740740741,0.37037037037037035,5.0,0.6296296296296297,54,0.6296296296296297,25.4,java,0.0
68
+ 4.393061650825727,0.5,0.2941176470588235,7.352941176470588,0.7058823529411765,34,0.7058823529411765,40.625,python,1.0
69
+ 4.422442136473173,0.5135135135135135,0.32432432432432434,5.081081081081081,0.6756756756756757,37,0.6756756756756757,39.166666666666664,python,1.0
70
+ 4.207435516759526,0.6571428571428571,0.4,3.0285714285714285,0.6,35,0.6,22.375,python,1.0
71
+ 5.99203259332159,0.7687074829931972,0.48299319727891155,5.122448979591836,0.5170068027210885,147,0.5170068027210885,39.357142857142854,python,1.0
72
+ 6.007074413338004,0.711764705882353,0.5058823529411764,4.129411764705883,0.49411764705882355,170,0.49411764705882355,58.0,python,0.0
73
+ 3.6464393446710157,0.5,0.30000000000000004,3.85,0.7,20,0.7,27.0,python,1.0
74
+ 4.497874075147441,0.5333333333333333,0.37777777777777777,4.311111111111111,0.6222222222222222,45,0.6222222222222222,30.636363636363637,java,1.0
75
+ 4.728255941527934,0.9318181818181818,0.7348484848484849,2.6363636363636362,0.26515151515151514,132,0.26515151515151514,33.68,python,0.0
76
+ 4.2354799985372935,0.6862745098039216,0.5098039215686274,2.9411764705882355,0.49019607843137253,51,0.49019607843137253,28.333333333333332,python,0.0
77
+ 5.560363930440956,0.8622754491017964,0.6347305389221557,4.029940119760479,0.3652694610778443,167,0.3652694610778443,31.70212765957447,java,0.0
78
+ 6.014883786593109,0.5323741007194245,0.38129496402877694,4.863309352517986,0.6187050359712231,139,0.6187050359712231,34.25,python,1.0
79
+ 4.384183719779189,0.55,0.375,6.35,0.625,40,0.625,32.916666666666664,java,1.0
80
+ 4.36346329043209,0.7105263157894737,0.39473684210526316,6.157894736842105,0.6052631578947368,38,0.6052631578947368,27.733333333333334,java,1.0
81
+ 5.92272462852426,0.7397959183673469,0.5510204081632653,4.969387755102041,0.4489795918367347,196,0.4489795918367347,72.89473684210526,java,0.0
82
+ 4.17415473143563,0.9078947368421053,0.7105263157894737,2.1578947368421053,0.2894736842105263,76,0.2894736842105263,30.692307692307693,python,1.0
83
+ 5.271209951802516,0.9140271493212669,0.7330316742081449,4.97737556561086,0.2669683257918552,221,0.2669683257918552,43.416666666666664,java,0.0
84
+ 5.014527492014123,0.9281045751633987,0.7320261437908497,2.7777777777777777,0.2679738562091503,153,0.2679738562091503,38.869565217391305,python,0.0
85
+ 4.482483990770269,0.9565217391304348,0.7391304347826086,4.026086956521739,0.2608695652173913,115,0.2608695652173913,36.90909090909091,java,1.0
86
+ 5.722786643861381,0.8493975903614458,0.5783132530120482,3.7710843373493974,0.42168674698795183,166,0.42168674698795183,19.595744680851062,java,0.0
87
+ 5.300160551498397,0.5909090909090909,0.33333333333333337,5.757575757575758,0.6666666666666666,66,0.6666666666666666,36.75,java,1.0
88
+ 4.45148902643068,0.5,0.3157894736842105,3.4210526315789473,0.6842105263157895,38,0.6842105263157895,21.7,python,1.0
89
+ 5.072571258365157,0.8296703296703297,0.6428571428571428,3.4450549450549453,0.35714285714285715,182,0.35714285714285715,28.515151515151516,python,1.0
90
+ 4.168243795012672,0.5161290322580645,0.32258064516129037,6.0,0.6774193548387096,31,0.6774193548387096,31.375,java,0.0
91
+ 4.123939173306749,0.6511627906976745,0.4651162790697675,3.6744186046511627,0.5348837209302325,43,0.5348837209302325,28.2,java,1.0
92
+ 4.534110630792036,0.8888888888888888,0.6666666666666667,3.1222222222222222,0.3333333333333333,90,0.3333333333333333,35.3,java,0.0
93
+ 4.889260316971673,1.0,0.7348484848484849,3.6515151515151514,0.26515151515151514,132,0.26515151515151514,23.066666666666666,java,0.0
94
+ 4.883002384777129,0.9366197183098591,0.7323943661971831,2.852112676056338,0.2676056338028169,142,0.2676056338028169,36.25806451612903,python,0.0
95
+ 3.8200705941688406,0.9666666666666667,0.8111111111111111,5.011111111111111,0.18888888888888888,90,0.18888888888888888,37.421052631578945,java,1.0
96
+ 5.866907260896749,0.7725321888412017,0.5965665236051503,5.141630901287554,0.4034334763948498,233,0.4034334763948498,71.04347826086956,python,0.0
97
+ 4.592226257370053,0.7936507936507936,0.5396825396825398,3.5714285714285716,0.4603174603174603,63,0.4603174603174603,31.5,python,0.0
98
+ 4.979264809390598,0.2926829268292683,0.1707317073170732,6.317073170731708,0.8292682926829268,41,0.8292682926829268,33.416666666666664,java,1.0
99
+ 3.1693150678026982,0.8705882352941177,0.7294117647058824,2.1411764705882352,0.27058823529411763,85,0.27058823529411763,29.88888888888889,python,1.0
100
+ 4.718291773866265,0.8803418803418803,0.6837606837606838,2.871794871794872,0.3162393162393162,117,0.3162393162393162,33.17857142857143,python,0.0
101
+ 6.057756669289524,0.5080645161290323,0.3306451612903226,5.588709677419355,0.6693548387096774,124,0.6693548387096774,37.57692307692308,python,1.0
102
+ 4.94407911950056,0.8181818181818182,0.5324675324675325,7.077922077922078,0.4675324675324675,77,0.4675324675324675,37.05,java,1.0
103
+ 4.726143834398879,0.6136363636363636,0.34090909090909094,4.840909090909091,0.6590909090909091,44,0.6590909090909091,28.25,python,1.0
104
+ 4.475969855129791,0.8305084745762712,0.5593220338983051,3.389830508474576,0.4406779661016949,59,0.4406779661016949,27.0,java,1.0
105
+ 5.689839588541731,0.783410138248848,0.6082949308755761,3.640552995391705,0.391705069124424,217,0.391705069124424,31.355555555555554,java,0.0
106
+ 4.518215514675149,0.9473684210526315,0.6578947368421053,2.526315789473684,0.34210526315789475,76,0.34210526315789475,44.285714285714285,python,1.0
107
+ 4.124232718690979,0.7678571428571429,0.5892857142857143,3.1607142857142856,0.4107142857142857,56,0.4107142857142857,25.3125,java,0.0
108
+ 4.112426730000285,0.8846153846153846,0.7403846153846154,2.951923076923077,0.25961538461538464,104,0.25961538461538464,34.09090909090909,python,0.0
109
+ 5.039480564148708,0.7866666666666666,0.4933333333333333,4.093333333333334,0.5066666666666667,75,0.5066666666666667,32.111111111111114,java,0.0
110
+ 4.677139104850461,0.8181818181818182,0.5454545454545454,3.621212121212121,0.45454545454545453,66,0.45454545454545453,22.0,python,0.0
111
+ 4.131300342505361,0.7096774193548387,0.3870967741935484,6.161290322580645,0.6129032258064516,31,0.6129032258064516,31.625,java,1.0
112
+ 5.462480227696889,0.7959183673469388,0.5850340136054422,4.707482993197279,0.41496598639455784,147,0.41496598639455784,24.872340425531913,java,0.0
113
+ 3.6914280318460246,0.8125,0.53125,2.09375,0.46875,32,0.46875,23.333333333333332,python,1.0
114
+ 4.214114119970501,0.7142857142857143,0.5102040816326531,2.816326530612245,0.4897959183673469,49,0.4897959183673469,22.583333333333332,java,1.0
115
+ 5.697217023615801,0.8171428571428572,0.5828571428571429,5.154285714285714,0.41714285714285715,175,0.41714285714285715,37.93023255813954,java,0.0
116
+ 4.0791433740260095,0.6571428571428571,0.4285714285714286,5.057142857142857,0.5714285714285714,35,0.5714285714285714,25.0,python,1.0
117
+ 3.7489948035250964,0.47619047619047616,0.2857142857142857,3.4285714285714284,0.7142857142857143,21,0.7142857142857143,26.25,python,1.0
118
+ 4.907735246015272,0.75,0.525,4.2125,0.475,80,0.475,29.869565217391305,python,0.0
119
+ 4.822037278990095,0.7808219178082192,0.5342465753424658,2.958904109589041,0.4657534246575342,73,0.4657534246575342,33.0,python,0.0
120
+ 5.030104271105216,0.8301886792452831,0.6037735849056604,3.349056603773585,0.39622641509433965,106,0.39622641509433965,33.958333333333336,java,1.0
121
+ 4.816108939837481,0.7142857142857143,0.4285714285714286,4.392857142857143,0.5714285714285714,56,0.5714285714285714,29.23076923076923,python,1.0
122
+ 2.4276042408847402,0.8461538461538461,0.7230769230769231,1.7692307692307692,0.27692307692307694,65,0.27692307692307694,27.666666666666668,python,1.0
123
+ 6.584083875285213,0.8148148148148148,0.6271604938271604,6.212345679012346,0.3728395061728395,405,0.3728395061728395,40.2247191011236,python,0.0
124
+ 4.1673958618292355,0.8536585365853658,0.6951219512195121,2.7560975609756095,0.3048780487804878,82,0.3048780487804878,29.65,java,0.0
125
+ 3.741962789037907,0.6896551724137931,0.4482758620689655,2.0689655172413794,0.5517241379310345,29,0.5517241379310345,18.285714285714285,python,1.0
126
+ 4.86754794914527,0.5178571428571429,0.3571428571428571,3.375,0.6428571428571429,56,0.6428571428571429,19.2,java,0.0
127
+ 3.9331695057515157,0.9285714285714286,0.5952380952380952,3.5714285714285716,0.40476190476190477,42,0.40476190476190477,38.166666666666664,python,1.0
128
+ 4.196483601699262,0.49019607843137253,0.4117647058823529,6.254901960784314,0.5882352941176471,51,0.5882352941176471,34.25,python,0.0
129
+ 5.906626836308724,0.8967254408060453,0.7481108312342569,3.44080604534005,0.2518891687657431,397,0.2518891687657431,53.51219512195122,java,0.0
130
+ 6.369927477083823,0.5966850828729282,0.4088397790055248,4.900552486187845,0.5911602209944752,181,0.5911602209944752,49.65384615384615,python,1.0
131
+ 4.384795487373134,0.5277777777777778,0.33333333333333337,4.611111111111111,0.6666666666666666,36,0.6666666666666666,18.25,java,0.0
132
+ 5.139173834043169,0.8348623853211009,0.5871559633027523,4.935779816513762,0.41284403669724773,109,0.41284403669724773,35.370370370370374,java,0.0
133
+ 4.238521032323433,0.7708333333333334,0.5208333333333333,4.5625,0.4791666666666667,48,0.4791666666666667,27.571428571428573,python,0.0
134
+ 4.460619643646408,0.8309859154929577,0.5915492957746479,4.704225352112676,0.4084507042253521,71,0.4084507042253521,35.72222222222222,java,0.0
135
+ 5.2878758225325395,0.9408450704225352,0.8169014084507042,4.366197183098592,0.18309859154929578,355,0.18309859154929578,42.14545454545455,java,0.0
136
+ 4.8335434078297626,0.6666666666666666,0.4545454545454546,4.46969696969697,0.5454545454545454,66,0.5454545454545454,65.66666666666667,python,0.0
137
+ 3.3889821967054377,0.9555555555555556,0.7333333333333334,5.022222222222222,0.26666666666666666,45,0.26666666666666666,53.0,java,1.0
138
+ 5.626267512421823,0.8212290502793296,0.6145251396648045,3.8491620111731844,0.3854748603351955,179,0.3854748603351955,55.166666666666664,python,1.0
139
+ 4.5054500825687525,0.9090909090909091,0.696969696969697,3.6464646464646466,0.30303030303030304,99,0.30303030303030304,41.36363636363637,python,0.0
140
+ 4.66816999226466,0.5609756097560976,0.31707317073170727,4.7560975609756095,0.6829268292682927,41,0.6829268292682927,30.2,java,1.0
141
+ 4.1138276215159735,0.8461538461538461,0.6307692307692307,2.876923076923077,0.36923076923076925,65,0.36923076923076925,31.071428571428573,java,0.0
142
+ 3.7489948035250964,0.47619047619047616,0.2857142857142857,6.904761904761905,0.7142857142857143,21,0.7142857142857143,33.57142857142857,java,1.0
143
+ 4.639932288803049,0.8472222222222222,0.6944444444444444,3.9166666666666665,0.3055555555555556,144,0.3055555555555556,40.34615384615385,java,0.0
144
+ 5.503071993063162,0.7610619469026548,0.504424778761062,3.2920353982300883,0.49557522123893805,113,0.49557522123893805,51.36363636363637,python,1.0
145
+ 5.320245758154888,0.8130081300813008,0.5853658536585367,2.227642276422764,0.4146341463414634,123,0.4146341463414634,33.5625,python,1.0
146
+ 4.448860987743577,0.5789473684210527,0.3421052631578947,4.131578947368421,0.6578947368421053,38,0.6578947368421053,32.125,python,1.0
147
+ 4.526180668235663,0.5897435897435898,0.33333333333333337,4.17948717948718,0.6666666666666666,39,0.6666666666666666,26.9,java,1.0
148
+ 4.202589101773195,0.9411764705882353,0.7294117647058824,1.7058823529411764,0.27058823529411763,85,0.27058823529411763,31.3,java,1.0
149
+ 5.934435083287192,0.6229508196721312,0.4016393442622951,5.0,0.5983606557377049,122,0.5983606557377049,36.0,python,1.0
150
+ 4.713707321865714,0.6885245901639344,0.4590163934426229,6.459016393442623,0.5409836065573771,61,0.5409836065573771,33.8125,python,0.0
151
+ 4.3180873032876175,0.9,0.7111111111111111,2.2777777777777777,0.28888888888888886,90,0.28888888888888886,38.833333333333336,python,0.0
152
+ 4.285367908896077,0.6875,0.47916666666666663,4.520833333333333,0.5208333333333334,48,0.5208333333333334,40.875,java,1.0
153
+ 4.366596139176847,0.6585365853658537,0.41463414634146345,5.609756097560975,0.5853658536585366,41,0.5853658536585366,34.8,java,1.0
154
+ 5.343782087298768,0.8652482269503546,0.624113475177305,5.7304964539007095,0.375886524822695,141,0.375886524822695,40.40625,java,0.0
155
+ 4.327819531114783,0.375,0.25,6.28125,0.75,32,0.75,29.5,java,1.0
156
+ 3.8033524347703986,0.7105263157894737,0.5,2.763157894736842,0.5,38,0.5,26.375,python,0.0
157
+ 5.72576174609731,0.797979797979798,0.6161616161616161,3.984848484848485,0.3838383838383838,198,0.3838383838383838,42.55882352941177,java,0.0
158
+ 5.409509758627718,0.841025641025641,0.676923076923077,5.671794871794872,0.3230769230769231,195,0.3230769230769231,34.90384615384615,python,0.0
159
+ 5.679736241553765,0.7013888888888888,0.5138888888888888,4.743055555555555,0.4861111111111111,144,0.4861111111111111,52.55555555555556,python,0.0
160
+ 4.4912054473944405,0.8148148148148148,0.5185185185185186,5.277777777777778,0.48148148148148145,54,0.48148148148148145,36.23076923076923,java,0.0
161
+ 3.9362434129830604,0.6333333333333333,0.4,3.1333333333333333,0.6,30,0.6,24.75,python,0.0
162
+ 4.215928174708332,0.8852459016393442,0.6229508196721312,2.721311475409836,0.3770491803278688,61,0.3770491803278688,27.923076923076923,python,0.0
163
+ 5.255072207410051,0.9081081081081082,0.7135135135135136,3.675675675675676,0.2864864864864865,185,0.2864864864864865,38.34375,python,0.0
164
+ 4.8132935840925875,0.8902439024390244,0.5853658536585367,4.365853658536586,0.4146341463414634,82,0.4146341463414634,39.166666666666664,python,0.0
165
+ 3.728999521334141,0.9047619047619048,0.7301587301587302,4.619047619047619,0.2698412698412698,63,0.2698412698412698,34.84615384615385,java,1.0
166
+ 4.4003105301349255,0.6888888888888889,0.4444444444444444,2.5555555555555554,0.5555555555555556,45,0.5555555555555556,21.90909090909091,python,1.0
167
+ 5.192613181399552,0.8343949044585988,0.6624203821656052,3.445859872611465,0.3375796178343949,157,0.3375796178343949,42.476190476190474,python,0.0
168
+ 4.896037310447887,0.8761061946902655,0.6548672566371682,3.2123893805309733,0.34513274336283184,113,0.34513274336283184,27.653846153846153,java,0.0
169
+ 4.652782517460112,0.6447368421052632,0.513157894736842,2.8684210526315788,0.4868421052631579,76,0.4868421052631579,32.76470588235294,java,0.0
170
+ 3.757539644379974,0.9032258064516129,0.7096774193548387,1.6612903225806452,0.2903225806451613,62,0.2903225806451613,24.375,java,1.0
171
+ 5.4053549514703,0.7605633802816901,0.5633802816901409,5.640845070422535,0.43661971830985913,142,0.43661971830985913,38.833333333333336,java,0.0
172
+ 4.715163766425403,0.9142857142857143,0.6761904761904762,4.580952380952381,0.3238095238095238,105,0.3238095238095238,27.733333333333334,java,0.0
173
+ 5.048749755702801,0.8085106382978723,0.5638297872340425,2.74468085106383,0.43617021276595747,94,0.43617021276595747,34.375,java,1.0
174
+ 4.412142047769037,0.734375,0.546875,2.9375,0.453125,64,0.453125,31.0,python,0.0
175
+ 4.162294909570876,0.4444444444444444,0.2592592592592593,4.148148148148148,0.7407407407407407,27,0.7407407407407407,21.428571428571427,java,0.0
176
+ 5.17784699780413,0.46153846153846156,0.32307692307692304,3.4153846153846152,0.676923076923077,65,0.676923076923077,38.92307692307692,java,0.0
177
+ 5.4865950410391395,0.8578199052132701,0.6872037914691943,4.1753554502369665,0.3127962085308057,211,0.3127962085308057,37.65217391304348,java,0.0
178
+ 5.104500026854035,0.875,0.6333333333333333,4.291666666666667,0.36666666666666664,120,0.36666666666666664,39.80769230769231,java,0.0
179
+ 6.146771906965858,0.8115183246073299,0.5445026178010471,5.350785340314136,0.45549738219895286,191,0.45549738219895286,40.8,python,0.0
180
+ 4.898714188796334,0.8387096774193549,0.6021505376344086,4.559139784946237,0.3978494623655914,93,0.3978494623655914,40.0,java,0.0
181
+ 4.33379987011787,0.6521739130434783,0.4565217391304348,4.195652173913044,0.5434782608695652,46,0.5434782608695652,21.944444444444443,java,0.0
182
+ 4.720077865655612,0.8817204301075269,0.6344086021505376,4.806451612903226,0.3655913978494624,93,0.3655913978494624,35.95238095238095,java,0.0
183
+ 4.971145882958361,0.543859649122807,0.3508771929824561,4.0701754385964914,0.6491228070175439,57,0.6491228070175439,40.375,python,1.0
184
+ 4.608465630737972,0.89,0.6799999999999999,2.32,0.32,100,0.32,30.428571428571427,java,1.0
185
+ 4.553348286183388,0.8556701030927835,0.6597938144329897,5.391752577319588,0.3402061855670103,97,0.3402061855670103,31.903225806451612,java,0.0
186
+ 5.111219097255348,0.6422018348623854,0.5045871559633027,4.009174311926605,0.4954128440366973,109,0.4954128440366973,29.88888888888889,java,0.0
187
+ 5.169436190447377,0.8862275449101796,0.7005988023952097,2.8862275449101795,0.2994011976047904,167,0.2994011976047904,42.2,java,1.0
188
+ 4.897236854659848,0.6271186440677966,0.4067796610169492,4.593220338983051,0.5932203389830508,59,0.5932203389830508,29.384615384615383,java,0.0
189
+ 5.5494840429514545,0.9004739336492891,0.6966824644549763,6.156398104265403,0.3033175355450237,211,0.3033175355450237,33.016949152542374,java,0.0
190
+ 4.316499122181017,0.8412698412698413,0.6031746031746033,2.6507936507936507,0.3968253968253968,63,0.3968253968253968,29.818181818181817,java,1.0
191
+ 4.946526678933779,0.5,0.31481481481481477,4.203703703703703,0.6851851851851852,54,0.6851851851851852,48.5,python,1.0
192
+ 3.980473809580999,0.9195402298850575,0.7471264367816092,2.528735632183908,0.25287356321839083,87,0.25287356321839083,22.53846153846154,java,0.0
193
+ 4.931343145380855,0.8930817610062893,0.6666666666666667,2.0440251572327046,0.3333333333333333,159,0.3333333333333333,25.423076923076923,python,1.0
194
+ 5.865359596361463,0.7432432432432432,0.5067567567567568,5.256756756756757,0.49324324324324326,148,0.49324324324324326,34.970588235294116,python,1.0
195
+ 5.124006295628484,0.45614035087719296,0.2807017543859649,3.754385964912281,0.7192982456140351,57,0.7192982456140351,38.0,python,0.0
196
+ 6.085910006150067,0.8445378151260504,0.6218487394957983,6.142857142857143,0.37815126050420167,238,0.37815126050420167,28.604395604395606,java,1.0
197
+ 4.868510971586463,0.7411764705882353,0.5411764705882354,3.2470588235294118,0.4588235294117647,85,0.4588235294117647,28.17391304347826,java,0.0
198
+ 4.605408294302115,0.8611111111111112,0.5972222222222222,3.138888888888889,0.4027777777777778,72,0.4027777777777778,33.857142857142854,python,0.0
199
+ 4.73995425222158,0.6964285714285714,0.4464285714285714,6.482142857142857,0.5535714285714286,56,0.5535714285714286,33.4375,java,1.0
200
+ 4.8675567139411235,0.7701149425287356,0.5517241379310345,4.816091954022989,0.4482758620689655,87,0.4482758620689655,36.13636363636363,java,0.0
201
+ 4.246481972040831,0.7454545454545455,0.5272727272727273,4.2727272727272725,0.4727272727272727,55,0.4727272727272727,30.416666666666668,python,0.0
202
+ 2.8016094970590277,0.8,0.55,5.0,0.45,20,0.45,28.8,python,1.0
203
+ 5.7659039797576215,0.5669291338582677,0.40944881889763785,5.377952755905512,0.5905511811023622,127,0.5905511811023622,35.2962962962963,java,0.0
204
+ 4.8369927564722754,0.5978260869565217,0.46739130434782605,5.695652173913044,0.532608695652174,92,0.532608695652174,30.0,python,1.0
205
+ 5.650410881834686,0.8103448275862069,0.6091954022988506,4.022988505747127,0.39080459770114945,174,0.39080459770114945,39.65853658536585,java,0.0
206
+ 4.33611521033683,0.6363636363636364,0.43181818181818177,4.75,0.5681818181818182,44,0.5681818181818182,34.6,java,1.0
207
+ 6.041274356789173,0.8076923076923077,0.657051282051282,3.801282051282051,0.34294871794871795,312,0.34294871794871795,30.096385542168676,java,0.0
208
+ 5.289310575466358,0.6428571428571429,0.5,4.190476190476191,0.5,126,0.5,31.615384615384617,python,1.0
209
+ 4.700186536808578,0.7741935483870968,0.5,3.532258064516129,0.5,62,0.5,33.76923076923077,python,0.0
210
+ 4.180451390892101,0.4444444444444444,0.2592592592592593,4.777777777777778,0.7407407407407407,27,0.7407407407407407,22.25,python,1.0
211
+ 4.786213536530154,0.9016393442622951,0.6967213114754098,2.762295081967213,0.30327868852459017,122,0.30327868852459017,29.083333333333332,python,1.0
212
+ 5.081486951808688,0.7666666666666667,0.5222222222222221,3.7888888888888888,0.4777777777777778,90,0.4777777777777778,35.8,python,1.0
213
+ 4.44849995084568,0.8461538461538461,0.6593406593406593,3.5934065934065935,0.34065934065934067,91,0.34065934065934067,31.045454545454547,python,0.0
214
+ 4.7077148025974385,0.37142857142857144,0.19999999999999996,7.228571428571429,0.8,35,0.8,40.888888888888886,java,1.0
215
+ 4.51662508214898,0.8837209302325582,0.6627906976744187,3.302325581395349,0.3372093023255814,86,0.3372093023255814,23.25,java,0.0
216
+ 4.591236738386586,0.7903225806451613,0.532258064516129,4.193548387096774,0.46774193548387094,62,0.46774193548387094,26.352941176470587,java,1.0
217
+ 4.770332655554241,0.7123287671232876,0.5205479452054795,2.5753424657534247,0.4794520547945205,73,0.4794520547945205,14.681818181818182,java,0.0
218
+ 4.193064830243576,0.9555555555555556,0.7333333333333334,2.3555555555555556,0.26666666666666666,90,0.26666666666666666,24.68421052631579,python,1.0
219
+ 3.935732282211645,0.90625,0.703125,3.1875,0.296875,64,0.296875,34.72727272727273,java,1.0
220
+ 5.255459047825728,0.7096774193548387,0.4838709677419355,4.32258064516129,0.5161290322580645,93,0.5161290322580645,47.583333333333336,python,0.0
221
+ 5.56295430052689,0.9239543726235742,0.7490494296577948,6.391634980988593,0.2509505703422053,263,0.2509505703422053,51.39655172413793,java,0.0
222
+ 4.7624023788690275,0.8846153846153846,0.5897435897435898,3.41025641025641,0.41025641025641024,78,0.41025641025641024,27.95,python,0.0
223
+ 4.641341251556263,0.7164179104477612,0.5223880597014925,3.5671641791044775,0.47761194029850745,67,0.47761194029850745,32.69230769230769,java,1.0
224
+ 3.9580339180966533,0.9142857142857143,0.7142857142857143,4.014285714285714,0.2857142857142857,70,0.2857142857142857,24.5,python,1.0
225
+ 3.821296333684861,0.6071428571428571,0.3928571428571429,4.357142857142857,0.6071428571428571,28,0.6071428571428571,26.5,java,1.0
226
+ 5.99080968202175,0.8926553672316384,0.7033898305084746,4.759887005649717,0.2966101694915254,354,0.2966101694915254,40.125,python,0.0
227
+ 5.023079206577155,0.8592592592592593,0.674074074074074,2.77037037037037,0.32592592592592595,135,0.32592592592592595,31.88888888888889,java,0.0
228
+ 4.427567157116929,0.675,0.4,5.55,0.6,40,0.6,31.4,java,1.0
229
+ 4.183636205660773,0.8666666666666667,0.6916666666666667,2.8583333333333334,0.30833333333333335,120,0.30833333333333335,28.38888888888889,python,1.0
230
+ 4.6597716630131325,0.6964285714285714,0.4642857142857143,3.892857142857143,0.5357142857142857,56,0.5357142857142857,37.9,java,0.0
231
+ 5.011095568495564,0.5454545454545454,0.32727272727272727,5.490909090909091,0.6727272727272727,55,0.6727272727272727,33.93333333333333,java,1.0
232
+ 4.07744157582188,0.8085106382978723,0.5531914893617021,2.1702127659574466,0.44680851063829785,47,0.44680851063829785,31.142857142857142,python,0.0
233
+ 4.0614821867207755,0.3181818181818182,0.18181818181818177,3.8181818181818183,0.8181818181818182,22,0.8181818181818182,29.75,python,1.0
234
+ 4.490765292824625,0.7254901960784313,0.47058823529411764,3.019607843137255,0.5294117647058824,51,0.5294117647058824,22.727272727272727,python,1.0
235
+ 3.7695855752178566,0.9612403100775194,0.8372093023255813,2.550387596899225,0.16279069767441862,129,0.16279069767441862,26.833333333333332,java,0.0
236
+ 5.113304524601207,0.8288288288288288,0.5945945945945945,3.936936936936937,0.40540540540540543,111,0.40540540540540543,26.9375,java,0.0
237
+ 6.683599316096754,0.8333333333333334,0.6434599156118144,5.89662447257384,0.35654008438818563,474,0.35654008438818563,44.127906976744185,python,0.0
238
+ 5.742479746741097,0.8826291079812206,0.6572769953051643,7.413145539906103,0.3427230046948357,213,0.3427230046948357,34.11538461538461,java,1.0
239
+ 5.684220757851569,0.842809364548495,0.6889632107023411,4.11371237458194,0.3110367892976589,299,0.3110367892976589,42.95348837209303,python,0.0
240
+ 3.7248747399889837,0.8125,0.6041666666666667,3.6666666666666665,0.3958333333333333,48,0.3958333333333333,26.857142857142858,python,0.0
241
+ 4.438721875540867,0.7083333333333334,0.45833333333333337,3.5625,0.5416666666666666,48,0.5416666666666666,14.647058823529411,java,0.0
242
+ 5.052330376272108,0.7604166666666666,0.5416666666666667,5.239583333333333,0.4583333333333333,96,0.4583333333333333,37.76190476190476,java,0.0
243
+ 4.039211536948156,0.5161290322580645,0.3548387096774194,3.3870967741935485,0.6451612903225806,31,0.6451612903225806,12.0,java,0.0
244
+ 6.058508660286119,0.8492307692307692,0.6830769230769231,6.8246153846153845,0.3169230769230769,325,0.3169230769230769,40.19047619047619,java,1.0
245
+ 4.85654598061365,0.8559322033898306,0.6440677966101696,3.610169491525424,0.3559322033898305,118,0.3559322033898305,22.06896551724138,java,0.0
246
+ 4.384791749673746,0.8899082568807339,0.7155963302752293,2.6055045871559632,0.28440366972477066,109,0.28440366972477066,33.44444444444444,java,1.0
247
+ 4.2115044085684765,0.631578947368421,0.42105263157894735,4.473684210526316,0.5789473684210527,38,0.5789473684210527,31.77777777777778,python,1.0
248
+ 6.445861001406715,0.7741935483870968,0.5443548387096775,5.548387096774194,0.45564516129032256,248,0.45564516129032256,36.03508771929825,python,0.0
249
+ 4.060861683988659,0.8043478260869565,0.5652173913043479,3.0,0.43478260869565216,46,0.43478260869565216,28.77777777777778,python,1.0
250
+ 4.770281067729926,0.6226415094339622,0.39622641509433965,4.962264150943396,0.6037735849056604,53,0.6037735849056604,29.642857142857142,java,1.0
251
+ 4.310866879118227,0.8829787234042553,0.6914893617021276,4.340425531914893,0.30851063829787234,94,0.30851063829787234,30.791666666666668,python,0.0
252
+ 4.912733576964344,0.5636363636363636,0.36363636363636365,3.4545454545454546,0.6363636363636364,55,0.6363636363636364,24.705882352941178,python,1.0
253
+ 3.76062054430996,0.6666666666666666,0.48484848484848486,3.8484848484848486,0.5151515151515151,33,0.5151515151515151,27.11111111111111,python,1.0
254
+ 5.092277224236287,0.6621621621621622,0.43243243243243246,3.7837837837837838,0.5675675675675675,74,0.5675675675675675,29.5625,java,0.0
255
+ 3.5,0.5,0.25,3.125,0.75,16,0.75,19.75,python,1.0
256
+ 4.62765192370225,0.952,0.736,2.472,0.264,125,0.264,35.714285714285715,python,0.0
257
+ 4.516221717727311,0.8026315789473685,0.6052631578947368,5.197368421052632,0.39473684210526316,76,0.39473684210526316,27.72222222222222,python,0.0
258
+ 3.937751955408587,0.6451612903225806,0.4193548387096774,4.193548387096774,0.5806451612903226,31,0.5806451612903226,29.25,python,0.0
259
+ 5.000896232556761,0.8045977011494253,0.5402298850574713,3.6091954022988504,0.45977011494252873,87,0.45977011494252873,33.0,python,1.0
260
+ 6.038322688621861,0.827906976744186,0.6046511627906976,6.120930232558139,0.3953488372093023,215,0.3953488372093023,49.08108108108108,python,0.0
261
+ 4.216152190789069,0.8163265306122449,0.5510204081632653,3.5714285714285716,0.4489795918367347,49,0.4489795918367347,22.263157894736842,java,0.0
262
+ 4.667187378614269,0.8235294117647058,0.5882352941176471,4.0588235294117645,0.4117647058823529,85,0.4117647058823529,32.857142857142854,java,0.0
263
+ 5.070332970220562,0.8478260869565217,0.6594202898550725,4.297101449275362,0.34057971014492755,138,0.34057971014492755,30.34285714285714,java,0.0
264
+ 4.2366344310623845,0.9058823529411765,0.7058823529411764,3.8941176470588235,0.29411764705882354,85,0.29411764705882354,35.0,java,1.0
265
+ 4.062382847947341,0.5454545454545454,0.36363636363636365,2.393939393939394,0.6363636363636364,33,0.6363636363636364,24.333333333333332,python,1.0
266
+ 4.477226049206228,0.6851851851851852,0.4629629629629629,3.8703703703703702,0.5370370370370371,54,0.5370370370370371,37.0,java,1.0
267
+ 4.189125478116705,0.6521739130434783,0.4782608695652174,5.217391304347826,0.5217391304347826,46,0.5217391304347826,39.77777777777778,java,1.0
268
+ 3.6818808028034025,0.47368421052631576,0.26315789473684215,3.3157894736842106,0.7368421052631579,19,0.7368421052631579,22.75,python,1.0
269
+ 4.183768266288633,0.78125,0.609375,1.96875,0.390625,64,0.390625,23.25,python,1.0
270
+ 4.063202513215852,0.7368421052631579,0.4736842105263158,3.1052631578947367,0.5263157894736842,38,0.5263157894736842,23.77777777777778,python,1.0
271
+ 5.240360496391271,0.7901234567901234,0.4814814814814815,4.592592592592593,0.5185185185185185,81,0.5185185185185185,35.0,java,0.0
272
+ 2.9870900416203345,0.7619047619047619,0.5238095238095238,4.333333333333333,0.47619047619047616,21,0.47619047619047616,18.444444444444443,python,1.0
273
+ 4.796230935589923,0.917910447761194,0.7238805970149254,3.1194029850746268,0.27611940298507465,134,0.27611940298507465,33.964285714285715,java,0.0
274
+ 4.921133626426239,0.6779661016949152,0.4067796610169492,4.576271186440678,0.5932203389830508,59,0.5932203389830508,37.9,python,0.0
275
+ 5.223430137239087,0.9595375722543352,0.838150289017341,3.3757225433526012,0.16184971098265896,346,0.16184971098265896,26.730337078651687,java,0.0
276
+ 5.035549713367317,0.5454545454545454,0.32727272727272727,3.7636363636363637,0.6727272727272727,55,0.6727272727272727,25.272727272727273,python,0.0
277
+ 5.404449012034224,0.7906976744186046,0.5658914728682171,5.658914728682171,0.43410852713178294,129,0.43410852713178294,36.758620689655174,java,0.0
278
+ 4.629412138272088,0.4418604651162791,0.3023255813953488,3.8372093023255816,0.6976744186046512,43,0.6976744186046512,27.7,python,0.0
279
+ 4.816777664828826,0.9296875,0.71875,4.7109375,0.28125,128,0.28125,37.078947368421055,java,0.0
280
+ 5.5887549870381,0.8138297872340425,0.6276595744680851,4.388297872340425,0.3723404255319149,188,0.3723404255319149,50.24,python,0.0
281
+ 4.453150309616965,0.8518518518518519,0.7185185185185186,2.4074074074074074,0.2814814814814815,135,0.2814814814814815,14.0,java,0.0
282
+ 5.547057692620618,0.9096385542168675,0.7710843373493976,4.421686746987952,0.2289156626506024,332,0.2289156626506024,31.275862068965516,java,0.0
283
+ 4.627801780853476,0.9,0.6555555555555556,5.055555555555555,0.34444444444444444,90,0.34444444444444444,34.04761904761905,java,0.0
284
+ 4.747191429566854,0.5333333333333333,0.3111111111111111,5.466666666666667,0.6888888888888889,45,0.6888888888888889,36.44444444444444,python,0.0
285
+ 5.524469760991255,0.6788990825688074,0.45871559633027525,4.91743119266055,0.5412844036697247,109,0.5412844036697247,53.38461538461539,python,0.0
286
+ 5.347434988263512,0.4444444444444444,0.29166666666666663,4.0,0.7083333333333334,72,0.7083333333333334,45.625,python,0.0
287
+ 4.074705922236444,0.847457627118644,0.6271186440677966,3.6779661016949152,0.3728813559322034,59,0.3728813559322034,23.1,python,1.0
288
+ 4.41506101220307,0.43333333333333335,0.23333333333333328,4.833333333333333,0.7666666666666667,30,0.7666666666666667,17.333333333333332,python,0.0
289
+ 5.489282954487391,0.7518796992481203,0.5488721804511278,6.7443609022556394,0.45112781954887216,133,0.45112781954887216,41.4,java,0.0
290
+ 4.3199650731621695,0.8584070796460177,0.7256637168141593,3.3628318584070795,0.2743362831858407,113,0.2743362831858407,26.551724137931036,python,0.0
291
+ 5.948852612281259,0.8584905660377359,0.6886792452830188,6.908805031446541,0.3113207547169811,318,0.3113207547169811,39.423529411764704,java,1.0
292
+ 4.251365544851387,0.957983193277311,0.773109243697479,2.302521008403361,0.226890756302521,119,0.226890756302521,25.958333333333332,python,1.0
293
+ 4.966534113288597,0.6,0.41538461538461535,3.753846153846154,0.5846153846153846,65,0.5846153846153846,34.45454545454545,python,1.0
294
+ 4.216421766792934,0.6808510638297872,0.4893617021276596,3.4468085106382977,0.5106382978723404,47,0.5106382978723404,25.53846153846154,python,1.0
295
+ 5.854429565880467,0.47619047619047616,0.32380952380952377,5.79047619047619,0.6761904761904762,105,0.6761904761904762,32.892857142857146,python,1.0
296
+ 5.542084533777365,0.8870967741935484,0.7217741935483871,4.866935483870968,0.2782258064516129,248,0.2782258064516129,38.97727272727273,java,0.0
297
+ 3.8370959941879037,0.7560975609756098,0.5609756097560976,2.5853658536585367,0.43902439024390244,41,0.43902439024390244,23.8,python,1.0
298
+ 5.510000244558343,0.8188405797101449,0.572463768115942,5.884057971014493,0.427536231884058,138,0.427536231884058,31.333333333333332,java,0.0
299
+ 3.9171736218037974,0.9090909090909091,0.7121212121212122,2.878787878787879,0.2878787878787879,66,0.2878787878787879,30.307692307692307,java,1.0
300
+ 4.955593373471456,0.9126984126984127,0.6904761904761905,2.7222222222222223,0.30952380952380953,126,0.30952380952380953,31.208333333333332,python,1.0
301
+ 4.123966016311361,0.7407407407407407,0.537037037037037,3.074074074074074,0.46296296296296297,54,0.46296296296296297,29.9,python,0.0
302
+ 5.280191443467718,0.7971014492753623,0.6086956521739131,4.173913043478261,0.391304347826087,138,0.391304347826087,21.972972972972972,java,0.0
303
+ 4.616874605956221,0.47058823529411764,0.23529411764705888,5.147058823529412,0.7647058823529411,34,0.7647058823529411,33.142857142857146,python,1.0
304
+ 4.128326967635621,0.8043478260869565,0.5434782608695652,4.108695652173913,0.45652173913043476,46,0.45652173913043476,30.363636363636363,java,1.0
305
+ 4.589416279148799,0.8409090909090909,0.6363636363636364,2.715909090909091,0.36363636363636365,88,0.36363636363636365,17.785714285714285,java,0.0
306
+ 4.935672972133341,0.9946236559139785,0.8064516129032258,4.091397849462366,0.1935483870967742,186,0.1935483870967742,31.8,java,0.0
307
+ 4.76880107599223,0.8446601941747572,0.6310679611650485,3.8446601941747574,0.36893203883495146,103,0.36893203883495146,52.72727272727273,python,0.0
308
+ 4.1699823363507,0.8478260869565217,0.5434782608695652,5.456521739130435,0.45652173913043476,46,0.45652173913043476,33.45454545454545,java,1.0
309
+ 4.017917900762096,0.7058823529411765,0.4411764705882353,4.117647058823529,0.5588235294117647,34,0.5588235294117647,30.571428571428573,python,0.0
310
+ 4.6586089678214995,0.896,0.704,2.696,0.296,125,0.296,31.40740740740741,python,0.0
311
+ 4.643160624658742,0.7258064516129032,0.5,3.3870967741935485,0.5,62,0.5,45.30769230769231,java,0.0
312
+ 4.813012081410429,0.7857142857142857,0.5285714285714286,4.257142857142857,0.4714285714285714,70,0.4714285714285714,31.647058823529413,python,1.0
313
+ 4.60865917212318,0.9142857142857143,0.6952380952380952,2.6952380952380954,0.3047619047619048,105,0.3047619047619048,28.5,python,1.0
314
+ 4.713243393050852,0.9347826086956522,0.7536231884057971,2.152173913043478,0.2463768115942029,138,0.2463768115942029,33.76190476190476,python,0.0
315
+ 4.619054056131191,0.734375,0.515625,3.296875,0.484375,64,0.484375,28.066666666666666,java,0.0
316
+ 4.825245826488317,0.7654320987654321,0.5308641975308642,4.580246913580247,0.4691358024691358,81,0.4691358024691358,33.9,java,0.0
317
+ 6.527235468349858,0.8012232415902141,0.6085626911314985,5.792048929663609,0.39143730886850153,327,0.39143730886850153,42.516129032258064,python,0.0
318
+ 5.59181229230442,0.7133333333333334,0.5333333333333333,4.233333333333333,0.4666666666666667,150,0.4666666666666667,33.55172413793103,python,0.0
319
+ 4.0006374384002905,0.8421052631578947,0.631578947368421,2.491228070175439,0.3684210526315789,57,0.3684210526315789,33.5,python,1.0
320
+ 3.7461571189724303,0.9545454545454546,0.6590909090909092,4.25,0.3409090909090909,44,0.3409090909090909,28.833333333333332,python,1.0
321
+ 5.745512973329542,0.8275862068965517,0.5517241379310345,6.027586206896552,0.4482758620689655,145,0.4482758620689655,34.648648648648646,java,0.0
322
+ 5.490266518243658,0.8412698412698413,0.6243386243386244,5.121693121693122,0.37566137566137564,189,0.37566137566137564,40.166666666666664,java,0.0
323
+ 4.496115365169272,0.6875,0.4375,4.729166666666667,0.5625,48,0.5625,43.285714285714285,python,1.0
324
+ 5.505436888624101,0.945273631840796,0.7164179104477613,3.855721393034826,0.2835820895522388,201,0.2835820895522388,35.58139534883721,java,0.0
325
+ 4.795079572399797,0.8155339805825242,0.6116504854368932,4.621359223300971,0.3883495145631068,103,0.3883495145631068,37.54545454545455,java,0.0
326
+ 5.595002345872928,0.7768595041322314,0.5206611570247934,6.099173553719008,0.4793388429752066,121,0.4793388429752066,32.029411764705884,java,0.0
327
+ 5.887085632611005,0.93359375,0.68359375,6.109375,0.31640625,256,0.31640625,31.675675675675677,java,0.0
328
+ 4.809267348202161,0.7971014492753623,0.5217391304347826,3.8840579710144927,0.4782608695652174,69,0.4782608695652174,38.15384615384615,python,0.0
329
+ 4.893291230757242,0.5737704918032787,0.39344262295081966,4.311475409836065,0.6065573770491803,61,0.6065573770491803,33.7,python,0.0
330
+ 4.346926022805004,0.8,0.5454545454545454,4.072727272727272,0.45454545454545453,55,0.45454545454545453,29.866666666666667,java,0.0
331
+ 4.476474118254376,0.7857142857142857,0.5178571428571428,6.660714285714286,0.48214285714285715,56,0.48214285714285715,39.92307692307692,java,1.0
332
+ 3.9809099797950775,0.9154929577464789,0.704225352112676,4.422535211267606,0.29577464788732394,71,0.29577464788732394,30.0,java,1.0
333
+ 5.272508205443481,0.5967741935483871,0.32258064516129037,6.193548387096774,0.6774193548387096,62,0.6774193548387096,44.166666666666664,java,1.0
334
+ 5.1902361166256314,0.9676113360323887,0.7854251012145749,4.611336032388664,0.2145748987854251,247,0.2145748987854251,28.724137931034484,java,1.0
335
+ 4.350489295854283,0.7333333333333333,0.55,3.183333333333333,0.45,60,0.45,33.8,java,1.0
336
+ 5.0009504388098796,0.7927927927927928,0.5765765765765766,4.5225225225225225,0.42342342342342343,111,0.42342342342342343,35.142857142857146,java,0.0
337
+ 5.7769401731183265,0.9180327868852459,0.6284153005464481,5.573770491803279,0.37158469945355194,183,0.37158469945355194,28.34375,java,0.0
338
+ 3.876189125313799,0.8970588235294118,0.6911764705882353,2.073529411764706,0.3088235294117647,68,0.3088235294117647,27.9,java,1.0
339
+ 4.032303242743952,0.4444444444444444,0.2962962962962963,3.888888888888889,0.7037037037037037,27,0.7037037037037037,26.5,python,0.0
340
+ 3.625,0.375,0.1875,6.625,0.8125,16,0.8125,27.8,python,1.0
341
+ 4.4210998675908275,0.9540816326530612,0.826530612244898,2.4846938775510203,0.17346938775510204,196,0.17346938775510204,35.06896551724138,python,0.0
342
+ 4.839775539645508,0.3157894736842105,0.1842105263157895,5.157894736842105,0.8157894736842105,38,0.8157894736842105,37.0,python,1.0
343
+ 5.842252978929465,0.8711484593837535,0.7254901960784313,6.647058823529412,0.27450980392156865,357,0.27450980392156865,42.16470588235294,java,1.0
344
+ 4.166886761135438,0.78,0.56,3.2,0.44,50,0.44,29.916666666666668,python,0.0
345
+ 5.395683189409616,0.8947368421052632,0.6947368421052631,5.515789473684211,0.30526315789473685,190,0.30526315789473685,29.24590163934426,java,1.0
basemodel/train_randomforest.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import joblib
3
+ from sklearn.ensemble import RandomForestClassifier
4
+ from sklearn.metrics import accuracy_score, classification_report
5
+
6
+
7
# Token-level feature tables for the training and validation splits.
train_df = pd.read_csv("basemodel/train_features.csv")
val_df = pd.read_csv("basemodel/val_features.csv")

# "Label" is the prediction target. "language" holds strings, so it is
# excluded from the model inputs whenever the training table carries it.
drop_cols = ["Label"]
has_language_column = "language" in train_df.columns
if has_language_column:
    drop_cols.append("language")

y_train = train_df["Label"]
y_val = val_df["Label"]
X_train = train_df.drop(columns=drop_cols)
X_val = val_df.drop(columns=drop_cols)

# Baseline Random Forest hyperparameters; the seed is fixed for reproducibility.
rf_params = {
    "n_estimators": 200,
    "max_depth": 8,
    "min_samples_split": 5,
    "min_samples_leaf": 3,
    "random_state": 42,
    "class_weight": "balanced",
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

# Score the fitted model on the validation split.
val_preds = rf.predict(X_val)
accuracy = accuracy_score(y_val, val_preds)
print("\nValidation Accuracy:", round(accuracy, 4))

print("\nValidation Classification Report:\n")
validation_report = classification_report(
    y_val,
    val_preds,
    target_names=["Human", "AI"],
)
print(validation_report)

# Persist the fitted baseline next to its feature tables.
joblib.dump(rf, "basemodel/random_forest_baseline.pkl")
print("\n✅ Random Forest baseline model saved to basemodel/random_forest_baseline.pkl")
basemodel/val_features.csv ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 3.616348566075164,0.42105263157894735,0.26315789473684215,3.4210526315789473,0.7368421052631579,19,0.7368421052631579,25.25,python,1.0
3
+ 5.225116096801971,0.9219512195121952,0.7365853658536585,4.84390243902439,0.2634146341463415,205,0.2634146341463415,31.340425531914892,java,0.0
4
+ 5.858683407719383,0.5053763440860215,0.30107526881720426,6.021505376344086,0.6989247311827957,93,0.6989247311827957,30.59259259259259,python,1.0
5
+ 3.4687406100460665,0.918918918918919,0.6486486486486487,2.4324324324324325,0.35135135135135137,37,0.35135135135135137,40.333333333333336,python,1.0
6
+ 4.726474118254375,0.75,0.4642857142857143,6.339285714285714,0.5357142857142857,56,0.5357142857142857,33.25,java,1.0
7
+ 5.127518057233353,0.9186602870813397,0.7416267942583732,4.779904306220096,0.2583732057416268,209,0.2583732057416268,42.84848484848485,java,0.0
8
+ 3.385592490318422,0.8181818181818182,0.6909090909090909,2.6,0.3090909090909091,55,0.3090909090909091,23.75,python,1.0
9
+ 5.303371529748239,0.8614457831325302,0.6566265060240963,4.445783132530121,0.3433734939759036,166,0.3433734939759036,19.36,java,0.0
10
+ 4.32307418942857,0.72,0.5,4.04,0.5,50,0.5,23.647058823529413,python,1.0
11
+ 4.646104113627779,0.6666666666666666,0.4444444444444444,3.925925925925926,0.5555555555555556,54,0.5555555555555556,17.8125,java,0.0
12
+ 4.824043435284101,0.7971014492753623,0.5217391304347826,3.9130434782608696,0.4782608695652174,69,0.4782608695652174,26.157894736842106,python,0.0
13
+ 4.133660689688186,0.44,0.24,6.0,0.76,25,0.76,28.625,java,1.0
14
+ 3.977232016069148,0.8854166666666666,0.7395833333333333,2.7395833333333335,0.2604166666666667,96,0.2604166666666667,28.4375,python,1.0
15
+ 4.418945246473782,0.8461538461538461,0.641025641025641,3.1153846153846154,0.358974358974359,78,0.358974358974359,33.78947368421053,java,0.0
16
+ 3.795088586397732,0.4782608695652174,0.30434782608695654,3.5652173913043477,0.6956521739130435,23,0.6956521739130435,21.166666666666668,python,1.0
17
+ 4.665893393157604,0.8735632183908046,0.632183908045977,2.4942528735632186,0.367816091954023,87,0.367816091954023,30.833333333333332,java,1.0
18
+ 4.888279432092592,0.5636363636363636,0.36363636363636365,4.5636363636363635,0.6363636363636364,55,0.6363636363636364,31.7,python,0.0
19
+ 4.43887358554625,0.7111111111111111,0.4444444444444444,4.488888888888889,0.5555555555555556,45,0.5555555555555556,27.533333333333335,java,0.0
20
+ 4.956005194695069,0.7636363636363637,0.5909090909090908,3.963636363636364,0.4090909090909091,110,0.4090909090909091,22.925925925925927,java,0.0
21
+ 4.364611584126849,0.8990825688073395,0.7201834862385321,3.86697247706422,0.2798165137614679,218,0.2798165137614679,36.03125,python,0.0
22
+ 5.1310450521201885,0.6923076923076923,0.4945054945054945,3.8241758241758244,0.5054945054945055,91,0.5054945054945055,43.083333333333336,python,0.0
23
+ 4.157269582127642,0.9487179487179487,0.7863247863247863,1.735042735042735,0.21367521367521367,117,0.21367521367521367,36.583333333333336,python,1.0
24
+ 6.1032071909268755,0.7524271844660194,0.558252427184466,4.344660194174757,0.441747572815534,206,0.441747572815534,42.59375,python,0.0
25
+ 5.489504669651612,0.9067796610169492,0.7203389830508475,3.983050847457627,0.2796610169491525,236,0.2796610169491525,46.03225806451613,java,0.0
26
+ 5.5429756373358305,0.6216216216216216,0.4504504504504504,3.810810810810811,0.5495495495495496,111,0.5495495495495496,46.0,python,0.0
27
+ 5.597773058187228,0.7096774193548387,0.4193548387096774,4.548387096774194,0.5806451612903226,93,0.5806451612903226,32.27777777777778,python,1.0
28
+ 4.288320189106,0.7916666666666666,0.5208333333333333,4.229166666666667,0.4791666666666667,48,0.4791666666666667,34.0,python,0.0
29
+ 5.117143809076922,0.8490566037735849,0.5849056603773585,3.943396226415094,0.41509433962264153,106,0.41509433962264153,39.4,python,0.0
30
+ 4.3341837197791895,0.8,0.44999999999999996,7.05,0.55,40,0.55,41.666666666666664,java,1.0
31
+ 4.7319988769018755,0.8416666666666667,0.675,3.091666666666667,0.325,120,0.325,15.944444444444445,java,0.0
32
+ 4.2628148954723555,0.55,0.4,2.825,0.6,40,0.6,31.285714285714285,python,0.0
33
+ 3.25,0.875,0.625,3.6875,0.375,32,0.375,41.4,java,1.0
34
+ 5.086021506160257,0.7008547008547008,0.5470085470085471,4.239316239316239,0.452991452991453,117,0.452991452991453,44.125,python,0.0
35
+ 4.488525294897748,0.6923076923076923,0.46153846153846156,3.9038461538461537,0.5384615384615384,52,0.5384615384615384,30.181818181818183,java,1.0
36
+ 5.273684376262023,0.6111111111111112,0.375,4.819444444444445,0.625,72,0.625,42.72727272727273,python,0.0
37
+ 4.047055675509121,0.8627450980392157,0.607843137254902,4.372549019607843,0.39215686274509803,51,0.39215686274509803,28.428571428571427,python,1.0
38
+ 4.6442547530701885,0.5370370370370371,0.38888888888888884,5.12962962962963,0.6111111111111112,54,0.6111111111111112,34.7,java,0.0
39
+ 4.423251796980337,0.32142857142857145,0.1785714285714286,2.642857142857143,0.8214285714285714,28,0.8214285714285714,30.0,python,1.0
40
+ 5.498927847038898,0.5842696629213483,0.3820224719101124,6.617977528089888,0.6179775280898876,89,0.6179775280898876,41.421052631578945,python,1.0
41
+ 4.376857192109841,0.8625,0.65,3.4125,0.35,80,0.35,29.764705882352942,java,0.0
42
+ 4.600176636857892,0.7894736842105263,0.5087719298245614,3.6842105263157894,0.49122807017543857,57,0.49122807017543857,30.5,python,1.0
43
+ 6.213552119915352,0.8066037735849056,0.5613207547169812,5.485849056603773,0.4386792452830189,212,0.4386792452830189,39.6,python,0.0
44
+ 4.024059741690056,0.7727272727272727,0.6060606060606061,3.5454545454545454,0.3939393939393939,66,0.3939393939393939,30.46153846153846,python,1.0
45
+ 4.14853573331382,0.7619047619047619,0.5,7.023809523809524,0.5,42,0.5,26.266666666666666,java,0.0
46
+ 4.2057539822273915,0.8507462686567164,0.6417910447761195,3.5522388059701493,0.3582089552238806,67,0.3582089552238806,30.352941176470587,python,0.0
47
+ 4.860498585099573,0.8737864077669902,0.6504854368932038,4.097087378640777,0.34951456310679613,103,0.34951456310679613,32.61538461538461,java,0.0
48
+ 4.661648023061636,0.7631578947368421,0.5526315789473684,3.776315789473684,0.4473684210526316,76,0.4473684210526316,32.05555555555556,java,0.0
49
+ 5.405165733375986,0.9311926605504587,0.7339449541284404,5.426605504587156,0.26605504587155965,218,0.26605504587155965,33.0,java,0.0
50
+ 3.238901256602631,0.46153846153846156,0.23076923076923073,4.538461538461538,0.7692307692307693,13,0.7692307692307693,26.0,python,1.0
51
+ 3.964395529526285,0.9512195121951219,0.5853658536585367,4.170731707317073,0.4146341463414634,41,0.4146341463414634,27.1,java,1.0
52
+ 4.091031702849688,0.8170731707317073,0.6707317073170731,3.8780487804878048,0.32926829268292684,82,0.32926829268292684,31.833333333333332,python,0.0
53
+ 3.6596769481795466,0.8571428571428571,0.6190476190476191,4.976190476190476,0.38095238095238093,42,0.38095238095238093,24.533333333333335,java,1.0
54
+ 5.249145047994913,0.6875,0.5178571428571428,3.8125,0.48214285714285715,112,0.48214285714285715,46.5,python,0.0
55
+ 4.456796057875661,0.7761194029850746,0.582089552238806,2.283582089552239,0.417910447761194,67,0.417910447761194,25.545454545454547,java,1.0
56
+ 5.669233537529181,0.8121827411167513,0.6142131979695431,4.3908629441624365,0.38578680203045684,197,0.38578680203045684,49.56,python,0.0
57
+ 4.923879361943277,0.9572649572649573,0.6923076923076923,4.205128205128205,0.3076923076923077,117,0.3076923076923077,31.321428571428573,python,0.0
58
+ 4.028579035079149,0.9841269841269841,0.7142857142857143,3.8253968253968256,0.2857142857142857,63,0.2857142857142857,32.666666666666664,python,1.0
59
+ 4.931270849848961,0.6,0.36363636363636365,5.763636363636364,0.6363636363636364,55,0.6363636363636364,29.8125,java,1.0
60
+ 4.987993396430413,0.8682170542635659,0.6744186046511628,3.317829457364341,0.32558139534883723,129,0.32558139534883723,33.02777777777778,java,0.0
61
+ 4.895499903544709,0.9303482587064676,0.7661691542288558,3.4776119402985075,0.23383084577114427,201,0.23383084577114427,23.032258064516128,java,0.0
62
+ 4.297689964750703,0.9158878504672897,0.7476635514018692,3.532710280373832,0.2523364485981308,107,0.2523364485981308,31.107142857142858,java,0.0
63
+ 4.531740514375559,0.8,0.509090909090909,5.9818181818181815,0.4909090909090909,55,0.4909090909090909,30.25,java,1.0
64
+ 4.894400915540714,0.7763157894736842,0.5263157894736843,3.3157894736842106,0.47368421052631576,76,0.47368421052631576,29.952380952380953,python,0.0
65
+ 5.754451229096238,0.7107438016528925,0.4628099173553719,5.603305785123967,0.5371900826446281,121,0.5371900826446281,34.03703703703704,java,0.0
66
+ 3.5141016549301787,0.6944444444444444,0.5555555555555556,3.0277777777777777,0.4444444444444444,36,0.4444444444444444,33.6,python,1.0
67
+ 5.4389854001411315,0.7350427350427351,0.5213675213675213,5.170940170940171,0.47863247863247865,117,0.47863247863247865,43.72727272727273,java,0.0
68
+ 6.500985818092827,0.8327974276527331,0.6109324758842444,6.864951768488746,0.3890675241157556,311,0.3890675241157556,38.79775280898876,java,1.0
69
+ 3.9219280948873627,0.4,0.19999999999999996,3.85,0.8,20,0.8,20.5,python,1.0
70
+ 4.594928174344042,0.9072164948453608,0.6804123711340206,2.6804123711340204,0.31958762886597936,97,0.31958762886597936,35.5,java,1.0
71
+ 6.3478254862783485,0.7662337662337663,0.5454545454545454,6.783549783549783,0.45454545454545453,231,0.45454545454545453,33.81818181818182,java,1.0
72
+ 4.777798184597433,0.8349514563106796,0.6310679611650485,4.475728155339806,0.36893203883495146,103,0.36893203883495146,30.413793103448278,python,0.0
73
+ 4.955482172039427,0.7422680412371134,0.5257731958762887,5.556701030927835,0.4742268041237113,97,0.4742268041237113,29.88,python,0.0
74
+ 3.963119345888566,0.7291666666666666,0.5416666666666667,3.8333333333333335,0.4583333333333333,48,0.4583333333333333,29.2,python,1.0
75
+ 4.981129381113054,0.7857142857142857,0.5892857142857143,3.8214285714285716,0.4107142857142857,112,0.4107142857142857,47.69230769230769,python,0.0
classifier/__init__.py ADDED
File without changes
classifier/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (176 Bytes). View file
 
classifier/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (180 Bytes). View file
 
classifier/__pycache__/inference.cpython-310.pyc ADDED
Binary file (3.04 kB). View file
 
classifier/__pycache__/inference.cpython-312.pyc ADDED
Binary file (6.6 kB). View file
 
classifier/inference.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+
4
+ PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
5
+ sys.path.append(PROJECT_ROOT)
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import xgboost as xgb
10
+
11
+ # -------------------------------
12
+ # IMPORT FEATURE EXTRACTORS
13
+ # -------------------------------
14
+ from featureextraction.step1_statistical_extraction.step1_statistical_extraction import extract_features as extract_stat
15
+ from featureextraction.step2_ast_extraction.step2_ast_extraction import extract_ast_features
16
+ from featureextraction.step3_stylometry_extraction.step3_stylometry_extraction import extract_stylometry_features
17
+ from featureextraction.semantic_features.unixcoder_embedding import get_unixcoder_embedding
18
+
19
+ # XAI modules
20
+ from xai.shaplayer import shap_explain
21
+ from xai.grouping import group_shap_explanations
22
+ from xai.text_explainer import generate_text_explanation
23
+
24
+ # -------------------------------
25
+ # LOAD MODEL
26
+ # -------------------------------
27
# Resolve the model file relative to this module instead of the current
# working directory, so the module can be imported or run from any directory.
# The file name is unchanged: xgboost_final_model.json next to this script.
_MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "xgboost_final_model.json",
)

model = xgb.XGBClassifier()
model.load_model(_MODEL_PATH)
29
+
30
+ # -------------------------------
31
+ # LANGUAGE ONE-HOT
32
+ # -------------------------------
33
def encode_language(language):
    """Return the one-hot language vector in the column order used for training.

    The training matrices are built in feature_concatenation.py with
    ``pd.get_dummies(stat_df["language"], prefix="lang")``, which orders the
    dummy columns alphabetically: ``lang_java`` first, ``lang_python`` second.
    The vector returned here must follow that same order, otherwise the two
    language features are swapped at inference time.
    NOTE(review): assumes the loaded model was trained on those matrices.

    Args:
        language: Language name; surrounding whitespace and case are ignored.

    Returns:
        A length-2 numpy array ``[is_java, is_python]``.

    Raises:
        ValueError: If the language is neither python nor java.
    """
    language = language.strip().lower()
    if language == "python":
        return np.array([0, 1])
    if language == "java":
        return np.array([1, 0])
    raise ValueError("Language must be python or java")
41
+
42
+ # -------------------------------
43
+ # BUILD FEATURES FROM CODE
44
+ # -------------------------------
45
def build_features_from_code(code, language):
    """Assemble the single-row feature matrix for one code snippet.

    The snippet is wrapped in a one-row DataFrame and passed through the
    statistical, AST and stylometry extractors. Their outputs are flattened
    and joined with the language one-hot vector and the UniXcoder embedding,
    in that order.

    Args:
        code: Source code text to classify.
        language: Language name, forwarded to ``encode_language``.

    Returns:
        A numpy array of shape ``(1, n_features)``.
    """
    frame = pd.DataFrame({"normalized_code": [code], "Language": [language]})

    # Run all three extractors first, in the same order as before.
    stat_frame = extract_stat(frame)
    ast_frame = extract_ast_features(frame)
    style_frame = extract_stylometry_features(frame)

    # The statistical table carries a string "language" column; it is replaced
    # by the explicit one-hot vector below.
    feature_parts = [
        stat_frame.drop(columns=["language"]).values.flatten(),
        ast_frame.values.flatten(),
        style_frame.values.flatten(),
        encode_language(language),
        get_unixcoder_embedding(code),
    ]

    return np.hstack(feature_parts).reshape(1, -1)
71
+
72
+ # -------------------------------
73
+ # BASIC PREDICT FUNCTION
74
+ # -------------------------------
75
def predict_from_features(X_final):
    """Classify one feature row with the module-level XGBoost model.

    Args:
        X_final: Feature matrix holding the sample in its first row.

    Returns:
        A ``(label_name, probability)`` tuple. ``label_name`` is "AI" when the
        predicted class is 1 and "Human" otherwise. ``probability`` is always
        the model's probability for class 1, whichever label was predicted.
    """
    predicted_class = model.predict(X_final)[0]
    class_one_probability = model.predict_proba(X_final)[0][1]

    if predicted_class == 1:
        return "AI", class_one_probability
    return "Human", class_one_probability
80
+
81
+ # -------------------------------
82
+ # INTERACTIVE CLI
83
+ # -------------------------------
84
if __name__ == "__main__":
    # Interactive CLI: read a language and a pasted code snippet, classify it,
    # then print the prediction together with its SHAP-based explanation.

    print("\n======================================")
    print(" AI vs Human Code Classification")
    print("======================================")

    language = input("Choose language (python/java): ").strip().lower()

    print("\nPaste your code below.")
    print("Type 'END' on a new line when finished.\n")

    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            # stdin was closed (e.g. piped input or Ctrl-D) without the END
            # sentinel; treat that as the end of the snippet instead of crashing.
            break
        if line.strip() == "END":
            break
        lines.append(line)

    code_input = "\n".join(lines)

    # build features
    X_final = build_features_from_code(code_input, language)

    # predict
    label, prob = predict_from_features(X_final)

    # shap
    shap_result = shap_explain(model, X_final)

    # grouping
    grouped = group_shap_explanations(shap_result)

    # text explanation
    text_reason = generate_text_explanation(grouped, label, prob)

    print("\n========== RESULT ==========")
    print("Prediction :", label)
    print("Confidence :", prob)

    print("\nTop SHAP features:")
    for e in shap_result:
        print(f"Feature {e['feature_index']} → {e['impact']} ({e['pushes_toward']})")

    print("\nGrouped SHAP importance:", grouped)

    print("\nExplanation:\n")
    print(text_reason)
classifier/models/xgboost_final_model.json ADDED
The diff for this file is too large to render. See raw diff
 
classifier/test_xgboost.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import xgboost as xgb
3
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
4
+
5
# Held-out test split, read from the final_features directory.
X_test = np.load("featureextraction/final_features/test_X.npy")
y_test = np.load("featureextraction/final_features/test_y.npy")
print("Test shape:", X_test.shape)

# Restore the saved classifier and score the test split.
model = xgb.XGBClassifier()
model.load_model("classifier/xgboost_final_model.json")
y_pred = model.predict(X_test)

print("\nTEST SET RESULTS (FINAL MODEL)\n")
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

print("\nClassification Report:\n")
test_report = classification_report(y_test, y_pred, target_names=["Human", "AI"])
print(test_report)

print("\nConfusion Matrix:\n")
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)
classifier/train_xgboost.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import xgboost as xgb
3
+ from sklearn.metrics import accuracy_score, classification_report
4
+ import joblib
5
+ import os
6
+
7
# Repository root: the parent of the directory that contains this script.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

FEATURE_DIR = os.path.join(
    PROJECT_ROOT,
    "featureextraction",
    "final_features"
)


def _load_array(filename):
    """Load one saved .npy array from FEATURE_DIR."""
    # allow_pickle is left at numpy's safe default (False). The sibling tuning
    # and test scripts load these same files without it, and unpickling is
    # only needed for object arrays.
    # NOTE(review): presumes the label arrays are numeric, not object dtype.
    return np.load(os.path.join(FEATURE_DIR, filename))


X_train = _load_array("train_X.npy")
y_train = _load_array("train_y.npy")

X_val = _load_array("val_X.npy")
y_val = _load_array("val_y.npy")

print("Train shape:", X_train.shape)
print("Validation shape:", X_val.shape)
print("Feature dtype:", X_train.dtype)

# Fixed hyperparameters for the untuned model; the seed is pinned.
model = xgb.XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="binary:logistic",
    eval_metric="logloss",
    random_state=42
)

model.fit(X_train, y_train)

val_preds = model.predict(X_val)

print("\nVALIDATION RESULTS\n")
print("Accuracy:", accuracy_score(y_val, val_preds))
print(
    classification_report(
        y_val,
        val_preds,
        target_names=["Human", "AI"]
    )
)

# The model is written to <PROJECT_ROOT>/models in two formats: XGBoost's own
# JSON and a joblib pickle.
# NOTE(review): the test script loads "classifier/xgboost_final_model.json",
# which is a different location from this output directory — confirm which
# file is meant to be the deployed model.
MODEL_DIR = os.path.join(PROJECT_ROOT, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

model.save_model(os.path.join(MODEL_DIR, "xgboost_final_model.json"))
joblib.dump(model, os.path.join(MODEL_DIR, "xgboost_final_model.pkl"))

print("\n XGBoost model saved successfully")
classifier/tune_xgboost.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import xgboost as xgb
3
+ from sklearn.metrics import accuracy_score, classification_report
4
+ from sklearn.model_selection import ParameterGrid
5
+ import joblib
6
+
7
# Pre-computed feature matrices and labels for the train/validation splits.
X_train = np.load("featureextraction/final_features/train_X.npy")
y_train = np.load("featureextraction/final_features/train_y.npy")

X_val = np.load("featureextraction/final_features/val_X.npy")
y_val = np.load("featureextraction/final_features/val_y.npy")

print("Train shape:", X_train.shape)
print("Val shape:", X_val.shape)

# Exhaustive grid: every combination below is trained and scored once.
param_grid = {
    "n_estimators": [300, 500],
    "max_depth": [4, 6, 8],
    "learning_rate": [0.03, 0.05, 0.1],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0]
}

# Start below any reachable accuracy so the first trial is always recorded.
# Starting at 0.0 with a strict ">" would leave best_model as None if every
# trial scored exactly 0.0, and the reporting code below would then fail.
best_acc = float("-inf")
best_model = None
best_params = None

print("\n Starting XGBoost Hyperparameter Tuning...\n")

for idx, params in enumerate(ParameterGrid(param_grid), start=1):
    print(f"Trial {idx}: {params}")

    model = xgb.XGBClassifier(
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42,
        **params
    )

    model.fit(X_train, y_train)

    val_preds = model.predict(X_val)
    acc = accuracy_score(y_val, val_preds)

    print("Validation Accuracy:", round(acc, 4))

    # Strict comparison: on ties the earliest configuration is kept.
    if acc > best_acc:
        best_acc = acc
        best_model = model
        best_params = params
        print("New best model found")

    print("-" * 60)

print("\n🏆 BEST VALIDATION ACCURACY:", round(best_acc, 4))
print("🏆 BEST PARAMETERS:", best_params)

final_preds = best_model.predict(X_val)

print("\nFINAL VALIDATION REPORT\n")
print(
    classification_report(
        y_val,
        final_preds,
        target_names=["Human", "AI"]
    )
)

# Save the winning model as XGBoost JSON and as a joblib pickle.
best_model.save_model("classifier/xgboost_final_model.json")
joblib.dump(best_model, "classifier/xgboost_final_model.pkl")

print("\n Tuned XGBoost model saved successfully")
classifier/xgboost_final_model.json ADDED
The diff for this file is too large to render. See raw diff
 
dataset/processed/dataset_step1_length_normalized.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/processed/dataset_step2_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/processed/dataset_test.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/processed/dataset_train.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/processed/dataset_val.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/raw/raw_dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
featureextraction/__init__.py ADDED
File without changes
featureextraction/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (183 Bytes). View file
 
featureextraction/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (187 Bytes). View file
 
featureextraction/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (187 Bytes). View file
 
featureextraction/feature_concatenation.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import os
4
+
5
# Repository root: two directory levels above this file.
_THIS_FILE = os.path.abspath(__file__)
PROJECT_ROOT = os.path.dirname(os.path.dirname(_THIS_FILE))

# Every per-step feature directory lives under <root>/featureextraction.
_FEATURE_ROOT = os.path.join(PROJECT_ROOT, "featureextraction")

STAT_PATH = os.path.join(_FEATURE_ROOT, "step1_statistical_extraction")
AST_PATH = os.path.join(_FEATURE_ROOT, "step2_ast_extraction")
STYLE_PATH = os.path.join(_FEATURE_ROOT, "step3_stylometry_extraction")
SEM_PATH = os.path.join(_FEATURE_ROOT, "semantic_features")

# Destination for the concatenated matrices; created on import if missing.
OUTPUT_DIR = os.path.join(_FEATURE_ROOT, "final_features")
os.makedirs(OUTPUT_DIR, exist_ok=True)
14
+
15
def load_and_concatenate(split):
    """Build the final feature matrix and label vector for one dataset split.

    Reads the statistical, AST and stylometry CSV tables and the UniXcoder
    embedding array for ``split``, one-hot encodes the language column, and
    stacks everything column-wise in the order
    ``[statistical, AST, stylometry, language one-hot, semantic]``.

    The language one-hot always has exactly two columns, ``lang_java`` then
    ``lang_python``. That is the order ``pd.get_dummies`` produces when both
    languages occur, and pinning the categories keeps the feature width and
    column meaning stable even if a split contains only one language.

    Args:
        split: Split name used in the file names (e.g. "train", "val", "test").

    Returns:
        ``(X, y)`` where ``X`` is a float array with one row per sample and
        ``y`` holds the values of the "Label" column.

    Raises:
        ValueError: If the language column is missing, contains a language
            other than java/python, or the per-source row counts disagree.
    """
    known_languages = ["java", "python"]

    stat_df = pd.read_csv(os.path.join(STAT_PATH, f"{split}_features.csv"))
    ast_df = pd.read_csv(os.path.join(AST_PATH, f"{split}_features.csv"))
    style_df = pd.read_csv(os.path.join(STYLE_PATH, f"{split}_features.csv"))

    y = stat_df["Label"].values

    if "language" not in stat_df.columns:
        raise ValueError("Language column not found for one-hot encoding!")

    # An unexpected language would otherwise silently change the feature layout.
    seen_languages = set(stat_df["language"].dropna().unique())
    unexpected = sorted(str(name) for name in seen_languages - set(known_languages))
    if unexpected:
        raise ValueError(
            f"Unsupported language value(s) in {split} split: {unexpected}"
        )

    # Fixed categories give a fixed column set and order.
    language_column = pd.Series(
        pd.Categorical(stat_df["language"], categories=known_languages)
    )
    lang_onehot = pd.get_dummies(language_column, prefix="lang")

    X_stat = stat_df.drop(columns=["Label", "language"]).values

    X_ast = ast_df.drop(columns=["Label"]).values
    X_style = style_df.drop(columns=["Label"]).values

    X_lang = lang_onehot.values

    X_sem = np.load(os.path.join(SEM_PATH, f"{split}_unixcoder.npy"))

    # Explicit check rather than assert, so it still runs under `python -O`.
    row_counts = {len(X_stat), len(X_ast), len(X_style), len(X_lang), len(X_sem)}
    if len(row_counts) != 1:
        raise ValueError(f"Row mismatch in {split} split!")

    X_final = np.hstack([X_stat, X_ast, X_style, X_lang, X_sem])

    return X_final.astype(float), y
44
+
45
if __name__ == "__main__":
    # Build the concatenated features and labels for every split and save
    # them under OUTPUT_DIR.
    for split in ("train", "val", "test"):
        features, labels = load_and_concatenate(split)

        features_file = os.path.join(OUTPUT_DIR, f"{split}_X.npy")
        labels_file = os.path.join(OUTPUT_DIR, f"{split}_y.npy")
        np.save(features_file, features)
        np.save(labels_file, labels)

        print(f"{split.upper()} FEATURES SHAPE: {features.shape}")
        print(f"{split.upper()} FEATURE TYPE:", features.dtype)

    print("\nFeature concatenation with ONE-HOT language encoding completed")
featureextraction/semantic_features/__init__,py ADDED
File without changes
featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-310.pyc ADDED
Binary file (1.23 kB). View file
 
featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-312.pyc ADDED
Binary file (1.85 kB). View file
 
featureextraction/semantic_features/__pycache__/unixcoder_embedding.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
featureextraction/semantic_features/unixcoder_embedding.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModel
3
+
4
# Hugging Face model identifier of the encoder used for semantic embeddings.
MODEL_NAME = "microsoft/unixcoder-base"

# Loaded once at import time so every embedding call reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
unix_model = AutoModel.from_pretrained(MODEL_NAME)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

unix_model.to(device)
# Switch the model to evaluation mode; it is only used for inference here.
unix_model.eval()
12
+
13
+
14
def get_unixcoder_embedding(code, max_length=512):
    """Return a flat NumPy embedding of ``code`` from the UniXcoder encoder.

    The input is tokenized (truncated to ``max_length`` tokens), passed
    through ``unix_model`` without gradient tracking, and the hidden state
    at position 0 is concatenated with the mean of the hidden states over
    the sequence dimension.

    Args:
        code: Text handed to the tokenizer.
            NOTE(review): presumably a single source string -- the mean is
            taken over every position (padding included) and the result is
            flattened, so a batch would be merged into one vector; confirm
            against callers.
        max_length: Maximum number of tokens kept by the tokenizer.

    Returns:
        A 1-D NumPy array containing the position-0 vector followed by the
        mean vector.
    """
    encoded = tokenizer(
        code,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Move every tokenizer output onto the same device as the model.
    model_inputs = {}
    for field, tensor in encoded.items():
        model_inputs[field] = tensor.to(device)

    with torch.no_grad():
        hidden_states = unix_model(**model_inputs).last_hidden_state
        first_position = hidden_states[:, 0, :]
        sequence_mean = hidden_states.mean(dim=1)
        pooled = torch.cat((first_position, sequence_mean), dim=1)

    return pooled.cpu().numpy().flatten()
featureextraction/step1_statistical_extraction/__init__.py ADDED
File without changes
featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (212 Bytes). View file
 
featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (216 Bytes). View file
 
featureextraction/step1_statistical_extraction/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (216 Bytes). View file
 
featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-310.pyc ADDED
Binary file (3.52 kB). View file
 
featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-312.pyc ADDED
Binary file (4.81 kB). View file
 
featureextraction/step1_statistical_extraction/__pycache__/step1_statistical_extraction.cpython-313.pyc ADDED
Binary file (4.91 kB). View file
 
featureextraction/step1_statistical_extraction/step1_statistical_extraction.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import re
4
+ import os
5
+ from collections import Counter
6
+ from math import log2
7
+
8
def tokenize(code):
    """Split ``code`` into the tokens used by the statistical features.

    Recognised tokens are runs of letters/underscores, runs of digits, the
    comparison operators ``==``, ``!=``, ``<=``, ``>=`` and the single
    arithmetic operators ``+ - * / %``. Everything else is skipped.

    Returns:
        The matched tokens as a list of strings, in order of appearance.
    """
    token_pattern = r"[A-Za-z_]+|\d+|==|!=|<=|>=|[+\-*/%]"
    return [match.group(0) for match in re.finditer(token_pattern, code)]
11
+
12
def token_entropy(tokens):
    """Return the Shannon entropy (base 2) of the token distribution.

    Each distinct token's probability is its count divided by the total
    number of tokens. An empty token list yields 0.0.
    """
    if not tokens:
        return 0.0

    n_tokens = len(tokens)
    terms = []
    for occurrences in Counter(tokens).values():
        share = occurrences / n_tokens
        terms.append(share * log2(share))
    return -sum(terms)
19
+
20
def burstiness(tokens):
    """Return the fraction of token occurrences whose token appears more than once.

    Counts every occurrence of each token that is seen at least twice and
    divides by the total number of tokens. An empty list yields 0.0.
    """
    if not tokens:
        return 0.0

    repeated_occurrences = 0
    for occurrences in Counter(tokens).values():
        if occurrences > 1:
            repeated_occurrences += occurrences
    return repeated_occurrences / len(tokens)
26
+
27
def repetition_ratio(tokens):
    """Return one minus the ratio of distinct tokens to total tokens.

    An empty token list yields 0.0.
    """
    if not tokens:
        return 0.0

    distinct_count = len(set(tokens))
    total_count = len(tokens)
    return 1 - (distinct_count / total_count)
31
+
32
def avg_token_length(tokens):
    """Return the mean character length of the tokens (0.0 when empty)."""
    if not tokens:
        return 0.0

    lengths = list(map(len, tokens))
    return np.mean(lengths)
36
+
37
def vocab_richness(tokens):
    """Return the number of distinct tokens divided by the total token count.

    An empty token list yields 0.0.
    """
    if not tokens:
        return 0.0

    vocabulary = set(tokens)
    return len(vocabulary) / len(tokens)
41
+
42
def num_tokens(tokens):
    """Return how many tokens are in ``tokens``."""
    token_count = len(tokens)
    return token_count
44
+
45
def unique_token_ratio(tokens):
    """Return the share of tokens that are distinct (0.0 for an empty list)."""
    if not tokens:
        return 0.0

    unique_count = len(set(tokens))
    return unique_count / len(tokens)
49
+
50
def avg_line_length(code):
    """Return the mean character length of the non-blank lines in ``code``.

    The text is split on newline characters, and lines that are empty or
    contain only whitespace are ignored. Lengths are measured on the
    unstripped line, so indentation counts.

    Returns:
        The mean line length, or 0.0 when ``code`` has no non-blank lines
        (empty or whitespace-only input).
    """
    lengths = [len(line) for line in code.split("\n") if line.strip()]
    # Guard on the filtered list: splitting never yields an empty list, and
    # averaging an empty list would produce NaN with a RuntimeWarning.
    if not lengths:
        return 0.0
    return np.mean(lengths)
55
+
56
def extract_features(df):
    """Compute the statistical feature table for every row of ``df``.

    For each row the ``normalized_code`` value is converted to ``str``,
    tokenized, and summarised with the token-level and line-level metrics
    defined in this module. The row's ``Language`` value (``"unknown"`` when
    the column is absent) is carried along as ``language``.

    Args:
        df: DataFrame with a ``normalized_code`` column and, optionally, a
            ``Language`` column.

    Returns:
        A new DataFrame with one row per input row and one column per
        feature, in the order the features are listed below.
    """

    def _row_features(row):
        # One feature record for a single sample.
        source = str(row["normalized_code"])
        row_tokens = tokenize(source)
        return {
            "entropy": token_entropy(row_tokens),
            "burstiness": burstiness(row_tokens),
            "repetition_ratio": repetition_ratio(row_tokens),
            "avg_token_length": avg_token_length(row_tokens),
            "vocab_richness": vocab_richness(row_tokens),
            "num_tokens": num_tokens(row_tokens),
            "unique_token_ratio": unique_token_ratio(row_tokens),
            "avg_line_length": avg_line_length(source),
            "language": row.get("Language", "unknown"),
        }

    records = [_row_features(row) for _, row in df.iterrows()]
    return pd.DataFrame(records)
79
+
80
if __name__ == "__main__":
    # Feature CSVs are stored alongside this script. Create that directory
    # explicitly so to_csv does not fail on a fresh checkout.
    output_dir = "featureextraction/step1_statistical_extraction"
    os.makedirs(output_dir, exist_ok=True)

    for split in ["train", "val", "test"]:
        input_path = f"dataset/processed/dataset_{split}.csv"
        df = pd.read_csv(input_path)

        # Normalise the verbose label header to the short name used downstream.
        if "Label (0- HUMAN, 1-AI)" in df.columns:
            df = df.rename(columns={"Label (0- HUMAN, 1-AI)": "Label"})

        X = extract_features(df)
        X["Label"] = df["Label"]

        output_path = os.path.join(output_dir, f"{split}_features.csv")
        X.to_csv(output_path, index=False)

        print(f"Statistical features extracted for {split}")
featureextraction/step1_statistical_extraction/test_features.csv ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 4.83141993487849,0.781021897810219,0.583941605839416,3.153284671532847,0.41605839416058393,137,0.41605839416058393,30.09090909090909,python,1.0
3
+ 4.100105160750668,0.8333333333333334,0.6111111111111112,2.7962962962962963,0.3888888888888889,54,0.3888888888888889,33.5,python,0.0
4
+ 3.549523459597832,0.7857142857142857,0.5,6.5,0.5,28,0.5,27.818181818181817,java,1.0
5
+ 4.598877796136016,0.7543859649122807,0.49122807017543857,3.473684210526316,0.5087719298245614,57,0.5087719298245614,43.666666666666664,python,1.0
6
+ 4.008947073809811,0.7708333333333334,0.5625,2.5208333333333335,0.4375,48,0.4375,24.181818181818183,java,1.0
7
+ 3.9620439376607153,0.8448275862068966,0.6379310344827587,2.689655172413793,0.3620689655172414,58,0.3620689655172414,24.5,java,0.0
8
+ 4.496439344671016,0.65,0.375,4.325,0.625,40,0.625,41.0,python,1.0
9
+ 5.038474870244337,0.7261904761904762,0.5119047619047619,4.083333333333333,0.4880952380952381,84,0.4880952380952381,40.916666666666664,python,0.0
10
+ 3.828442584567261,0.9724770642201835,0.8256880733944953,2.8990825688073394,0.1743119266055046,109,0.1743119266055046,34.111111111111114,python,1.0
11
+ 5.158447453320608,0.7702702702702703,0.45945945945945943,4.581081081081081,0.5405405405405406,74,0.5405405405405406,19.52173913043478,java,0.0
12
+ 4.074700960493769,0.6470588235294118,0.5098039215686274,4.823529411764706,0.49019607843137253,51,0.49019607843137253,31.272727272727273,python,0.0
13
+ 5.216551170557516,0.32653061224489793,0.18367346938775508,6.489795918367347,0.8163265306122449,49,0.8163265306122449,31.466666666666665,java,1.0
14
+ 4.662553238462382,0.8235294117647058,0.5588235294117647,2.926470588235294,0.4411764705882353,68,0.4411764705882353,25.866666666666667,python,1.0
15
+ 4.718863069670588,0.8344827586206897,0.6827586206896552,4.703448275862069,0.31724137931034485,145,0.31724137931034485,28.0,java,0.0
16
+ 4.186133469144905,0.7878787878787878,0.6060606060606061,3.015151515151515,0.3939393939393939,66,0.3939393939393939,30.636363636363637,python,1.0
17
+ 4.201497374489525,0.9393939393939394,0.7474747474747474,1.9292929292929293,0.25252525252525254,99,0.25252525252525254,46.142857142857146,java,1.0
18
+ 5.555743206064364,0.7756410256410257,0.5769230769230769,4.397435897435898,0.4230769230769231,156,0.4230769230769231,33.875,java,0.0
19
+ 5.120767187212948,0.819672131147541,0.5819672131147541,4.122950819672131,0.4180327868852459,122,0.4180327868852459,27.517241379310345,java,0.0
20
+ 5.411695856960623,0.5733333333333334,0.3466666666666667,4.773333333333333,0.6533333333333333,75,0.6533333333333333,44.36363636363637,python,0.0
21
+ 5.9364666815595095,0.9323308270676691,0.7969924812030076,4.402255639097745,0.20300751879699247,532,0.20300751879699247,46.348314606741575,java,0.0
22
+ 5.921411473217016,0.8119266055045872,0.6146788990825688,5.041284403669724,0.3853211009174312,218,0.3853211009174312,43.567567567567565,python,0.0
23
+ 5.681975847739398,0.5217391304347826,0.3369565217391305,6.336956521739131,0.6630434782608695,92,0.6630434782608695,41.10526315789474,python,1.0
24
+ 4.9007043302212505,0.7849462365591398,0.5591397849462365,5.236559139784946,0.44086021505376344,93,0.44086021505376344,38.111111111111114,python,0.0
25
+ 5.3228285773092585,0.9466019417475728,0.7524271844660194,3.5194174757281553,0.24757281553398058,206,0.24757281553398058,33.48837209302326,java,0.0
26
+ 4.886224508551541,0.6811594202898551,0.4492753623188406,4.956521739130435,0.5507246376811594,69,0.5507246376811594,27.904761904761905,java,0.0
27
+ 3.7468422480983827,0.7317073170731707,0.5609756097560976,2.5853658536585367,0.43902439024390244,41,0.43902439024390244,23.1,python,1.0
28
+ 5.711349874129563,0.9204545454545454,0.7727272727272727,3.7017045454545454,0.22727272727272727,352,0.22727272727272727,33.15068493150685,java,0.0
29
+ 4.708644255945031,0.8160919540229885,0.6091954022988506,4.551724137931035,0.39080459770114945,87,0.39080459770114945,38.73684210526316,java,0.0
30
+ 4.366729296672175,0.46875,0.28125,6.21875,0.71875,32,0.71875,24.0,java,1.0
31
+ 4.819680984926536,0.7704918032786885,0.47540983606557374,3.19672131147541,0.5245901639344263,61,0.5245901639344263,33.18181818181818,python,0.0
32
+ 4.982067485400015,0.8495575221238938,0.6283185840707964,2.8849557522123894,0.37168141592920356,113,0.37168141592920356,37.5,python,0.0
33
+ 5.792349655949547,0.8646288209606987,0.6506550218340612,6.423580786026201,0.34934497816593885,229,0.34934497816593885,31.1,java,1.0
34
+ 5.172672355162728,0.8211382113821138,0.6016260162601625,3.2195121951219514,0.3983739837398374,123,0.3983739837398374,29.7,java,0.0
35
+ 4.509883902076781,0.5869565217391305,0.3913043478260869,3.9782608695652173,0.6086956521739131,46,0.6086956521739131,31.6,java,1.0
36
+ 5.0781577563714695,0.7582417582417582,0.5274725274725275,4.912087912087912,0.4725274725274725,91,0.4725274725274725,42.94736842105263,java,0.0
37
+ 4.752102792050065,0.8555555555555555,0.6222222222222222,2.6,0.37777777777777777,90,0.37777777777777777,30.875,python,1.0
38
+ 5.385167845762692,0.6842105263157895,0.4631578947368421,5.442105263157894,0.5368421052631579,95,0.5368421052631579,39.34782608695652,java,0.0
39
+ 4.698453748752097,0.8333333333333334,0.6111111111111112,2.3,0.3888888888888889,90,0.3888888888888889,30.75,python,1.0
40
+ 5.0936606896881855,0.48,0.26,4.5,0.74,50,0.74,36.44444444444444,python,1.0
41
+ 4.762058188070034,0.5689655172413793,0.3793103448275862,3.8793103448275863,0.6206896551724138,58,0.6206896551724138,35.875,python,0.0
42
+ 3.842216900031167,0.8703703703703703,0.6666666666666667,3.4444444444444446,0.3333333333333333,54,0.3333333333333333,26.571428571428573,python,1.0
43
+ 5.394236721618798,0.8340425531914893,0.6851063829787234,3.1659574468085108,0.3148936170212766,235,0.3148936170212766,45.16129032258065,java,0.0
44
+ 3.6368421881310113,0.5416666666666666,0.375,5.916666666666667,0.625,24,0.625,29.857142857142858,python,1.0
45
+ 4.371179616205941,0.821917808219178,0.6164383561643836,4.575342465753424,0.3835616438356164,73,0.3835616438356164,31.31578947368421,python,0.0
46
+ 5.292298716803247,0.6103896103896104,0.38961038961038963,4.597402597402597,0.6103896103896104,77,0.6103896103896104,43.63636363636363,python,0.0
47
+ 4.9019636334438,0.8170731707317073,0.5609756097560976,4.7926829268292686,0.43902439024390244,82,0.43902439024390244,34.21739130434783,java,0.0
48
+ 6.249267385727016,0.8602150537634409,0.6774193548387097,5.467741935483871,0.3225806451612903,372,0.3225806451612903,44.231884057971016,python,0.0
49
+ 5.022946255731872,0.905982905982906,0.6495726495726496,2.769230769230769,0.3504273504273504,117,0.3504273504273504,34.44,python,0.0
50
+ 5.183364648336087,0.578125,0.34375,6.140625,0.65625,64,0.65625,36.5,python,1.0
51
+ 3.8514475312208925,0.891566265060241,0.7469879518072289,2.5542168674698793,0.25301204819277107,83,0.25301204819277107,33.15384615384615,java,1.0
52
+ 4.76725052491479,0.8088235294117647,0.5294117647058824,4.382352941176471,0.47058823529411764,68,0.47058823529411764,29.857142857142858,java,0.0
53
+ 4.053149247727424,0.93,0.76,2.96,0.24,100,0.24,30.523809523809526,java,1.0
54
+ 5.508457113311519,0.8251748251748252,0.5944055944055944,5.1328671328671325,0.40559440559440557,143,0.40559440559440557,42.411764705882355,java,0.0
55
+ 4.041108871665959,0.7560975609756098,0.5121951219512195,5.048780487804878,0.4878048780487805,41,0.4878048780487805,26.75,java,1.0
56
+ 5.264706383768677,0.7769784172661871,0.5827338129496402,4.683453237410072,0.4172661870503597,139,0.4172661870503597,71.57142857142857,python,0.0
57
+ 4.888197350707755,0.9251700680272109,0.7210884353741497,2.5374149659863945,0.2789115646258503,147,0.2789115646258503,37.392857142857146,python,0.0
58
+ 4.246439344671016,0.65,0.42500000000000004,4.5,0.575,40,0.575,25.818181818181817,python,1.0
59
+ 5.3571067941182635,0.4838709677419355,0.27419354838709675,7.532258064516129,0.7258064516129032,62,0.7258064516129032,46.15384615384615,java,1.0
60
+ 4.882260151910624,0.8173076923076923,0.625,3.951923076923077,0.375,104,0.375,29.307692307692307,java,0.0
61
+ 4.413909765557392,0.40625,0.25,3.84375,0.75,32,0.75,28.666666666666668,python,0.0
62
+ 4.003044786734202,0.6666666666666666,0.4242424242424242,4.424242424242424,0.5757575757575758,33,0.5757575757575758,28.714285714285715,python,0.0
63
+ 4.138502179294739,0.8235294117647058,0.6176470588235294,3.8088235294117645,0.38235294117647056,68,0.38235294117647056,38.45454545454545,java,1.0
64
+ 3.490664945449586,0.8979591836734694,0.6938775510204082,4.571428571428571,0.30612244897959184,49,0.30612244897959184,26.526315789473685,java,0.0
65
+ 4.742155477497019,0.7049180327868853,0.47540983606557374,2.80327868852459,0.5245901639344263,61,0.5245901639344263,27.4,python,0.0
66
+ 4.907962016217061,0.4318181818181818,0.25,3.0454545454545454,0.75,44,0.75,62.0,python,1.0
67
+ 5.299012661837314,0.7024793388429752,0.5371900826446281,3.677685950413223,0.4628099173553719,121,0.4628099173553719,48.0,python,0.0
68
+ 5.643744827511846,0.8793774319066148,0.7081712062256809,7.603112840466926,0.2918287937743191,257,0.2918287937743191,36.523809523809526,java,1.0
69
+ 3.640223928941852,0.26666666666666666,0.1333333333333333,4.4,0.8666666666666667,15,0.8666666666666667,32.333333333333336,python,1.0
70
+ 4.220518125914522,0.9583333333333334,0.8194444444444444,2.0277777777777777,0.18055555555555555,144,0.18055555555555555,41.92307692307692,java,1.0
71
+ 5.706414205462762,0.7961783439490446,0.5796178343949044,7.503184713375796,0.42038216560509556,157,0.42038216560509556,38.61363636363637,java,1.0
72
+ 5.222228298430333,0.8230088495575221,0.5663716814159292,3.982300884955752,0.4336283185840708,113,0.4336283185840708,41.75,python,0.0
73
+ 4.277261292670126,0.8588235294117647,0.6705882352941177,3.0,0.32941176470588235,85,0.32941176470588235,34.35294117647059,python,0.0
74
+ 3.8035088547976783,0.5416666666666666,0.33333333333333337,4.208333333333333,0.6666666666666666,24,0.6666666666666666,24.571428571428573,python,1.0
75
+ 5.251158101143722,0.8026315789473685,0.618421052631579,4.894736842105263,0.3815789473684211,152,0.3815789473684211,44.06896551724138,java,0.0
featureextraction/step1_statistical_extraction/train_features.csv ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 5.207956079154271,0.8712121212121212,0.6515151515151515,3.9015151515151514,0.3484848484848485,132,0.3484848484848485,41.464285714285715,python,0.0
3
+ 4.844227218082805,0.8962962962962963,0.7037037037037037,3.1259259259259258,0.2962962962962963,135,0.2962962962962963,15.704545454545455,java,0.0
4
+ 5.110041502517433,0.916083916083916,0.6783216783216783,4.5174825174825175,0.32167832167832167,143,0.32167832167832167,25.784313725490197,java,0.0
5
+ 4.599986090068817,0.8839285714285714,0.7053571428571428,4.616071428571429,0.29464285714285715,112,0.29464285714285715,30.68421052631579,java,0.0
6
+ 4.469392725378344,0.8589743589743589,0.6153846153846154,3.230769230769231,0.38461538461538464,78,0.38461538461538464,23.923076923076923,java,0.0
7
+ 4.293744383186907,0.859375,0.625,2.953125,0.375,64,0.375,26.210526315789473,java,0.0
8
+ 4.549676205034737,0.860655737704918,0.6885245901639344,5.918032786885246,0.3114754098360656,122,0.3114754098360656,33.96666666666667,python,0.0
9
+ 4.328596578632476,0.8163265306122449,0.5306122448979591,3.2244897959183674,0.46938775510204084,49,0.46938775510204084,28.333333333333332,python,0.0
10
+ 4.597753650865813,0.7941176470588235,0.5588235294117647,3.0441176470588234,0.4411764705882353,68,0.4411764705882353,30.916666666666668,java,1.0
11
+ 4.0675361498812554,0.8333333333333334,0.6666666666666667,2.0384615384615383,0.3333333333333333,78,0.3333333333333333,28.0,python,0.0
12
+ 4.853956434293166,0.898989898989899,0.6464646464646464,4.101010101010101,0.35353535353535354,99,0.35353535353535354,31.40740740740741,java,0.0
13
+ 6.10213285294983,0.875,0.6736111111111112,7.423611111111111,0.3263888888888889,288,0.3263888888888889,37.31395348837209,java,1.0
14
+ 3.053791405126089,0.7777777777777778,0.6388888888888888,4.027777777777778,0.3611111111111111,36,0.3611111111111111,22.25,python,1.0
15
+ 4.379556754405944,0.8620689655172413,0.5689655172413793,4.120689655172414,0.43103448275862066,58,0.43103448275862066,35.36363636363637,java,1.0
16
+ 4.538844415918181,0.8026315789473685,0.5921052631578947,3.3026315789473686,0.40789473684210525,76,0.40789473684210525,15.043478260869565,java,0.0
17
+ 4.435455286899907,0.847457627118644,0.576271186440678,2.694915254237288,0.423728813559322,59,0.423728813559322,26.818181818181817,python,1.0
18
+ 4.1277054819279435,0.8333333333333334,0.5625,3.0,0.4375,48,0.4375,24.6,java,1.0
19
+ 4.498419401169,0.7058823529411765,0.47058823529411764,3.2941176470588234,0.5294117647058824,51,0.5294117647058824,25.866666666666667,java,0.0
20
+ 3.615922063835167,0.7333333333333333,0.5,5.433333333333334,0.5,30,0.5,30.125,python,0.0
21
+ 5.043983303761563,0.5833333333333334,0.3666666666666667,4.983333333333333,0.6333333333333333,60,0.6333333333333333,37.5,python,1.0
22
+ 3.6897037321995474,0.42857142857142855,0.2857142857142857,7.095238095238095,0.7142857142857143,21,0.7142857142857143,22.7,java,1.0
23
+ 3.791950834682379,0.7142857142857143,0.5934065934065934,2.769230769230769,0.4065934065934066,91,0.4065934065934066,34.0,python,1.0
24
+ 5.542412227395111,0.7934782608695652,0.6141304347826086,3.6793478260869565,0.3858695652173913,184,0.3858695652173913,61.75,java,0.0
25
+ 3.8463189626846366,0.8947368421052632,0.5526315789473684,3.0526315789473686,0.4473684210526316,38,0.4473684210526316,20.6,python,1.0
26
+ 4.7720552088742005,0.6666666666666666,0.375,6.229166666666667,0.625,48,0.625,29.214285714285715,java,1.0
27
+ 5.085703343051975,0.7099236641221374,0.549618320610687,3.931297709923664,0.45038167938931295,131,0.45038167938931295,32.40909090909091,python,0.0
28
+ 4.253863368422077,0.6578947368421053,0.42105263157894735,3.6052631578947367,0.5789473684210527,38,0.5789473684210527,34.833333333333336,java,1.0
29
+ 5.285942953673669,0.8157894736842105,0.5614035087719298,3.4035087719298245,0.43859649122807015,114,0.43859649122807015,45.666666666666664,java,0.0
30
+ 4.324534762707879,0.7111111111111111,0.4666666666666667,8.177777777777777,0.5333333333333333,45,0.5333333333333333,37.30769230769231,java,1.0
31
+ 4.895714600514266,0.8457943925233645,0.7383177570093458,2.6448598130841123,0.2616822429906542,214,0.2616822429906542,18.784313725490197,java,0.0
32
+ 3.327339900083509,0.6666666666666666,0.4285714285714286,3.0952380952380953,0.5714285714285714,21,0.5714285714285714,22.8,python,1.0
33
+ 5.581633377149314,0.5934065934065934,0.37362637362637363,6.318681318681318,0.6263736263736264,91,0.6263736263736264,40.78947368421053,python,1.0
34
+ 5.657995400316549,0.8997289972899729,0.7425474254742548,3.907859078590786,0.25745257452574527,369,0.25745257452574527,56.26315789473684,java,0.0
35
+ 4.029187712039474,0.8367346938775511,0.5918367346938775,2.9183673469387754,0.40816326530612246,49,0.40816326530612246,30.0,java,1.0
36
+ 4.8582227682726,0.9393939393939394,0.7348484848484849,2.6363636363636362,0.26515151515151514,132,0.26515151515151514,32.0,python,1.0
37
+ 4.42309119581308,0.7941176470588235,0.5735294117647058,2.5588235294117645,0.4264705882352941,68,0.4264705882352941,30.454545454545453,java,1.0
38
+ 3.7665853954588724,0.7704918032786885,0.639344262295082,2.8852459016393444,0.36065573770491804,61,0.36065573770491804,33.90909090909091,python,0.0
39
+ 5.197908393371423,0.8072289156626506,0.6325301204819277,5.0843373493975905,0.3674698795180723,166,0.3674698795180723,38.65625,java,0.0
40
+ 4.536286231168867,0.42857142857142855,0.2571428571428571,3.6,0.7428571428571429,35,0.7428571428571429,31.166666666666668,python,1.0
41
+ 4.709586788340617,0.8957055214723927,0.7300613496932515,3.361963190184049,0.26993865030674846,163,0.26993865030674846,17.816326530612244,java,0.0
42
+ 4.5049847535671015,0.9545454545454546,0.7897727272727273,3.3806818181818183,0.21022727272727273,176,0.21022727272727273,36.16129032258065,java,0.0
43
+ 4.9423908406293835,0.6538461538461539,0.4358974358974359,3.8333333333333335,0.5641025641025641,78,0.5641025641025641,45.125,python,0.0
44
+ 4.41120441892766,0.7777777777777778,0.4666666666666667,6.488888888888889,0.5333333333333333,45,0.5333333333333333,44.77777777777778,python,0.0
45
+ 5.262208612706845,0.7327586206896551,0.5431034482758621,4.146551724137931,0.45689655172413796,116,0.45689655172413796,28.666666666666668,python,1.0
46
+ 4.208410187268525,0.48148148148148145,0.2592592592592593,4.222222222222222,0.7407407407407407,27,0.7407407407407407,32.2,python,1.0
47
+ 5.793376775298043,0.8823529411764706,0.680672268907563,5.529411764705882,0.31932773109243695,238,0.31932773109243695,27.633333333333333,java,1.0
48
+ 5.691854216621956,0.7849462365591398,0.5967741935483871,4.973118279569892,0.4032258064516129,186,0.4032258064516129,40.333333333333336,python,0.0
49
+ 5.058984089445427,0.3333333333333333,0.16666666666666663,6.785714285714286,0.8333333333333334,42,0.8333333333333334,30.923076923076923,python,1.0
50
+ 4.433157434964864,0.9736842105263158,0.7543859649122807,2.4035087719298245,0.24561403508771928,114,0.24561403508771928,31.5,python,1.0
51
+ 5.268123017576671,0.6164383561643836,0.3835616438356164,4.917808219178082,0.6164383561643836,73,0.6164383561643836,44.09090909090909,python,0.0
52
+ 6.198108865484992,0.7208121827411168,0.5025380710659899,5.0456852791878175,0.49746192893401014,197,0.49746192893401014,41.638888888888886,python,1.0
53
+ 4.5615159206608356,0.8227848101265823,0.6075949367088608,3.5569620253164556,0.3924050632911392,79,0.3924050632911392,13.678571428571429,java,0.0
54
+ 5.04371468310881,0.6351351351351351,0.43243243243243246,4.1891891891891895,0.5675675675675675,74,0.5675675675675675,37.0,java,0.0
55
+ 3.995906598484245,0.6944444444444444,0.4722222222222222,4.333333333333333,0.5277777777777778,36,0.5277777777777778,27.5,python,1.0
56
+ 4.757950193468896,0.9148936170212766,0.648936170212766,2.893617021276596,0.35106382978723405,94,0.35106382978723405,34.0,python,1.0
57
+ 5.053560274699695,0.9136690647482014,0.7050359712230216,4.705035971223022,0.2949640287769784,139,0.2949640287769784,39.69565217391305,python,0.0
58
+ 3.833323296470577,0.8813559322033898,0.6779661016949152,2.288135593220339,0.3220338983050847,59,0.3220338983050847,27.5,java,1.0
59
+ 4.253212018409155,0.5882352941176471,0.3529411764705882,4.117647058823529,0.6470588235294118,34,0.6470588235294118,13.75,java,0.0
60
+ 4.611949334080443,0.5384615384615384,0.3076923076923077,4.717948717948718,0.6923076923076923,39,0.6923076923076923,34.42857142857143,python,0.0
61
+ 4.756182308560408,0.9186991869918699,0.7073170731707317,3.3089430894308944,0.2926829268292683,123,0.2926829268292683,28.09090909090909,java,0.0
62
+ 5.1256959966316895,0.9022988505747126,0.6954022988505747,3.8045977011494254,0.3045977011494253,174,0.3045977011494253,31.727272727272727,java,0.0
63
+ 4.274386660093705,0.8103448275862069,0.5862068965517242,2.2413793103448274,0.41379310344827586,58,0.41379310344827586,39.166666666666664,java,1.0
64
+ 5.649232493101554,0.8258064516129032,0.5870967741935484,5.961290322580645,0.4129032258064516,155,0.4129032258064516,29.104166666666668,java,0.0
65
+ 3.8005595576361078,0.875,0.6785714285714286,3.0892857142857144,0.32142857142857145,56,0.32142857142857145,24.357142857142858,python,1.0
66
+ 4.635290969626768,0.6666666666666666,0.43137254901960786,3.7254901960784315,0.5686274509803921,51,0.5686274509803921,25.46153846153846,java,0.0
67
+ 4.854346937661789,0.5740740740740741,0.37037037037037035,5.0,0.6296296296296297,54,0.6296296296296297,25.4,java,0.0
68
+ 4.393061650825727,0.5,0.2941176470588235,7.352941176470588,0.7058823529411765,34,0.7058823529411765,40.625,python,1.0
69
+ 4.422442136473173,0.5135135135135135,0.32432432432432434,5.081081081081081,0.6756756756756757,37,0.6756756756756757,39.166666666666664,python,1.0
70
+ 4.207435516759526,0.6571428571428571,0.4,3.0285714285714285,0.6,35,0.6,22.375,python,1.0
71
+ 5.99203259332159,0.7687074829931972,0.48299319727891155,5.122448979591836,0.5170068027210885,147,0.5170068027210885,39.357142857142854,python,1.0
72
+ 6.007074413338004,0.711764705882353,0.5058823529411764,4.129411764705883,0.49411764705882355,170,0.49411764705882355,58.0,python,0.0
73
+ 3.6464393446710157,0.5,0.30000000000000004,3.85,0.7,20,0.7,27.0,python,1.0
74
+ 4.497874075147441,0.5333333333333333,0.37777777777777777,4.311111111111111,0.6222222222222222,45,0.6222222222222222,30.636363636363637,java,1.0
75
+ 4.728255941527934,0.9318181818181818,0.7348484848484849,2.6363636363636362,0.26515151515151514,132,0.26515151515151514,33.68,python,0.0
76
+ 4.2354799985372935,0.6862745098039216,0.5098039215686274,2.9411764705882355,0.49019607843137253,51,0.49019607843137253,28.333333333333332,python,0.0
77
+ 5.560363930440956,0.8622754491017964,0.6347305389221557,4.029940119760479,0.3652694610778443,167,0.3652694610778443,31.70212765957447,java,0.0
78
+ 6.014883786593109,0.5323741007194245,0.38129496402877694,4.863309352517986,0.6187050359712231,139,0.6187050359712231,34.25,python,1.0
79
+ 4.384183719779189,0.55,0.375,6.35,0.625,40,0.625,32.916666666666664,java,1.0
80
+ 4.36346329043209,0.7105263157894737,0.39473684210526316,6.157894736842105,0.6052631578947368,38,0.6052631578947368,27.733333333333334,java,1.0
81
+ 5.92272462852426,0.7397959183673469,0.5510204081632653,4.969387755102041,0.4489795918367347,196,0.4489795918367347,72.89473684210526,java,0.0
82
+ 4.17415473143563,0.9078947368421053,0.7105263157894737,2.1578947368421053,0.2894736842105263,76,0.2894736842105263,30.692307692307693,python,1.0
83
+ 5.271209951802516,0.9140271493212669,0.7330316742081449,4.97737556561086,0.2669683257918552,221,0.2669683257918552,43.416666666666664,java,0.0
84
+ 5.014527492014123,0.9281045751633987,0.7320261437908497,2.7777777777777777,0.2679738562091503,153,0.2679738562091503,38.869565217391305,python,0.0
85
+ 4.482483990770269,0.9565217391304348,0.7391304347826086,4.026086956521739,0.2608695652173913,115,0.2608695652173913,36.90909090909091,java,1.0
86
+ 5.722786643861381,0.8493975903614458,0.5783132530120482,3.7710843373493974,0.42168674698795183,166,0.42168674698795183,19.595744680851062,java,0.0
87
+ 5.300160551498397,0.5909090909090909,0.33333333333333337,5.757575757575758,0.6666666666666666,66,0.6666666666666666,36.75,java,1.0
88
+ 4.45148902643068,0.5,0.3157894736842105,3.4210526315789473,0.6842105263157895,38,0.6842105263157895,21.7,python,1.0
89
+ 5.072571258365157,0.8296703296703297,0.6428571428571428,3.4450549450549453,0.35714285714285715,182,0.35714285714285715,28.515151515151516,python,1.0
90
+ 4.168243795012672,0.5161290322580645,0.32258064516129037,6.0,0.6774193548387096,31,0.6774193548387096,31.375,java,0.0
91
+ 4.123939173306749,0.6511627906976745,0.4651162790697675,3.6744186046511627,0.5348837209302325,43,0.5348837209302325,28.2,java,1.0
92
+ 4.534110630792036,0.8888888888888888,0.6666666666666667,3.1222222222222222,0.3333333333333333,90,0.3333333333333333,35.3,java,0.0
93
+ 4.889260316971673,1.0,0.7348484848484849,3.6515151515151514,0.26515151515151514,132,0.26515151515151514,23.066666666666666,java,0.0
94
+ 4.883002384777129,0.9366197183098591,0.7323943661971831,2.852112676056338,0.2676056338028169,142,0.2676056338028169,36.25806451612903,python,0.0
95
+ 3.8200705941688406,0.9666666666666667,0.8111111111111111,5.011111111111111,0.18888888888888888,90,0.18888888888888888,37.421052631578945,java,1.0
96
+ 5.866907260896749,0.7725321888412017,0.5965665236051503,5.141630901287554,0.4034334763948498,233,0.4034334763948498,71.04347826086956,python,0.0
97
+ 4.592226257370053,0.7936507936507936,0.5396825396825398,3.5714285714285716,0.4603174603174603,63,0.4603174603174603,31.5,python,0.0
98
+ 4.979264809390598,0.2926829268292683,0.1707317073170732,6.317073170731708,0.8292682926829268,41,0.8292682926829268,33.416666666666664,java,1.0
99
+ 3.1693150678026982,0.8705882352941177,0.7294117647058824,2.1411764705882352,0.27058823529411763,85,0.27058823529411763,29.88888888888889,python,1.0
100
+ 4.718291773866265,0.8803418803418803,0.6837606837606838,2.871794871794872,0.3162393162393162,117,0.3162393162393162,33.17857142857143,python,0.0
101
+ 6.057756669289524,0.5080645161290323,0.3306451612903226,5.588709677419355,0.6693548387096774,124,0.6693548387096774,37.57692307692308,python,1.0
102
+ 4.94407911950056,0.8181818181818182,0.5324675324675325,7.077922077922078,0.4675324675324675,77,0.4675324675324675,37.05,java,1.0
103
+ 4.726143834398879,0.6136363636363636,0.34090909090909094,4.840909090909091,0.6590909090909091,44,0.6590909090909091,28.25,python,1.0
104
+ 4.475969855129791,0.8305084745762712,0.5593220338983051,3.389830508474576,0.4406779661016949,59,0.4406779661016949,27.0,java,1.0
105
+ 5.689839588541731,0.783410138248848,0.6082949308755761,3.640552995391705,0.391705069124424,217,0.391705069124424,31.355555555555554,java,0.0
106
+ 4.518215514675149,0.9473684210526315,0.6578947368421053,2.526315789473684,0.34210526315789475,76,0.34210526315789475,44.285714285714285,python,1.0
107
+ 4.124232718690979,0.7678571428571429,0.5892857142857143,3.1607142857142856,0.4107142857142857,56,0.4107142857142857,25.3125,java,0.0
108
+ 4.112426730000285,0.8846153846153846,0.7403846153846154,2.951923076923077,0.25961538461538464,104,0.25961538461538464,34.09090909090909,python,0.0
109
+ 5.039480564148708,0.7866666666666666,0.4933333333333333,4.093333333333334,0.5066666666666667,75,0.5066666666666667,32.111111111111114,java,0.0
110
+ 4.677139104850461,0.8181818181818182,0.5454545454545454,3.621212121212121,0.45454545454545453,66,0.45454545454545453,22.0,python,0.0
111
+ 4.131300342505361,0.7096774193548387,0.3870967741935484,6.161290322580645,0.6129032258064516,31,0.6129032258064516,31.625,java,1.0
112
+ 5.462480227696889,0.7959183673469388,0.5850340136054422,4.707482993197279,0.41496598639455784,147,0.41496598639455784,24.872340425531913,java,0.0
113
+ 3.6914280318460246,0.8125,0.53125,2.09375,0.46875,32,0.46875,23.333333333333332,python,1.0
114
+ 4.214114119970501,0.7142857142857143,0.5102040816326531,2.816326530612245,0.4897959183673469,49,0.4897959183673469,22.583333333333332,java,1.0
115
+ 5.697217023615801,0.8171428571428572,0.5828571428571429,5.154285714285714,0.41714285714285715,175,0.41714285714285715,37.93023255813954,java,0.0
116
+ 4.0791433740260095,0.6571428571428571,0.4285714285714286,5.057142857142857,0.5714285714285714,35,0.5714285714285714,25.0,python,1.0
117
+ 3.7489948035250964,0.47619047619047616,0.2857142857142857,3.4285714285714284,0.7142857142857143,21,0.7142857142857143,26.25,python,1.0
118
+ 4.907735246015272,0.75,0.525,4.2125,0.475,80,0.475,29.869565217391305,python,0.0
119
+ 4.822037278990095,0.7808219178082192,0.5342465753424658,2.958904109589041,0.4657534246575342,73,0.4657534246575342,33.0,python,0.0
120
+ 5.030104271105216,0.8301886792452831,0.6037735849056604,3.349056603773585,0.39622641509433965,106,0.39622641509433965,33.958333333333336,java,1.0
121
+ 4.816108939837481,0.7142857142857143,0.4285714285714286,4.392857142857143,0.5714285714285714,56,0.5714285714285714,29.23076923076923,python,1.0
122
+ 2.4276042408847402,0.8461538461538461,0.7230769230769231,1.7692307692307692,0.27692307692307694,65,0.27692307692307694,27.666666666666668,python,1.0
123
+ 6.584083875285213,0.8148148148148148,0.6271604938271604,6.212345679012346,0.3728395061728395,405,0.3728395061728395,40.2247191011236,python,0.0
124
+ 4.1673958618292355,0.8536585365853658,0.6951219512195121,2.7560975609756095,0.3048780487804878,82,0.3048780487804878,29.65,java,0.0
125
+ 3.741962789037907,0.6896551724137931,0.4482758620689655,2.0689655172413794,0.5517241379310345,29,0.5517241379310345,18.285714285714285,python,1.0
126
+ 4.86754794914527,0.5178571428571429,0.3571428571428571,3.375,0.6428571428571429,56,0.6428571428571429,19.2,java,0.0
127
+ 3.9331695057515157,0.9285714285714286,0.5952380952380952,3.5714285714285716,0.40476190476190477,42,0.40476190476190477,38.166666666666664,python,1.0
128
+ 4.196483601699262,0.49019607843137253,0.4117647058823529,6.254901960784314,0.5882352941176471,51,0.5882352941176471,34.25,python,0.0
129
+ 5.906626836308724,0.8967254408060453,0.7481108312342569,3.44080604534005,0.2518891687657431,397,0.2518891687657431,53.51219512195122,java,0.0
130
+ 6.369927477083823,0.5966850828729282,0.4088397790055248,4.900552486187845,0.5911602209944752,181,0.5911602209944752,49.65384615384615,python,1.0
131
+ 4.384795487373134,0.5277777777777778,0.33333333333333337,4.611111111111111,0.6666666666666666,36,0.6666666666666666,18.25,java,0.0
132
+ 5.139173834043169,0.8348623853211009,0.5871559633027523,4.935779816513762,0.41284403669724773,109,0.41284403669724773,35.370370370370374,java,0.0
133
+ 4.238521032323433,0.7708333333333334,0.5208333333333333,4.5625,0.4791666666666667,48,0.4791666666666667,27.571428571428573,python,0.0
134
+ 4.460619643646408,0.8309859154929577,0.5915492957746479,4.704225352112676,0.4084507042253521,71,0.4084507042253521,35.72222222222222,java,0.0
135
+ 5.2878758225325395,0.9408450704225352,0.8169014084507042,4.366197183098592,0.18309859154929578,355,0.18309859154929578,42.14545454545455,java,0.0
136
+ 4.8335434078297626,0.6666666666666666,0.4545454545454546,4.46969696969697,0.5454545454545454,66,0.5454545454545454,65.66666666666667,python,0.0
137
+ 3.3889821967054377,0.9555555555555556,0.7333333333333334,5.022222222222222,0.26666666666666666,45,0.26666666666666666,53.0,java,1.0
138
+ 5.626267512421823,0.8212290502793296,0.6145251396648045,3.8491620111731844,0.3854748603351955,179,0.3854748603351955,55.166666666666664,python,1.0
139
+ 4.5054500825687525,0.9090909090909091,0.696969696969697,3.6464646464646466,0.30303030303030304,99,0.30303030303030304,41.36363636363637,python,0.0
140
+ 4.66816999226466,0.5609756097560976,0.31707317073170727,4.7560975609756095,0.6829268292682927,41,0.6829268292682927,30.2,java,1.0
141
+ 4.1138276215159735,0.8461538461538461,0.6307692307692307,2.876923076923077,0.36923076923076925,65,0.36923076923076925,31.071428571428573,java,0.0
142
+ 3.7489948035250964,0.47619047619047616,0.2857142857142857,6.904761904761905,0.7142857142857143,21,0.7142857142857143,33.57142857142857,java,1.0
143
+ 4.639932288803049,0.8472222222222222,0.6944444444444444,3.9166666666666665,0.3055555555555556,144,0.3055555555555556,40.34615384615385,java,0.0
144
+ 5.503071993063162,0.7610619469026548,0.504424778761062,3.2920353982300883,0.49557522123893805,113,0.49557522123893805,51.36363636363637,python,1.0
145
+ 5.320245758154888,0.8130081300813008,0.5853658536585367,2.227642276422764,0.4146341463414634,123,0.4146341463414634,33.5625,python,1.0
146
+ 4.448860987743577,0.5789473684210527,0.3421052631578947,4.131578947368421,0.6578947368421053,38,0.6578947368421053,32.125,python,1.0
147
+ 4.526180668235663,0.5897435897435898,0.33333333333333337,4.17948717948718,0.6666666666666666,39,0.6666666666666666,26.9,java,1.0
148
+ 4.202589101773195,0.9411764705882353,0.7294117647058824,1.7058823529411764,0.27058823529411763,85,0.27058823529411763,31.3,java,1.0
149
+ 5.934435083287192,0.6229508196721312,0.4016393442622951,5.0,0.5983606557377049,122,0.5983606557377049,36.0,python,1.0
150
+ 4.713707321865714,0.6885245901639344,0.4590163934426229,6.459016393442623,0.5409836065573771,61,0.5409836065573771,33.8125,python,0.0
151
+ 4.3180873032876175,0.9,0.7111111111111111,2.2777777777777777,0.28888888888888886,90,0.28888888888888886,38.833333333333336,python,0.0
152
+ 4.285367908896077,0.6875,0.47916666666666663,4.520833333333333,0.5208333333333334,48,0.5208333333333334,40.875,java,1.0
153
+ 4.366596139176847,0.6585365853658537,0.41463414634146345,5.609756097560975,0.5853658536585366,41,0.5853658536585366,34.8,java,1.0
154
+ 5.343782087298768,0.8652482269503546,0.624113475177305,5.7304964539007095,0.375886524822695,141,0.375886524822695,40.40625,java,0.0
155
+ 4.327819531114783,0.375,0.25,6.28125,0.75,32,0.75,29.5,java,1.0
156
+ 3.8033524347703986,0.7105263157894737,0.5,2.763157894736842,0.5,38,0.5,26.375,python,0.0
157
+ 5.72576174609731,0.797979797979798,0.6161616161616161,3.984848484848485,0.3838383838383838,198,0.3838383838383838,42.55882352941177,java,0.0
158
+ 5.409509758627718,0.841025641025641,0.676923076923077,5.671794871794872,0.3230769230769231,195,0.3230769230769231,34.90384615384615,python,0.0
159
+ 5.679736241553765,0.7013888888888888,0.5138888888888888,4.743055555555555,0.4861111111111111,144,0.4861111111111111,52.55555555555556,python,0.0
160
+ 4.4912054473944405,0.8148148148148148,0.5185185185185186,5.277777777777778,0.48148148148148145,54,0.48148148148148145,36.23076923076923,java,0.0
161
+ 3.9362434129830604,0.6333333333333333,0.4,3.1333333333333333,0.6,30,0.6,24.75,python,0.0
162
+ 4.215928174708332,0.8852459016393442,0.6229508196721312,2.721311475409836,0.3770491803278688,61,0.3770491803278688,27.923076923076923,python,0.0
163
+ 5.255072207410051,0.9081081081081082,0.7135135135135136,3.675675675675676,0.2864864864864865,185,0.2864864864864865,38.34375,python,0.0
164
+ 4.8132935840925875,0.8902439024390244,0.5853658536585367,4.365853658536586,0.4146341463414634,82,0.4146341463414634,39.166666666666664,python,0.0
165
+ 3.728999521334141,0.9047619047619048,0.7301587301587302,4.619047619047619,0.2698412698412698,63,0.2698412698412698,34.84615384615385,java,1.0
166
+ 4.4003105301349255,0.6888888888888889,0.4444444444444444,2.5555555555555554,0.5555555555555556,45,0.5555555555555556,21.90909090909091,python,1.0
167
+ 5.192613181399552,0.8343949044585988,0.6624203821656052,3.445859872611465,0.3375796178343949,157,0.3375796178343949,42.476190476190474,python,0.0
168
+ 4.896037310447887,0.8761061946902655,0.6548672566371682,3.2123893805309733,0.34513274336283184,113,0.34513274336283184,27.653846153846153,java,0.0
169
+ 4.652782517460112,0.6447368421052632,0.513157894736842,2.8684210526315788,0.4868421052631579,76,0.4868421052631579,32.76470588235294,java,0.0
170
+ 3.757539644379974,0.9032258064516129,0.7096774193548387,1.6612903225806452,0.2903225806451613,62,0.2903225806451613,24.375,java,1.0
171
+ 5.4053549514703,0.7605633802816901,0.5633802816901409,5.640845070422535,0.43661971830985913,142,0.43661971830985913,38.833333333333336,java,0.0
172
+ 4.715163766425403,0.9142857142857143,0.6761904761904762,4.580952380952381,0.3238095238095238,105,0.3238095238095238,27.733333333333334,java,0.0
173
+ 5.048749755702801,0.8085106382978723,0.5638297872340425,2.74468085106383,0.43617021276595747,94,0.43617021276595747,34.375,java,1.0
174
+ 4.412142047769037,0.734375,0.546875,2.9375,0.453125,64,0.453125,31.0,python,0.0
175
+ 4.162294909570876,0.4444444444444444,0.2592592592592593,4.148148148148148,0.7407407407407407,27,0.7407407407407407,21.428571428571427,java,0.0
176
+ 5.17784699780413,0.46153846153846156,0.32307692307692304,3.4153846153846152,0.676923076923077,65,0.676923076923077,38.92307692307692,java,0.0
177
+ 5.4865950410391395,0.8578199052132701,0.6872037914691943,4.1753554502369665,0.3127962085308057,211,0.3127962085308057,37.65217391304348,java,0.0
178
+ 5.104500026854035,0.875,0.6333333333333333,4.291666666666667,0.36666666666666664,120,0.36666666666666664,39.80769230769231,java,0.0
179
+ 6.146771906965858,0.8115183246073299,0.5445026178010471,5.350785340314136,0.45549738219895286,191,0.45549738219895286,40.8,python,0.0
180
+ 4.898714188796334,0.8387096774193549,0.6021505376344086,4.559139784946237,0.3978494623655914,93,0.3978494623655914,40.0,java,0.0
181
+ 4.33379987011787,0.6521739130434783,0.4565217391304348,4.195652173913044,0.5434782608695652,46,0.5434782608695652,21.944444444444443,java,0.0
182
+ 4.720077865655612,0.8817204301075269,0.6344086021505376,4.806451612903226,0.3655913978494624,93,0.3655913978494624,35.95238095238095,java,0.0
183
+ 4.971145882958361,0.543859649122807,0.3508771929824561,4.0701754385964914,0.6491228070175439,57,0.6491228070175439,40.375,python,1.0
184
+ 4.608465630737972,0.89,0.6799999999999999,2.32,0.32,100,0.32,30.428571428571427,java,1.0
185
+ 4.553348286183388,0.8556701030927835,0.6597938144329897,5.391752577319588,0.3402061855670103,97,0.3402061855670103,31.903225806451612,java,0.0
186
+ 5.111219097255348,0.6422018348623854,0.5045871559633027,4.009174311926605,0.4954128440366973,109,0.4954128440366973,29.88888888888889,java,0.0
187
+ 5.169436190447377,0.8862275449101796,0.7005988023952097,2.8862275449101795,0.2994011976047904,167,0.2994011976047904,42.2,java,1.0
188
+ 4.897236854659848,0.6271186440677966,0.4067796610169492,4.593220338983051,0.5932203389830508,59,0.5932203389830508,29.384615384615383,java,0.0
189
+ 5.5494840429514545,0.9004739336492891,0.6966824644549763,6.156398104265403,0.3033175355450237,211,0.3033175355450237,33.016949152542374,java,0.0
190
+ 4.316499122181017,0.8412698412698413,0.6031746031746033,2.6507936507936507,0.3968253968253968,63,0.3968253968253968,29.818181818181817,java,1.0
191
+ 4.946526678933779,0.5,0.31481481481481477,4.203703703703703,0.6851851851851852,54,0.6851851851851852,48.5,python,1.0
192
+ 3.980473809580999,0.9195402298850575,0.7471264367816092,2.528735632183908,0.25287356321839083,87,0.25287356321839083,22.53846153846154,java,0.0
193
+ 4.931343145380855,0.8930817610062893,0.6666666666666667,2.0440251572327046,0.3333333333333333,159,0.3333333333333333,25.423076923076923,python,1.0
194
+ 5.865359596361463,0.7432432432432432,0.5067567567567568,5.256756756756757,0.49324324324324326,148,0.49324324324324326,34.970588235294116,python,1.0
195
+ 5.124006295628484,0.45614035087719296,0.2807017543859649,3.754385964912281,0.7192982456140351,57,0.7192982456140351,38.0,python,0.0
196
+ 6.085910006150067,0.8445378151260504,0.6218487394957983,6.142857142857143,0.37815126050420167,238,0.37815126050420167,28.604395604395606,java,1.0
197
+ 4.868510971586463,0.7411764705882353,0.5411764705882354,3.2470588235294118,0.4588235294117647,85,0.4588235294117647,28.17391304347826,java,0.0
198
+ 4.605408294302115,0.8611111111111112,0.5972222222222222,3.138888888888889,0.4027777777777778,72,0.4027777777777778,33.857142857142854,python,0.0
199
+ 4.73995425222158,0.6964285714285714,0.4464285714285714,6.482142857142857,0.5535714285714286,56,0.5535714285714286,33.4375,java,1.0
200
+ 4.8675567139411235,0.7701149425287356,0.5517241379310345,4.816091954022989,0.4482758620689655,87,0.4482758620689655,36.13636363636363,java,0.0
201
+ 4.246481972040831,0.7454545454545455,0.5272727272727273,4.2727272727272725,0.4727272727272727,55,0.4727272727272727,30.416666666666668,python,0.0
202
+ 2.8016094970590277,0.8,0.55,5.0,0.45,20,0.45,28.8,python,1.0
203
+ 5.7659039797576215,0.5669291338582677,0.40944881889763785,5.377952755905512,0.5905511811023622,127,0.5905511811023622,35.2962962962963,java,0.0
204
+ 4.8369927564722754,0.5978260869565217,0.46739130434782605,5.695652173913044,0.532608695652174,92,0.532608695652174,30.0,python,1.0
205
+ 5.650410881834686,0.8103448275862069,0.6091954022988506,4.022988505747127,0.39080459770114945,174,0.39080459770114945,39.65853658536585,java,0.0
206
+ 4.33611521033683,0.6363636363636364,0.43181818181818177,4.75,0.5681818181818182,44,0.5681818181818182,34.6,java,1.0
207
+ 6.041274356789173,0.8076923076923077,0.657051282051282,3.801282051282051,0.34294871794871795,312,0.34294871794871795,30.096385542168676,java,0.0
208
+ 5.289310575466358,0.6428571428571429,0.5,4.190476190476191,0.5,126,0.5,31.615384615384617,python,1.0
209
+ 4.700186536808578,0.7741935483870968,0.5,3.532258064516129,0.5,62,0.5,33.76923076923077,python,0.0
210
+ 4.180451390892101,0.4444444444444444,0.2592592592592593,4.777777777777778,0.7407407407407407,27,0.7407407407407407,22.25,python,1.0
211
+ 4.786213536530154,0.9016393442622951,0.6967213114754098,2.762295081967213,0.30327868852459017,122,0.30327868852459017,29.083333333333332,python,1.0
212
+ 5.081486951808688,0.7666666666666667,0.5222222222222221,3.7888888888888888,0.4777777777777778,90,0.4777777777777778,35.8,python,1.0
213
+ 4.44849995084568,0.8461538461538461,0.6593406593406593,3.5934065934065935,0.34065934065934067,91,0.34065934065934067,31.045454545454547,python,0.0
214
+ 4.7077148025974385,0.37142857142857144,0.19999999999999996,7.228571428571429,0.8,35,0.8,40.888888888888886,java,1.0
215
+ 4.51662508214898,0.8837209302325582,0.6627906976744187,3.302325581395349,0.3372093023255814,86,0.3372093023255814,23.25,java,0.0
216
+ 4.591236738386586,0.7903225806451613,0.532258064516129,4.193548387096774,0.46774193548387094,62,0.46774193548387094,26.352941176470587,java,1.0
217
+ 4.770332655554241,0.7123287671232876,0.5205479452054795,2.5753424657534247,0.4794520547945205,73,0.4794520547945205,14.681818181818182,java,0.0
218
+ 4.193064830243576,0.9555555555555556,0.7333333333333334,2.3555555555555556,0.26666666666666666,90,0.26666666666666666,24.68421052631579,python,1.0
219
+ 3.935732282211645,0.90625,0.703125,3.1875,0.296875,64,0.296875,34.72727272727273,java,1.0
220
+ 5.255459047825728,0.7096774193548387,0.4838709677419355,4.32258064516129,0.5161290322580645,93,0.5161290322580645,47.583333333333336,python,0.0
221
+ 5.56295430052689,0.9239543726235742,0.7490494296577948,6.391634980988593,0.2509505703422053,263,0.2509505703422053,51.39655172413793,java,0.0
222
+ 4.7624023788690275,0.8846153846153846,0.5897435897435898,3.41025641025641,0.41025641025641024,78,0.41025641025641024,27.95,python,0.0
223
+ 4.641341251556263,0.7164179104477612,0.5223880597014925,3.5671641791044775,0.47761194029850745,67,0.47761194029850745,32.69230769230769,java,1.0
224
+ 3.9580339180966533,0.9142857142857143,0.7142857142857143,4.014285714285714,0.2857142857142857,70,0.2857142857142857,24.5,python,1.0
225
+ 3.821296333684861,0.6071428571428571,0.3928571428571429,4.357142857142857,0.6071428571428571,28,0.6071428571428571,26.5,java,1.0
226
+ 5.99080968202175,0.8926553672316384,0.7033898305084746,4.759887005649717,0.2966101694915254,354,0.2966101694915254,40.125,python,0.0
227
+ 5.023079206577155,0.8592592592592593,0.674074074074074,2.77037037037037,0.32592592592592595,135,0.32592592592592595,31.88888888888889,java,0.0
228
+ 4.427567157116929,0.675,0.4,5.55,0.6,40,0.6,31.4,java,1.0
229
+ 4.183636205660773,0.8666666666666667,0.6916666666666667,2.8583333333333334,0.30833333333333335,120,0.30833333333333335,28.38888888888889,python,1.0
230
+ 4.6597716630131325,0.6964285714285714,0.4642857142857143,3.892857142857143,0.5357142857142857,56,0.5357142857142857,37.9,java,0.0
231
+ 5.011095568495564,0.5454545454545454,0.32727272727272727,5.490909090909091,0.6727272727272727,55,0.6727272727272727,33.93333333333333,java,1.0
232
+ 4.07744157582188,0.8085106382978723,0.5531914893617021,2.1702127659574466,0.44680851063829785,47,0.44680851063829785,31.142857142857142,python,0.0
233
+ 4.0614821867207755,0.3181818181818182,0.18181818181818177,3.8181818181818183,0.8181818181818182,22,0.8181818181818182,29.75,python,1.0
234
+ 4.490765292824625,0.7254901960784313,0.47058823529411764,3.019607843137255,0.5294117647058824,51,0.5294117647058824,22.727272727272727,python,1.0
235
+ 3.7695855752178566,0.9612403100775194,0.8372093023255813,2.550387596899225,0.16279069767441862,129,0.16279069767441862,26.833333333333332,java,0.0
236
+ 5.113304524601207,0.8288288288288288,0.5945945945945945,3.936936936936937,0.40540540540540543,111,0.40540540540540543,26.9375,java,0.0
237
+ 6.683599316096754,0.8333333333333334,0.6434599156118144,5.89662447257384,0.35654008438818563,474,0.35654008438818563,44.127906976744185,python,0.0
238
+ 5.742479746741097,0.8826291079812206,0.6572769953051643,7.413145539906103,0.3427230046948357,213,0.3427230046948357,34.11538461538461,java,1.0
239
+ 5.684220757851569,0.842809364548495,0.6889632107023411,4.11371237458194,0.3110367892976589,299,0.3110367892976589,42.95348837209303,python,0.0
240
+ 3.7248747399889837,0.8125,0.6041666666666667,3.6666666666666665,0.3958333333333333,48,0.3958333333333333,26.857142857142858,python,0.0
241
+ 4.438721875540867,0.7083333333333334,0.45833333333333337,3.5625,0.5416666666666666,48,0.5416666666666666,14.647058823529411,java,0.0
242
+ 5.052330376272108,0.7604166666666666,0.5416666666666667,5.239583333333333,0.4583333333333333,96,0.4583333333333333,37.76190476190476,java,0.0
243
+ 4.039211536948156,0.5161290322580645,0.3548387096774194,3.3870967741935485,0.6451612903225806,31,0.6451612903225806,12.0,java,0.0
244
+ 6.058508660286119,0.8492307692307692,0.6830769230769231,6.8246153846153845,0.3169230769230769,325,0.3169230769230769,40.19047619047619,java,1.0
245
+ 4.85654598061365,0.8559322033898306,0.6440677966101696,3.610169491525424,0.3559322033898305,118,0.3559322033898305,22.06896551724138,java,0.0
246
+ 4.384791749673746,0.8899082568807339,0.7155963302752293,2.6055045871559632,0.28440366972477066,109,0.28440366972477066,33.44444444444444,java,1.0
247
+ 4.2115044085684765,0.631578947368421,0.42105263157894735,4.473684210526316,0.5789473684210527,38,0.5789473684210527,31.77777777777778,python,1.0
248
+ 6.445861001406715,0.7741935483870968,0.5443548387096775,5.548387096774194,0.45564516129032256,248,0.45564516129032256,36.03508771929825,python,0.0
249
+ 4.060861683988659,0.8043478260869565,0.5652173913043479,3.0,0.43478260869565216,46,0.43478260869565216,28.77777777777778,python,1.0
250
+ 4.770281067729926,0.6226415094339622,0.39622641509433965,4.962264150943396,0.6037735849056604,53,0.6037735849056604,29.642857142857142,java,1.0
251
+ 4.310866879118227,0.8829787234042553,0.6914893617021276,4.340425531914893,0.30851063829787234,94,0.30851063829787234,30.791666666666668,python,0.0
252
+ 4.912733576964344,0.5636363636363636,0.36363636363636365,3.4545454545454546,0.6363636363636364,55,0.6363636363636364,24.705882352941178,python,1.0
253
+ 3.76062054430996,0.6666666666666666,0.48484848484848486,3.8484848484848486,0.5151515151515151,33,0.5151515151515151,27.11111111111111,python,1.0
254
+ 5.092277224236287,0.6621621621621622,0.43243243243243246,3.7837837837837838,0.5675675675675675,74,0.5675675675675675,29.5625,java,0.0
255
+ 3.5,0.5,0.25,3.125,0.75,16,0.75,19.75,python,1.0
256
+ 4.62765192370225,0.952,0.736,2.472,0.264,125,0.264,35.714285714285715,python,0.0
257
+ 4.516221717727311,0.8026315789473685,0.6052631578947368,5.197368421052632,0.39473684210526316,76,0.39473684210526316,27.72222222222222,python,0.0
258
+ 3.937751955408587,0.6451612903225806,0.4193548387096774,4.193548387096774,0.5806451612903226,31,0.5806451612903226,29.25,python,0.0
259
+ 5.000896232556761,0.8045977011494253,0.5402298850574713,3.6091954022988504,0.45977011494252873,87,0.45977011494252873,33.0,python,1.0
260
+ 6.038322688621861,0.827906976744186,0.6046511627906976,6.120930232558139,0.3953488372093023,215,0.3953488372093023,49.08108108108108,python,0.0
261
+ 4.216152190789069,0.8163265306122449,0.5510204081632653,3.5714285714285716,0.4489795918367347,49,0.4489795918367347,22.263157894736842,java,0.0
262
+ 4.667187378614269,0.8235294117647058,0.5882352941176471,4.0588235294117645,0.4117647058823529,85,0.4117647058823529,32.857142857142854,java,0.0
263
+ 5.070332970220562,0.8478260869565217,0.6594202898550725,4.297101449275362,0.34057971014492755,138,0.34057971014492755,30.34285714285714,java,0.0
264
+ 4.2366344310623845,0.9058823529411765,0.7058823529411764,3.8941176470588235,0.29411764705882354,85,0.29411764705882354,35.0,java,1.0
265
+ 4.062382847947341,0.5454545454545454,0.36363636363636365,2.393939393939394,0.6363636363636364,33,0.6363636363636364,24.333333333333332,python,1.0
266
+ 4.477226049206228,0.6851851851851852,0.4629629629629629,3.8703703703703702,0.5370370370370371,54,0.5370370370370371,37.0,java,1.0
267
+ 4.189125478116705,0.6521739130434783,0.4782608695652174,5.217391304347826,0.5217391304347826,46,0.5217391304347826,39.77777777777778,java,1.0
268
+ 3.6818808028034025,0.47368421052631576,0.26315789473684215,3.3157894736842106,0.7368421052631579,19,0.7368421052631579,22.75,python,1.0
269
+ 4.183768266288633,0.78125,0.609375,1.96875,0.390625,64,0.390625,23.25,python,1.0
270
+ 4.063202513215852,0.7368421052631579,0.4736842105263158,3.1052631578947367,0.5263157894736842,38,0.5263157894736842,23.77777777777778,python,1.0
271
+ 5.240360496391271,0.7901234567901234,0.4814814814814815,4.592592592592593,0.5185185185185185,81,0.5185185185185185,35.0,java,0.0
272
+ 2.9870900416203345,0.7619047619047619,0.5238095238095238,4.333333333333333,0.47619047619047616,21,0.47619047619047616,18.444444444444443,python,1.0
273
+ 4.796230935589923,0.917910447761194,0.7238805970149254,3.1194029850746268,0.27611940298507465,134,0.27611940298507465,33.964285714285715,java,0.0
274
+ 4.921133626426239,0.6779661016949152,0.4067796610169492,4.576271186440678,0.5932203389830508,59,0.5932203389830508,37.9,python,0.0
275
+ 5.223430137239087,0.9595375722543352,0.838150289017341,3.3757225433526012,0.16184971098265896,346,0.16184971098265896,26.730337078651687,java,0.0
276
+ 5.035549713367317,0.5454545454545454,0.32727272727272727,3.7636363636363637,0.6727272727272727,55,0.6727272727272727,25.272727272727273,python,0.0
277
+ 5.404449012034224,0.7906976744186046,0.5658914728682171,5.658914728682171,0.43410852713178294,129,0.43410852713178294,36.758620689655174,java,0.0
278
+ 4.629412138272088,0.4418604651162791,0.3023255813953488,3.8372093023255816,0.6976744186046512,43,0.6976744186046512,27.7,python,0.0
279
+ 4.816777664828826,0.9296875,0.71875,4.7109375,0.28125,128,0.28125,37.078947368421055,java,0.0
280
+ 5.5887549870381,0.8138297872340425,0.6276595744680851,4.388297872340425,0.3723404255319149,188,0.3723404255319149,50.24,python,0.0
281
+ 4.453150309616965,0.8518518518518519,0.7185185185185186,2.4074074074074074,0.2814814814814815,135,0.2814814814814815,14.0,java,0.0
282
+ 5.547057692620618,0.9096385542168675,0.7710843373493976,4.421686746987952,0.2289156626506024,332,0.2289156626506024,31.275862068965516,java,0.0
283
+ 4.627801780853476,0.9,0.6555555555555556,5.055555555555555,0.34444444444444444,90,0.34444444444444444,34.04761904761905,java,0.0
284
+ 4.747191429566854,0.5333333333333333,0.3111111111111111,5.466666666666667,0.6888888888888889,45,0.6888888888888889,36.44444444444444,python,0.0
285
+ 5.524469760991255,0.6788990825688074,0.45871559633027525,4.91743119266055,0.5412844036697247,109,0.5412844036697247,53.38461538461539,python,0.0
286
+ 5.347434988263512,0.4444444444444444,0.29166666666666663,4.0,0.7083333333333334,72,0.7083333333333334,45.625,python,0.0
287
+ 4.074705922236444,0.847457627118644,0.6271186440677966,3.6779661016949152,0.3728813559322034,59,0.3728813559322034,23.1,python,1.0
288
+ 4.41506101220307,0.43333333333333335,0.23333333333333328,4.833333333333333,0.7666666666666667,30,0.7666666666666667,17.333333333333332,python,0.0
289
+ 5.489282954487391,0.7518796992481203,0.5488721804511278,6.7443609022556394,0.45112781954887216,133,0.45112781954887216,41.4,java,0.0
290
+ 4.3199650731621695,0.8584070796460177,0.7256637168141593,3.3628318584070795,0.2743362831858407,113,0.2743362831858407,26.551724137931036,python,0.0
291
+ 5.948852612281259,0.8584905660377359,0.6886792452830188,6.908805031446541,0.3113207547169811,318,0.3113207547169811,39.423529411764704,java,1.0
292
+ 4.251365544851387,0.957983193277311,0.773109243697479,2.302521008403361,0.226890756302521,119,0.226890756302521,25.958333333333332,python,1.0
293
+ 4.966534113288597,0.6,0.41538461538461535,3.753846153846154,0.5846153846153846,65,0.5846153846153846,34.45454545454545,python,1.0
294
+ 4.216421766792934,0.6808510638297872,0.4893617021276596,3.4468085106382977,0.5106382978723404,47,0.5106382978723404,25.53846153846154,python,1.0
295
+ 5.854429565880467,0.47619047619047616,0.32380952380952377,5.79047619047619,0.6761904761904762,105,0.6761904761904762,32.892857142857146,python,1.0
296
+ 5.542084533777365,0.8870967741935484,0.7217741935483871,4.866935483870968,0.2782258064516129,248,0.2782258064516129,38.97727272727273,java,0.0
297
+ 3.8370959941879037,0.7560975609756098,0.5609756097560976,2.5853658536585367,0.43902439024390244,41,0.43902439024390244,23.8,python,1.0
298
+ 5.510000244558343,0.8188405797101449,0.572463768115942,5.884057971014493,0.427536231884058,138,0.427536231884058,31.333333333333332,java,0.0
299
+ 3.9171736218037974,0.9090909090909091,0.7121212121212122,2.878787878787879,0.2878787878787879,66,0.2878787878787879,30.307692307692307,java,1.0
300
+ 4.955593373471456,0.9126984126984127,0.6904761904761905,2.7222222222222223,0.30952380952380953,126,0.30952380952380953,31.208333333333332,python,1.0
301
+ 4.123966016311361,0.7407407407407407,0.537037037037037,3.074074074074074,0.46296296296296297,54,0.46296296296296297,29.9,python,0.0
302
+ 5.280191443467718,0.7971014492753623,0.6086956521739131,4.173913043478261,0.391304347826087,138,0.391304347826087,21.972972972972972,java,0.0
303
+ 4.616874605956221,0.47058823529411764,0.23529411764705888,5.147058823529412,0.7647058823529411,34,0.7647058823529411,33.142857142857146,python,1.0
304
+ 4.128326967635621,0.8043478260869565,0.5434782608695652,4.108695652173913,0.45652173913043476,46,0.45652173913043476,30.363636363636363,java,1.0
305
+ 4.589416279148799,0.8409090909090909,0.6363636363636364,2.715909090909091,0.36363636363636365,88,0.36363636363636365,17.785714285714285,java,0.0
306
+ 4.935672972133341,0.9946236559139785,0.8064516129032258,4.091397849462366,0.1935483870967742,186,0.1935483870967742,31.8,java,0.0
307
+ 4.76880107599223,0.8446601941747572,0.6310679611650485,3.8446601941747574,0.36893203883495146,103,0.36893203883495146,52.72727272727273,python,0.0
308
+ 4.1699823363507,0.8478260869565217,0.5434782608695652,5.456521739130435,0.45652173913043476,46,0.45652173913043476,33.45454545454545,java,1.0
309
+ 4.017917900762096,0.7058823529411765,0.4411764705882353,4.117647058823529,0.5588235294117647,34,0.5588235294117647,30.571428571428573,python,0.0
310
+ 4.6586089678214995,0.896,0.704,2.696,0.296,125,0.296,31.40740740740741,python,0.0
311
+ 4.643160624658742,0.7258064516129032,0.5,3.3870967741935485,0.5,62,0.5,45.30769230769231,java,0.0
312
+ 4.813012081410429,0.7857142857142857,0.5285714285714286,4.257142857142857,0.4714285714285714,70,0.4714285714285714,31.647058823529413,python,1.0
313
+ 4.60865917212318,0.9142857142857143,0.6952380952380952,2.6952380952380954,0.3047619047619048,105,0.3047619047619048,28.5,python,1.0
314
+ 4.713243393050852,0.9347826086956522,0.7536231884057971,2.152173913043478,0.2463768115942029,138,0.2463768115942029,33.76190476190476,python,0.0
315
+ 4.619054056131191,0.734375,0.515625,3.296875,0.484375,64,0.484375,28.066666666666666,java,0.0
316
+ 4.825245826488317,0.7654320987654321,0.5308641975308642,4.580246913580247,0.4691358024691358,81,0.4691358024691358,33.9,java,0.0
317
+ 6.527235468349858,0.8012232415902141,0.6085626911314985,5.792048929663609,0.39143730886850153,327,0.39143730886850153,42.516129032258064,python,0.0
318
+ 5.59181229230442,0.7133333333333334,0.5333333333333333,4.233333333333333,0.4666666666666667,150,0.4666666666666667,33.55172413793103,python,0.0
319
+ 4.0006374384002905,0.8421052631578947,0.631578947368421,2.491228070175439,0.3684210526315789,57,0.3684210526315789,33.5,python,1.0
320
+ 3.7461571189724303,0.9545454545454546,0.6590909090909092,4.25,0.3409090909090909,44,0.3409090909090909,28.833333333333332,python,1.0
321
+ 5.745512973329542,0.8275862068965517,0.5517241379310345,6.027586206896552,0.4482758620689655,145,0.4482758620689655,34.648648648648646,java,0.0
322
+ 5.490266518243658,0.8412698412698413,0.6243386243386244,5.121693121693122,0.37566137566137564,189,0.37566137566137564,40.166666666666664,java,0.0
323
+ 4.496115365169272,0.6875,0.4375,4.729166666666667,0.5625,48,0.5625,43.285714285714285,python,1.0
324
+ 5.505436888624101,0.945273631840796,0.7164179104477613,3.855721393034826,0.2835820895522388,201,0.2835820895522388,35.58139534883721,java,0.0
325
+ 4.795079572399797,0.8155339805825242,0.6116504854368932,4.621359223300971,0.3883495145631068,103,0.3883495145631068,37.54545454545455,java,0.0
326
+ 5.595002345872928,0.7768595041322314,0.5206611570247934,6.099173553719008,0.4793388429752066,121,0.4793388429752066,32.029411764705884,java,0.0
327
+ 5.887085632611005,0.93359375,0.68359375,6.109375,0.31640625,256,0.31640625,31.675675675675677,java,0.0
328
+ 4.809267348202161,0.7971014492753623,0.5217391304347826,3.8840579710144927,0.4782608695652174,69,0.4782608695652174,38.15384615384615,python,0.0
329
+ 4.893291230757242,0.5737704918032787,0.39344262295081966,4.311475409836065,0.6065573770491803,61,0.6065573770491803,33.7,python,0.0
330
+ 4.346926022805004,0.8,0.5454545454545454,4.072727272727272,0.45454545454545453,55,0.45454545454545453,29.866666666666667,java,0.0
331
+ 4.476474118254376,0.7857142857142857,0.5178571428571428,6.660714285714286,0.48214285714285715,56,0.48214285714285715,39.92307692307692,java,1.0
332
+ 3.9809099797950775,0.9154929577464789,0.704225352112676,4.422535211267606,0.29577464788732394,71,0.29577464788732394,30.0,java,1.0
333
+ 5.272508205443481,0.5967741935483871,0.32258064516129037,6.193548387096774,0.6774193548387096,62,0.6774193548387096,44.166666666666664,java,1.0
334
+ 5.1902361166256314,0.9676113360323887,0.7854251012145749,4.611336032388664,0.2145748987854251,247,0.2145748987854251,28.724137931034484,java,1.0
335
+ 4.350489295854283,0.7333333333333333,0.55,3.183333333333333,0.45,60,0.45,33.8,java,1.0
336
+ 5.0009504388098796,0.7927927927927928,0.5765765765765766,4.5225225225225225,0.42342342342342343,111,0.42342342342342343,35.142857142857146,java,0.0
337
+ 5.7769401731183265,0.9180327868852459,0.6284153005464481,5.573770491803279,0.37158469945355194,183,0.37158469945355194,28.34375,java,0.0
338
+ 3.876189125313799,0.8970588235294118,0.6911764705882353,2.073529411764706,0.3088235294117647,68,0.3088235294117647,27.9,java,1.0
339
+ 4.032303242743952,0.4444444444444444,0.2962962962962963,3.888888888888889,0.7037037037037037,27,0.7037037037037037,26.5,python,0.0
340
+ 3.625,0.375,0.1875,6.625,0.8125,16,0.8125,27.8,python,1.0
341
+ 4.4210998675908275,0.9540816326530612,0.826530612244898,2.4846938775510203,0.17346938775510204,196,0.17346938775510204,35.06896551724138,python,0.0
342
+ 4.839775539645508,0.3157894736842105,0.1842105263157895,5.157894736842105,0.8157894736842105,38,0.8157894736842105,37.0,python,1.0
343
+ 5.842252978929465,0.8711484593837535,0.7254901960784313,6.647058823529412,0.27450980392156865,357,0.27450980392156865,42.16470588235294,java,1.0
344
+ 4.166886761135438,0.78,0.56,3.2,0.44,50,0.44,29.916666666666668,python,0.0
345
+ 5.395683189409616,0.8947368421052632,0.6947368421052631,5.515789473684211,0.30526315789473685,190,0.30526315789473685,29.24590163934426,java,1.0
featureextraction/step1_statistical_extraction/val_features.csv ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ entropy,burstiness,repetition_ratio,avg_token_length,vocab_richness,num_tokens,unique_token_ratio,avg_line_length,language,Label
2
+ 3.616348566075164,0.42105263157894735,0.26315789473684215,3.4210526315789473,0.7368421052631579,19,0.7368421052631579,25.25,python,1.0
3
+ 5.225116096801971,0.9219512195121952,0.7365853658536585,4.84390243902439,0.2634146341463415,205,0.2634146341463415,31.340425531914892,java,0.0
4
+ 5.858683407719383,0.5053763440860215,0.30107526881720426,6.021505376344086,0.6989247311827957,93,0.6989247311827957,30.59259259259259,python,1.0
5
+ 3.4687406100460665,0.918918918918919,0.6486486486486487,2.4324324324324325,0.35135135135135137,37,0.35135135135135137,40.333333333333336,python,1.0
6
+ 4.726474118254375,0.75,0.4642857142857143,6.339285714285714,0.5357142857142857,56,0.5357142857142857,33.25,java,1.0
7
+ 5.127518057233353,0.9186602870813397,0.7416267942583732,4.779904306220096,0.2583732057416268,209,0.2583732057416268,42.84848484848485,java,0.0
8
+ 3.385592490318422,0.8181818181818182,0.6909090909090909,2.6,0.3090909090909091,55,0.3090909090909091,23.75,python,1.0
9
+ 5.303371529748239,0.8614457831325302,0.6566265060240963,4.445783132530121,0.3433734939759036,166,0.3433734939759036,19.36,java,0.0
10
+ 4.32307418942857,0.72,0.5,4.04,0.5,50,0.5,23.647058823529413,python,1.0
11
+ 4.646104113627779,0.6666666666666666,0.4444444444444444,3.925925925925926,0.5555555555555556,54,0.5555555555555556,17.8125,java,0.0
12
+ 4.824043435284101,0.7971014492753623,0.5217391304347826,3.9130434782608696,0.4782608695652174,69,0.4782608695652174,26.157894736842106,python,0.0
13
+ 4.133660689688186,0.44,0.24,6.0,0.76,25,0.76,28.625,java,1.0
14
+ 3.977232016069148,0.8854166666666666,0.7395833333333333,2.7395833333333335,0.2604166666666667,96,0.2604166666666667,28.4375,python,1.0
15
+ 4.418945246473782,0.8461538461538461,0.641025641025641,3.1153846153846154,0.358974358974359,78,0.358974358974359,33.78947368421053,java,0.0
16
+ 3.795088586397732,0.4782608695652174,0.30434782608695654,3.5652173913043477,0.6956521739130435,23,0.6956521739130435,21.166666666666668,python,1.0
17
+ 4.665893393157604,0.8735632183908046,0.632183908045977,2.4942528735632186,0.367816091954023,87,0.367816091954023,30.833333333333332,java,1.0
18
+ 4.888279432092592,0.5636363636363636,0.36363636363636365,4.5636363636363635,0.6363636363636364,55,0.6363636363636364,31.7,python,0.0
19
+ 4.43887358554625,0.7111111111111111,0.4444444444444444,4.488888888888889,0.5555555555555556,45,0.5555555555555556,27.533333333333335,java,0.0
20
+ 4.956005194695069,0.7636363636363637,0.5909090909090908,3.963636363636364,0.4090909090909091,110,0.4090909090909091,22.925925925925927,java,0.0
21
+ 4.364611584126849,0.8990825688073395,0.7201834862385321,3.86697247706422,0.2798165137614679,218,0.2798165137614679,36.03125,python,0.0
22
+ 5.1310450521201885,0.6923076923076923,0.4945054945054945,3.8241758241758244,0.5054945054945055,91,0.5054945054945055,43.083333333333336,python,0.0
23
+ 4.157269582127642,0.9487179487179487,0.7863247863247863,1.735042735042735,0.21367521367521367,117,0.21367521367521367,36.583333333333336,python,1.0
24
+ 6.1032071909268755,0.7524271844660194,0.558252427184466,4.344660194174757,0.441747572815534,206,0.441747572815534,42.59375,python,0.0
25
+ 5.489504669651612,0.9067796610169492,0.7203389830508475,3.983050847457627,0.2796610169491525,236,0.2796610169491525,46.03225806451613,java,0.0
26
+ 5.5429756373358305,0.6216216216216216,0.4504504504504504,3.810810810810811,0.5495495495495496,111,0.5495495495495496,46.0,python,0.0
27
+ 5.597773058187228,0.7096774193548387,0.4193548387096774,4.548387096774194,0.5806451612903226,93,0.5806451612903226,32.27777777777778,python,1.0
28
+ 4.288320189106,0.7916666666666666,0.5208333333333333,4.229166666666667,0.4791666666666667,48,0.4791666666666667,34.0,python,0.0
29
+ 5.117143809076922,0.8490566037735849,0.5849056603773585,3.943396226415094,0.41509433962264153,106,0.41509433962264153,39.4,python,0.0
30
+ 4.3341837197791895,0.8,0.44999999999999996,7.05,0.55,40,0.55,41.666666666666664,java,1.0
31
+ 4.7319988769018755,0.8416666666666667,0.675,3.091666666666667,0.325,120,0.325,15.944444444444445,java,0.0
32
+ 4.2628148954723555,0.55,0.4,2.825,0.6,40,0.6,31.285714285714285,python,0.0
33
+ 3.25,0.875,0.625,3.6875,0.375,32,0.375,41.4,java,1.0
34
+ 5.086021506160257,0.7008547008547008,0.5470085470085471,4.239316239316239,0.452991452991453,117,0.452991452991453,44.125,python,0.0
35
+ 4.488525294897748,0.6923076923076923,0.46153846153846156,3.9038461538461537,0.5384615384615384,52,0.5384615384615384,30.181818181818183,java,1.0
36
+ 5.273684376262023,0.6111111111111112,0.375,4.819444444444445,0.625,72,0.625,42.72727272727273,python,0.0
37
+ 4.047055675509121,0.8627450980392157,0.607843137254902,4.372549019607843,0.39215686274509803,51,0.39215686274509803,28.428571428571427,python,1.0
38
+ 4.6442547530701885,0.5370370370370371,0.38888888888888884,5.12962962962963,0.6111111111111112,54,0.6111111111111112,34.7,java,0.0
39
+ 4.423251796980337,0.32142857142857145,0.1785714285714286,2.642857142857143,0.8214285714285714,28,0.8214285714285714,30.0,python,1.0
40
+ 5.498927847038898,0.5842696629213483,0.3820224719101124,6.617977528089888,0.6179775280898876,89,0.6179775280898876,41.421052631578945,python,1.0
41
+ 4.376857192109841,0.8625,0.65,3.4125,0.35,80,0.35,29.764705882352942,java,0.0
42
+ 4.600176636857892,0.7894736842105263,0.5087719298245614,3.6842105263157894,0.49122807017543857,57,0.49122807017543857,30.5,python,1.0
43
+ 6.213552119915352,0.8066037735849056,0.5613207547169812,5.485849056603773,0.4386792452830189,212,0.4386792452830189,39.6,python,0.0
44
+ 4.024059741690056,0.7727272727272727,0.6060606060606061,3.5454545454545454,0.3939393939393939,66,0.3939393939393939,30.46153846153846,python,1.0
45
+ 4.14853573331382,0.7619047619047619,0.5,7.023809523809524,0.5,42,0.5,26.266666666666666,java,0.0
46
+ 4.2057539822273915,0.8507462686567164,0.6417910447761195,3.5522388059701493,0.3582089552238806,67,0.3582089552238806,30.352941176470587,python,0.0
47
+ 4.860498585099573,0.8737864077669902,0.6504854368932038,4.097087378640777,0.34951456310679613,103,0.34951456310679613,32.61538461538461,java,0.0
48
+ 4.661648023061636,0.7631578947368421,0.5526315789473684,3.776315789473684,0.4473684210526316,76,0.4473684210526316,32.05555555555556,java,0.0
49
+ 5.405165733375986,0.9311926605504587,0.7339449541284404,5.426605504587156,0.26605504587155965,218,0.26605504587155965,33.0,java,0.0
50
+ 3.238901256602631,0.46153846153846156,0.23076923076923073,4.538461538461538,0.7692307692307693,13,0.7692307692307693,26.0,python,1.0
51
+ 3.964395529526285,0.9512195121951219,0.5853658536585367,4.170731707317073,0.4146341463414634,41,0.4146341463414634,27.1,java,1.0
52
+ 4.091031702849688,0.8170731707317073,0.6707317073170731,3.8780487804878048,0.32926829268292684,82,0.32926829268292684,31.833333333333332,python,0.0
53
+ 3.6596769481795466,0.8571428571428571,0.6190476190476191,4.976190476190476,0.38095238095238093,42,0.38095238095238093,24.533333333333335,java,1.0
54
+ 5.249145047994913,0.6875,0.5178571428571428,3.8125,0.48214285714285715,112,0.48214285714285715,46.5,python,0.0
55
+ 4.456796057875661,0.7761194029850746,0.582089552238806,2.283582089552239,0.417910447761194,67,0.417910447761194,25.545454545454547,java,1.0
56
+ 5.669233537529181,0.8121827411167513,0.6142131979695431,4.3908629441624365,0.38578680203045684,197,0.38578680203045684,49.56,python,0.0
57
+ 4.923879361943277,0.9572649572649573,0.6923076923076923,4.205128205128205,0.3076923076923077,117,0.3076923076923077,31.321428571428573,python,0.0
58
+ 4.028579035079149,0.9841269841269841,0.7142857142857143,3.8253968253968256,0.2857142857142857,63,0.2857142857142857,32.666666666666664,python,1.0
59
+ 4.931270849848961,0.6,0.36363636363636365,5.763636363636364,0.6363636363636364,55,0.6363636363636364,29.8125,java,1.0
60
+ 4.987993396430413,0.8682170542635659,0.6744186046511628,3.317829457364341,0.32558139534883723,129,0.32558139534883723,33.02777777777778,java,0.0
61
+ 4.895499903544709,0.9303482587064676,0.7661691542288558,3.4776119402985075,0.23383084577114427,201,0.23383084577114427,23.032258064516128,java,0.0
62
+ 4.297689964750703,0.9158878504672897,0.7476635514018692,3.532710280373832,0.2523364485981308,107,0.2523364485981308,31.107142857142858,java,0.0
63
+ 4.531740514375559,0.8,0.509090909090909,5.9818181818181815,0.4909090909090909,55,0.4909090909090909,30.25,java,1.0
64
+ 4.894400915540714,0.7763157894736842,0.5263157894736843,3.3157894736842106,0.47368421052631576,76,0.47368421052631576,29.952380952380953,python,0.0
65
+ 5.754451229096238,0.7107438016528925,0.4628099173553719,5.603305785123967,0.5371900826446281,121,0.5371900826446281,34.03703703703704,java,0.0
66
+ 3.5141016549301787,0.6944444444444444,0.5555555555555556,3.0277777777777777,0.4444444444444444,36,0.4444444444444444,33.6,python,1.0
67
+ 5.4389854001411315,0.7350427350427351,0.5213675213675213,5.170940170940171,0.47863247863247865,117,0.47863247863247865,43.72727272727273,java,0.0
68
+ 6.500985818092827,0.8327974276527331,0.6109324758842444,6.864951768488746,0.3890675241157556,311,0.3890675241157556,38.79775280898876,java,1.0
69
+ 3.9219280948873627,0.4,0.19999999999999996,3.85,0.8,20,0.8,20.5,python,1.0
70
+ 4.594928174344042,0.9072164948453608,0.6804123711340206,2.6804123711340204,0.31958762886597936,97,0.31958762886597936,35.5,java,1.0
71
+ 6.3478254862783485,0.7662337662337663,0.5454545454545454,6.783549783549783,0.45454545454545453,231,0.45454545454545453,33.81818181818182,java,1.0
72
+ 4.777798184597433,0.8349514563106796,0.6310679611650485,4.475728155339806,0.36893203883495146,103,0.36893203883495146,30.413793103448278,python,0.0
73
+ 4.955482172039427,0.7422680412371134,0.5257731958762887,5.556701030927835,0.4742268041237113,97,0.4742268041237113,29.88,python,0.0
74
+ 3.963119345888566,0.7291666666666666,0.5416666666666667,3.8333333333333335,0.4583333333333333,48,0.4583333333333333,29.2,python,1.0
75
+ 4.981129381113054,0.7857142857142857,0.5892857142857143,3.8214285714285716,0.4107142857142857,112,0.4107142857142857,47.69230769230769,python,0.0