File size: 2,910 Bytes
b144cb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
045661d
 
 
 
 
b144cb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
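Hugging Face Spaces entrypoint (Gradio).

Uses the same backend as the Reflex UI: `run_prediction` from the `backend_bridge`
module, which is imported as a top-level module once the `reflex_ui` directories
have been added to `sys.path`.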
"""

from __future__ import annotations

import os
import sys
from pathlib import Path

import gradio as gr
import pandas as pd

# Directory containing this file; the process working directory is moved here at import time.
# NOTE(review): presumably so relative paths used by the backend resolve from the repo root — confirm.
REPO_ROOT = Path(__file__).resolve().parent
os.chdir(REPO_ROOT)

_reflex_pkg_parent = REPO_ROOT / "reflex_ui"
_reflex_pkg_nested = _reflex_pkg_parent / "reflex_ui"

# Prepend each directory that is not already on sys.path. Every insert goes to
# index 0, so the directory listed last ends up being searched first.
for _import_root in (REPO_ROOT, _reflex_pkg_parent, _reflex_pkg_nested):
    if str(_import_root) not in sys.path:
        sys.path.insert(0, str(_import_root))

# Must come after the sys.path edits above, hence the E402 suppression.
from backend_bridge import run_prediction  # noqa: E402


def _predict(code: str, language: str, top_k: float):
    """Run the classifier on a snippet and format the result for display.

    Args:
        code: Source text to classify; surrounding whitespace is stripped.
        language: Requested language. Anything other than "python" or "java"
            (case-insensitive), including an empty value, falls back to "python".
        top_k: Number of features to request. A falsy value means 6; the
            integer value is clamped to the range 1..30.

    Returns:
        A 4-tuple ``(headline_markdown, explanation, table, grouped_markdown)``.
        When the snippet is blank, or ``run_prediction`` raises, the first item
        carries the message and the rest are ``""``, an empty DataFrame, ``""``.
    """

    def _message_only(message):
        # Shape shared by the "nothing to do" and "backend failed" outcomes.
        return (message, "", pd.DataFrame(), "")

    def _negated_value(pair):
        # Sorting on the negated value yields largest-first ordering.
        return -pair[1]

    snippet = code.strip() if code else ""
    if not snippet:
        return _message_only("*Paste a non-empty code snippet.*")

    chosen_lang = language.lower() if language else "python"
    if chosen_lang != "python" and chosen_lang != "java":
        chosen_lang = "python"

    requested = int(top_k) if top_k else 6
    feature_count = min(max(requested, 1), 30)

    try:
        result = run_prediction(code=snippet, language=chosen_lang, top_k=feature_count)
    except Exception as exc:
        # Surface any backend failure in the UI instead of raising.
        return _message_only(f"**{type(exc).__name__}:** `{exc}`")

    headline = f"## Prediction: **{result.label}**\n\nP(AI) = **{result.prob_ai:.4f}**"

    ranked_groups = sorted(result.grouped_importance.items(), key=_negated_value)
    bullets = "\n".join(f"- **{name}:** {val:.4f}" for name, val in ranked_groups)
    grouped_section = "### Group importance\n" + (bullets if bullets else "_n/a_")

    table = pd.DataFrame(result.shap_rows)
    has_impact_column = (not table.empty) and ("impact" in table.columns)
    if has_impact_column:
        # The "impact" column is not shown in the attribution table.
        table = table.drop(columns=["impact"], errors="ignore")

    return headline, result.explanation, table, grouped_section


# UI definition. Statement order is kept exactly as before.
# NOTE(review): Gradio presumably lays components out in creation order — confirm before reordering.
with gr.Blocks(title="AI vs Human Code Classifier") as demo:
    # Page header.
    gr.Markdown(
        value=(
            "# AI vs Human Code Classifier\n"
            "Classifies snippets as **AI** or **Human** using XGBoost on statistical, AST, "
            "stylometry, language, and **UnixCoder** semantic features, with **SHAP** attributions."
        )
    )

    # Inputs: language selector and feature-count slider share a row; the editor sits below.
    with gr.Row():
        language = gr.Radio(
            choices=["python", "java"],
            value="python",
            label="Language",
        )
        top_k = gr.Slider(
            minimum=1,
            maximum=20,
            value=6,
            step=1,
            label="Top SHAP features",
        )
    code = gr.Code(
        label="Code",
        language="python",
        lines=18,
    )
    run_btn = gr.Button(value="Run prediction", variant="primary")

    # Outputs, in on-page order: headline, group importance, narrative, attribution table.
    out_head = gr.Markdown()
    out_grouped = gr.Markdown()
    out_expl = gr.Markdown(label="Narrative explanation")
    out_table = gr.Dataframe(label="Top feature attributions", wrap=True)

    # `outputs` follows the order of the tuple returned by `_predict`
    # (headline, explanation, table, grouped), not the on-page order above.
    run_btn.click(
        fn=_predict,
        inputs=[code, language, top_k],
        outputs=[out_head, out_expl, out_table, out_grouped],
    )


if __name__ == "__main__":
    # Started as a script: serve the app on all interfaces at port 7860.
    # NOTE(review): presumably the host/port the Spaces runtime expects — confirm.
    demo.launch(server_port=7860, server_name="0.0.0.0")