# Source: Hugging Face Space "MannuGaurav/ai-code-detector" (Space status: Sleeping)
# File size: 14,875 Bytes

import warnings
warnings.filterwarnings('ignore')

import streamlit as st
import numpy as np
import joblib
import torch
import torch.nn as nn
import ast
from transformers import RobertaTokenizer, RobertaModel

# ── Page config ───────────────────────────────────────────────────────────────
# Sets the page title, the page icon and a centered layout for the app.
st.set_page_config(
    page_title="AI Code Detector",
    page_icon="🔍",
    layout="centered"
)

# ── Device ────────────────────────────────────────────────────────────────────
# Inference is pinned to the CPU. The checkpoint is mapped onto this device in
# load_models(), and both load_models() and predict() call .numpy() directly on
# model outputs, which relies on the tensors living on the CPU.
device = torch.device('cpu')

# ── CodeBERT Architecture ─────────────────────────────────────────────────────
class CodeBERTClassifier(nn.Module):
    """CodeBERT encoder followed by dropout and a two-way linear head.

    The encoder is ``microsoft/codebert-base`` loaded through ``RobertaModel``.
    Only the hidden state of the first token of each sequence is fed to the
    head, which maps its 768 values to 2 logits.
    """

    def __init__(self, dropout=0.1):
        """Build the encoder, the dropout layer and the linear head.

        Args:
            dropout: Drop probability applied to the first-token embedding
                before the linear head.
        """
        super().__init__()
        self.codebert = RobertaModel.from_pretrained('microsoft/codebert-base')
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(768, 2)

    def forward(self, input_ids, attention_mask):
        """Return the two class logits for each sequence in the batch.

        Args:
            input_ids: Token ids, forwarded to the encoder unchanged.
            attention_mask: Attention mask, forwarded to the encoder unchanged.

        Returns:
            The output of the linear head applied to the (dropout-regularised)
            first-token hidden state.
        """
        encoded = self.codebert(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 of every sequence is used as the sequence summary.
        first_token = encoded.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(first_token))

# ── Load models (cached so they load only once) ───────────────────────────────
@st.cache_resource
def load_models():
    """Load every artifact the app needs and return them as one tuple.

    Decorated with ``st.cache_resource`` so the artifacts are loaded once and
    reused. Progress and a post-load sanity check are written to stdout.

    Returns:
        ``(scaler, lr_model, svm_model, rf_model, tokenizer, cb_model)`` where
        the first four come from ``models/*.pkl``, ``tokenizer`` is the
        ``microsoft/codebert-base`` tokenizer and ``cb_model`` is a
        ``CodeBERTClassifier`` in eval mode with ``models/best_model.pt``
        loaded into it.
    """
    # NOTE(review): joblib.load and torch.load(weights_only=False) both
    # unpickle their input, so these files must come from a trusted source.
    scaler = joblib.load("models/scaler.pkl")
    lr_model = joblib.load("models/logistic_regression.pkl")
    svm_model = joblib.load("models/svm.pkl")
    rf_model = joblib.load("models/random_forest.pkl")

    tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base')

    print("Loading CodeBERT weights...")
    cb_model = CodeBERTClassifier()
    cb_model.load_state_dict(
        # weights_only=False was marked by the original author as required
        # for cross-version compatibility of the checkpoint.
        torch.load("models/best_model.pt", map_location=device, weights_only=False),
        strict=True,
    )
    cb_model.eval()

    # Sanity check: push an all-zero id sequence (fully attended) through the
    # model and look at the resulting class probabilities.
    with torch.no_grad():
        probe_logits = cb_model(
            torch.zeros(1, 512, dtype=torch.long),
            torch.ones(1, 512, dtype=torch.long),
        )
        human_p, ai_p = torch.softmax(probe_logits, dim=1)[0].numpy()

    print(f"CodeBERT sanity check — Human: {human_p:.4f}, AI: {ai_p:.4f}")
    # A near-certain "human" answer on the dummy input is treated as a sign
    # that the weights did not load properly.
    status = (
        "WARNING: CodeBERT may not have loaded correctly"
        if human_p > 0.9999
        else "CodeBERT loaded correctly"
    )
    print(status)

    print("All models ready")

    return scaler, lr_model, svm_model, rf_model, tokenizer, cb_model

# ── Ensemble weights ──────────────────────────────────────────────────────────
# One raw score per model, in the order LR, SVM, Random Forest, CodeBERT — the
# same order predict() uses when it builds `ai_probs`. The four constants match
# the "F1 Score" column of the weights table shown in the UI. Each score is
# raised to the 4th power, which widens the gap between stronger and weaker
# models, and the result is normalised so that WEIGHTS sums to 1.
_raw    = np.array([0.8179**4, 0.8708**4, 0.8860**4, 0.9983**4])
WEIGHTS = _raw / _raw.sum()

# ── Feature extraction ────────────────────────────────────────────────────────
def get_cyclomatic_complexity(func_node):
    """Return a cyclomatic-complexity style count for an AST subtree.

    Starts at 1 and adds one for every ``if``, ``for``, ``while`` and
    ``except`` handler found anywhere under ``func_node``; each boolean
    operation adds one per operand beyond the first.

    Args:
        func_node: AST node whose subtree (including itself) is inspected.

    Returns:
        The integer complexity score, always at least 1.
    """
    branching = (ast.If, ast.For, ast.While, ast.ExceptHandler)

    def decision_points(node):
        # How many extra paths this single node contributes.
        if isinstance(node, branching):
            return 1
        if isinstance(node, ast.BoolOp):
            return len(node.values) - 1
        return 0

    return 1 + sum(decision_points(node) for node in ast.walk(func_node))


def get_max_nesting_depth(code):
    """Estimate the deepest nesting level of ``code`` from its indentation.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    ignored. Every other line contributes its count of leading whitespace
    characters divided by four (integer division).

    Args:
        code: Source text, lines separated by ``'\\n'``.

    Returns:
        The largest per-line depth found, or 0 when no line qualifies.
    """
    depths = [0]
    for raw_line in code.split('\n'):
        body = raw_line.lstrip()
        if not body or body.startswith('#'):
            continue
        depths.append((len(raw_line) - len(body)) // 4)
    return max(depths)


def get_variable_stats(func_node):
    """Summarise the plain-name assignment targets found under ``func_node``.

    Looks at ``=``, augmented (``+=`` etc.) and annotated assignments and keeps
    only targets that are bare names; attribute, subscript and tuple targets
    are skipped.

    Args:
        func_node: AST node whose subtree is inspected.

    Returns:
        ``(unique, avg_len)``: the number of distinct names and the mean name
        length over every occurrence, rounded to 2 decimals. ``(0, 0)`` when
        no name is assigned.
    """
    assigned = []
    for node in ast.walk(func_node):
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, (ast.AugAssign, ast.AnnAssign)):
            targets = [node.target]
        else:
            continue
        assigned.extend(t.id for t in targets if isinstance(t, ast.Name))

    if not assigned:
        return 0, 0

    mean_length = np.mean([len(name) for name in assigned])
    return len(set(assigned)), round(mean_length, 2)


def extract_features(code):
    """Turn the source of one function into the 22-value feature vector.

    The first top-level statement of ``code`` must be a plain ``def``;
    otherwise, or if anything raises while parsing or measuring, ``None`` is
    returned. The order of the returned values matters: predict() feeds the
    list positionally to the scaler and classifiers, and the UI reads
    individual entries by index.

    Args:
        code: Python source text.

    Returns:
        A list of 22 numbers, in this order: code_lines, blank_lines,
        avg_line_length, cyclomatic complexity, num_loops, num_exceptions,
        max nesting depth, num_returns, has_docstring, docstring_lines,
        comment_lines, num_vars, avg_var_len, has_type_hints, num_assertions,
        num_raises, uses_list_comp, uses_lambda, uses_fstring, uses_with,
        num_calls, has_nested — or ``None`` on failure.
    """
    try:
        source_lines = code.split('\n')
        stripped_lines = [text.strip() for text in source_lines]

        # Line-level counts taken from the raw text.
        total_lines = len(source_lines)
        blank_lines = stripped_lines.count('')
        comment_lines = sum(text.startswith('#') for text in stripped_lines)

        module = ast.parse(code)
        func = module.body[0] if module.body else None
        if not isinstance(func, ast.FunctionDef):
            return None

        # Walk the function once and answer every node-count question from
        # this list.
        nodes = list(ast.walk(func))

        def count(*kinds):
            return sum(isinstance(node, kinds) for node in nodes)

        def present(*kinds):
            return int(any(isinstance(node, kinds) for node in nodes))

        # A docstring is a string constant used as the very first statement.
        has_docstring = 0
        docstring_lines = 0
        opener = func.body[0] if func.body else None
        if (isinstance(opener, ast.Expr)
                and isinstance(opener.value, ast.Constant)
                and isinstance(opener.value.value, str)):
            has_docstring = 1
            docstring_lines = len(opener.value.value.split('\n'))

        # docstring_lines is already 0 when there is no docstring; the result
        # is floored at 1.
        code_lines = max(
            total_lines - blank_lines - comment_lines - docstring_lines, 1
        )

        line_lengths = [
            len(raw) for raw, text in zip(source_lines, stripped_lines) if text
        ]
        avg_line_length = round(np.mean(line_lengths), 2) if line_lengths else 0

        has_type_hints = int(
            func.returns is not None
            or any(arg.annotation is not None for arg in func.args.args)
        )

        has_nested = int(any(
            isinstance(node, ast.FunctionDef) and node is not func
            for node in nodes
        ))

        num_vars, avg_var_len = get_variable_stats(func)

        return [
            code_lines,
            blank_lines,
            avg_line_length,
            get_cyclomatic_complexity(func),
            count(ast.For, ast.While),       # num_loops
            count(ast.ExceptHandler),        # num_exceptions
            get_max_nesting_depth(code),
            count(ast.Return),               # num_returns
            has_docstring,
            docstring_lines,
            comment_lines,
            num_vars,
            avg_var_len,
            has_type_hints,
            count(ast.Assert),               # num_assertions
            count(ast.Raise),                # num_raises
            present(ast.ListComp),           # uses_list_comp
            present(ast.Lambda),             # uses_lambda
            present(ast.JoinedStr),          # uses_fstring
            present(ast.With),               # uses_with
            count(ast.Call),                 # num_calls
            has_nested,
        ]
    except Exception:
        # Best effort by design: any failure is reported as "no features".
        return None


# ── Prediction ────────────────────────────────────────────────────────────────
def predict(code, scaler, lr_model, svm_model, rf_model, tokenizer, cb_model):
    """Run all four models on one Python function and combine their votes.

    Args:
        code: Source text; after stripping it must start with ``'def '``.
        scaler: Fitted scaler applied to the feature vector for LR and SVM.
        lr_model: Classifier exposing ``predict_proba`` (scaled features).
        svm_model: Classifier exposing ``predict_proba`` (scaled features).
        rf_model: Classifier exposing ``predict_proba`` (unscaled features).
        tokenizer: Tokenizer producing ``input_ids`` / ``attention_mask``.
        cb_model: CodeBERT classifier returning two logits per sequence.

    Returns:
        ``(results, None)`` on success, where ``results`` holds the ensemble
        prediction/probability, each model's prediction and AI probability,
        and the raw feature list. ``(None, message)`` when the input is
        rejected; ``message`` is meant to be shown to the user. Class index 1
        is treated as "AI" throughout.
    """
    code = code.strip()

    if not code.startswith('def '):
        return None, "Input must start with 'def'. Please paste a complete Python function."

    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        return None, f"Syntax error: {e}"
    except (ValueError, RecursionError) as e:
        # ast.parse raises ValueError for NUL bytes on Python < 3.12 and may
        # raise RecursionError on extremely nested input; report both as a
        # normal input error instead of letting the app show a traceback.
        return None, f"Could not parse input: {e}"

    if not tree.body or not isinstance(tree.body[0], ast.FunctionDef):
        return None, "No function definition found."

    features = extract_features(code)
    if features is None:
        return None, "Could not extract features. Check your input."

    features_arr    = np.array(features, dtype=float).reshape(1, -1)
    features_scaled = scaler.transform(features_arr)

    # NOTE(review): LR and SVM receive scaled features while the random forest
    # receives the raw vector — presumably mirroring how each was trained;
    # confirm against the training code.
    lr_prob  = lr_model.predict_proba(features_scaled)[0]
    svm_prob = svm_model.predict_proba(features_scaled)[0]
    rf_prob  = rf_model.predict_proba(features_arr)[0]

    lr_pred  = int(np.argmax(lr_prob))
    svm_pred = int(np.argmax(svm_prob))
    rf_pred  = int(np.argmax(rf_prob))

    # Every input is padded or truncated to exactly 512 tokens.
    encoding = tokenizer(
        code,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    with torch.no_grad():
        logits  = cb_model(
            encoding['input_ids'],
            encoding['attention_mask']
        )
        cb_prob = torch.softmax(logits, dim=1)[0].numpy()
        cb_pred = int(np.argmax(cb_prob))

    # Weighted average of the four "AI" probabilities; the order here must
    # match the order of WEIGHTS (LR, SVM, RF, CodeBERT).
    ai_probs      = np.array([lr_prob[1], svm_prob[1], rf_prob[1], cb_prob[1]])
    ensemble_prob = float(np.dot(WEIGHTS, ai_probs))
    ensemble_pred = 1 if ensemble_prob >= 0.5 else 0

    results = {
        'ensemble_pred':  ensemble_pred,
        'ensemble_prob':  ensemble_prob,
        'lr_pred':  lr_pred,  'lr_prob':  lr_prob[1],
        'svm_pred': svm_pred, 'svm_prob': svm_prob[1],
        'rf_pred':  rf_pred,  'rf_prob':  rf_prob[1],
        'cb_pred':  cb_pred,  'cb_prob':  cb_prob[1],
        'features': features,
    }
    return results, None


# ── Streamlit UI ──────────────────────────────────────────────────────────────
# Page header and a one-paragraph description of what the app does.
st.title("🔍 AI Code Detector")
st.markdown(
    "Paste any standalone Python function to detect whether it was written "
    "by a **human** or generated by **AI**."
)

# The percentages in this banner are hard-coded text. They correspond to the
# module-level WEIGHTS rounded to whole percent, so update them by hand if the
# scores behind WEIGHTS change.
st.info(
    "**4 models with weighted ensemble:**  \n"
    "🔵 Logistic Regression (17%)  |  🟠 SVM (22%)  |  "
    "🟢 Random Forest (23%)  |  🔴 CodeBERT (38%)"
)

# Load models with spinner
# load_models() is wrapped in st.cache_resource, so the artifacts are loaded
# once and reused.
with st.spinner("Loading models... (first load takes ~30 seconds)"):
    scaler, lr_model, svm_model, rf_model, tokenizer, cb_model = load_models()

st.success("All models loaded and ready.")

# Input
# The placeholder is only hint text; code_input holds whatever the user typed.
code_input = st.text_area(
    "Python Function",
    height=300,
    placeholder="Paste your Python function here...\n\ndef my_function(x, y):\n    result = x + y\n    return result",
)

# Detect button
# Runs one detection when the button is pressed: validate the text area,
# call predict(), then render the verdict, the per-model cards, the ensemble
# weights and a selection of the extracted features.
if st.button("🔍 Detect", type="primary"):
    if not code_input or not code_input.strip():
        st.warning("Please paste a Python function first.")
    else:
        with st.spinner("Analysing... (CodeBERT may take 15-20 seconds on CPU)"):
            results, error = predict(
                code_input,
                scaler, lr_model, svm_model,
                rf_model, tokenizer, cb_model
            )

        if error:
            # predict() rejected the input; show its message as-is.
            st.error(error)
        else:
            # Verdict — ensemble_prob is the probability of "AI", so the
            # human verdict reports its complement.
            if results['ensemble_pred'] == 1:
                prob_pct = results['ensemble_prob'] * 100
                st.error(f"## 🤖 AI GENERATED  —  {prob_pct:.1f}% AI probability")
            else:
                prob_pct = (1 - results['ensemble_prob']) * 100
                st.success(f"## 👤 HUMAN WRITTEN  —  {prob_pct:.1f}% Human probability")

            # Individual models
            st.markdown("### Individual Model Predictions")
            col1, col2, col3, col4 = st.columns(4)

            def model_card(col, name, pred, prob):
                """Render one model's label and its AI probability in `col`."""
                label = "🤖 AI" if pred == 1 else "👤 Human"
                col.metric(name, label, f"{prob*100:.1f}% AI")

            model_card(col1, "🔵 LR",       results['lr_pred'],  results['lr_prob'])
            model_card(col2, "🟠 SVM",      results['svm_pred'], results['svm_prob'])
            model_card(col3, "🟢 RF",       results['rf_pred'],  results['rf_prob'])
            model_card(col4, "🔴 CodeBERT", results['cb_pred'],  results['cb_prob'])

            # Ensemble weights — the "Weight" column is formatted from the
            # WEIGHTS array that predict() actually uses, so the table cannot
            # drift from the real ensemble. Row order matches WEIGHTS.
            st.markdown("### Ensemble Weights")
            weights_data = {
                "Model": ["Logistic Regression", "SVM", "Random Forest", "CodeBERT"],
                "Weight": [f"{weight * 100:.1f}%" for weight in WEIGHTS],
                "F1 Score": ["0.818", "0.871", "0.886", "0.998"],
            }
            import pandas as pd
            st.table(pd.DataFrame(weights_data))

            # Features — indices follow the order of the list returned by
            # extract_features().
            st.markdown("### Key Features Extracted")
            f = results['features']
            feat_col1, feat_col2 = st.columns(2)
            with feat_col1:
                st.markdown(f"- **code_lines:** {f[0]}")
                st.markdown(f"- **blank_lines:** {f[1]}")
                st.markdown(f"- **avg_line_length:** {f[2]}")
                st.markdown(f"- **cyclomatic_complexity:** {f[3]}")
                st.markdown(f"- **has_docstring:** {'Yes' if f[8] else 'No'}")
            with feat_col2:
                st.markdown(f"- **docstring_lines:** {f[9]}")
                st.markdown(f"- **num_comments:** {f[10]}")
                st.markdown(f"- **num_function_calls:** {f[20]}")
                st.markdown(f"- **num_unique_variables:** {f[11]}")
                st.markdown(f"- **avg_var_name_length:** {f[12]}")

# Example functions
# Two static snippets the user can copy into the text area. They are only
# displayed; nothing in this block passes them to predict(), and the
# "Likely ..." captions are fixed text rather than model output.
with st.expander("Show example functions to test"):
    st.markdown("**Example 1 — Likely Human Written:**")
    st.code('''def calculate_statistics(data):
    """Calculate basic statistics for a dataset."""
    if not data:
        raise ValueError("Data cannot be empty")
    sorted_data = sorted(data)
    n = len(sorted_data)
    mean = sum(sorted_data) / n
    if n % 2 == 0:
        median = (sorted_data[n//2 - 1] + sorted_data[n//2]) / 2
    else:
        median = sorted_data[n//2]
    variance = sum((x - mean) ** 2 for x in sorted_data) / n
    return {"mean": round(mean, 4), "median": round(median, 4),
            "std": round(variance ** 0.5, 4)}''', language="python")

    st.markdown("**Example 2 — Likely AI Generated:**")
    st.code('''def add_numbers(a, b):
    result = a + b
    return result''', language="python")