Spaces:

sammoftah
/

smart-contract-security-scanner

Sleeping

File size: 6,175 Bytes

64a1c9c

"""
Smart Contract Security Scanner
Rule-based Solidity audit assistant with transparent findings and remediation notes.
"""

from pathlib import Path
import re

import pandas as pd
import plotly.express as px
import streamlit as st


# Page-level settings: browser tab title, tab icon, and the wide layout.
st.set_page_config(page_title="Smart Contract Security Scanner", page_icon="🔒", layout="wide")


def load_shared_css() -> None:
    """Inject the shared stylesheet into the Streamlit page, if one exists.

    Looks for ``shared/styles.css`` next to this file and then one directory
    up, and embeds the first one found inside a ``<style>`` tag. When neither
    candidate is present the function returns without rendering anything, so
    the app still starts (with Streamlit's default styling).
    """
    current_dir = Path(__file__).resolve().parent
    candidates = (
        current_dir / "shared" / "styles.css",
        current_dir.parent / "shared" / "styles.css",
    )
    # The None default matters: a bare next() on an exhausted generator raises
    # StopIteration, which would abort the whole script at import time.
    css_path = next((path for path in candidates if path.is_file()), None)
    if css_path is None:
        return
    st.markdown(f"<style>{css_path.read_text(encoding='utf-8')}</style>", unsafe_allow_html=True)


# Apply the shared stylesheet before any of the page content below is rendered.
load_shared_css()


# Default text for the "Solidity source" input. The contract is intentionally
# unsafe: it sends value via a low-level call before updating the balance,
# authorizes with tx.origin, and performs a delegatecall, so several entries
# in RULES match it.
SAMPLE_CONTRACT = """pragma solidity ^0.8.0;

contract Vault {
    mapping(address => uint256) public balances;
    address public owner;

    function withdraw(uint256 amount) public {
        require(balances[msg.sender] >= amount);
        (bool ok, ) = msg.sender.call{value: amount}("");
        require(ok);
        balances[msg.sender] -= amount;
    }

    function emergencyTransfer(address target, bytes memory data) public {
        require(tx.origin == owner);
        target.delegatecall(data);
    }
}
"""


# Rule table consumed by scan_contract(). Each entry has:
#   id       - stable identifier shown in the findings table
#   pattern  - regular expression searched for in the Solidity source
#              (scan_contract applies it with re.IGNORECASE)
#   severity - key into SEVERITY_WEIGHT
#   reason   - why a match is risky
#   fix      - suggested remediation
# The patterns are textual heuristics: they do not parse Solidity, so a match
# is a prompt for manual review rather than a confirmed vulnerability.
RULES = [
    {
        "id": "reentrancy-call-before-state-update",
        "pattern": r"\.call\{value:\s*[^}]+\}\([^)]*\)",
        "severity": "High",
        "reason": "External value transfer can re-enter before state is updated.",
        "fix": "Apply checks-effects-interactions, update balances before the call, or use ReentrancyGuard.",
    },
    {
        "id": "tx-origin-auth",
        "pattern": r"tx\.origin",
        "severity": "High",
        "reason": "tx.origin authentication can be phished through intermediary contracts.",
        "fix": "Use msg.sender and explicit role-based authorization.",
    },
    {
        "id": "delegatecall",
        "pattern": r"\bdelegatecall\b",
        "severity": "Critical",
        "reason": "delegatecall executes target code in this contract storage context.",
        "fix": "Avoid delegatecall unless target code is immutable, audited, and tightly authorized.",
    },
    {
        "id": "missing-events",
        # (?!=) keeps `owner == x` comparisons from being reported as an
        # assignment to owner; only a single `=` counts as a state change.
        "pattern": r"function\s+\w+[\s\S]{0,180}(balances\[|owner\s*=(?!=))",
        "severity": "Medium",
        "reason": "State-changing operations should emit events for monitoring and incident response.",
        "fix": "Emit events for withdrawals, ownership changes, and privileged actions.",
    },
    {
        "id": "unchecked-low-level-call",
        "pattern": r"\.send\(|\.call\(",
        "severity": "Medium",
        "reason": "Low-level calls need explicit success handling and safe control flow.",
        "fix": "Check return values and prefer typed interfaces when possible.",
    },
]

# Points each finding of a given severity adds to the raw total that
# risk_score() sums before scaling it to a 0-100 value.
SEVERITY_WEIGHT = {"Low": 1, "Medium": 2, "High": 4, "Critical": 6}


def line_number(source: str, index: int) -> int:
    """Return the 1-based line number of character offset ``index`` in ``source``."""
    # Every newline before the offset pushes it one line further down.
    newlines_before = source.count("\n", 0, index)
    return 1 + newlines_before


def scan_contract(source: str) -> pd.DataFrame:
    """Run every rule in ``RULES`` against ``source`` and tabulate the matches.

    Args:
        source: Solidity source text to scan.

    Returns:
        A DataFrame with one row per regex match and the columns ``rule``,
        ``severity``, ``line``, ``evidence``, ``reason`` and ``fix``. Rows are
        grouped by rule (in ``RULES`` order) and, within a rule, ordered by
        match position. The columns are present even when nothing matched, so
        callers can index them on an empty result.
    """
    columns = ["rule", "severity", "line", "evidence", "reason", "fix"]
    findings = [
        {
            "rule": rule["id"],
            "severity": rule["severity"],
            "line": line_number(source, match.start()),
            # Flatten multi-line matches and cap the length so the table
            # cell stays readable.
            "evidence": match.group(0).replace("\n", " ")[:120],
            "reason": rule["reason"],
            "fix": rule["fix"],
        }
        for rule in RULES
        for match in re.finditer(rule["pattern"], source, re.IGNORECASE)
    ]
    # Explicit columns: without them an empty findings list produces a frame
    # with no columns at all.
    return pd.DataFrame(findings, columns=columns)


def risk_score(findings: pd.DataFrame) -> int:
    """Collapse the findings table into a single 0-100 risk score.

    Each row contributes the ``SEVERITY_WEIGHT`` of its ``severity`` value.
    The summed weight is expressed as a percentage of 18, truncated to an
    integer, and capped at 100. An empty table scores 0.
    """
    if findings.empty:
        return 0

    total_weight = 0
    for level in findings["severity"]:
        total_weight += SEVERITY_WEIGHT[level]

    percent = int(total_weight / 18 * 100)
    return 100 if percent > 100 else percent


# Hero banner. The HTML is a fixed literal with no interpolated values, so
# unsafe_allow_html is not exposed to user input here. The CSS class names are
# presumably defined in the shared stylesheet - confirm against styles.css.
st.markdown("""
<div class="hero">
  <div class="hf-badge">AI Safety + Static Analysis</div>
  <h1>🔒 Smart Contract Security Scanner</h1>
  <p>Inspect Solidity code for high-signal vulnerability patterns, explain the risk, and produce remediation notes.</p>
  <div class="pill-row">
    <span class="hf-chip">Reentrancy</span>
    <span class="hf-chip">Authorization flaws</span>
    <span class="hf-chip">Audit-ready output</span>
  </div>
</div>
""", unsafe_allow_html=True)

# Sidebar: scope disclaimer plus the toggle that controls whether Medium
# findings are kept in the results below.
with st.sidebar:
    st.markdown("### Scope")
    st.info("This Space is a transparent audit assistant, not a formal verification engine. It is useful for education, first-pass triage, and building labeled vulnerability datasets.")
    include_medium = st.checkbox("Show medium severity findings", value=True)

# Scan whatever is currently in the text area (pre-filled with the sample).
source = st.text_area("Solidity source", value=SAMPLE_CONTRACT, height=360)
findings = scan_contract(source)
# With the toggle off, only High and Critical rows are kept. The filter is an
# allow-list, so any other severity would be hidden as well, not just Medium.
if not include_medium and not findings.empty:
    findings = findings[findings["severity"].isin(["High", "Critical"])]

# The score and all three metrics are computed from the filtered table, so
# hiding Medium findings also lowers the displayed risk score.
score = risk_score(findings)
metric_cols = st.columns(3)
metric_cols[0].metric("Risk score", f"{score}/100")
metric_cols[1].metric("Findings", len(findings))
# The .empty guard avoids indexing "severity" on an empty result, which may
# not carry that column.
metric_cols[2].metric("Critical/High", int(findings["severity"].isin(["Critical", "High"]).sum()) if not findings.empty else 0)

# Three result views over the same (possibly filtered) findings table.
tab1, tab2, tab3 = st.tabs(["Findings", "Risk Breakdown", "How To Extend"])

# Findings tab: the raw table, or a reminder that "no match" is not "safe".
with tab1:
    if findings.empty:
        st.success("No configured rule triggered. Add more rules before treating this as safe.")
    else:
        st.dataframe(findings, use_container_width=True, hide_index=True)

# Risk Breakdown tab: bar chart of how many findings fall in each severity.
with tab2:
    if findings.empty:
        st.info("No chart to show yet.")
    else:
        # One row per severity present in the table, with its finding count.
        counts = findings.groupby("severity").size().reset_index(name="count")
        fig = px.bar(
            counts,
            x="severity",
            y="count",
            color="severity",
            # Fixed colour per severity so bars keep their colour regardless
            # of which severities are present.
            color_discrete_map={"Critical": "#111827", "High": "#e8935c", "Medium": "#b8a9d9", "Low": "#7accff"},
            title="Finding count by severity",
        )
        st.plotly_chart(fig, use_container_width=True)

# How To Extend tab: static roadmap text, independent of the scan results.
with tab3:
    st.markdown("""
### HF-Native Extension Path

- Publish scanned snippets and labels as a Hugging Face Dataset.
- Fine-tune or evaluate a code model on vulnerability explanations.
- Add Slither/Mythril output as additional features.
- Use a Space to compare rule-based, model-based, and hybrid triage.
""")