# Hugging Face Space: Boray/radon-cc-analysis (status: Sleeping)
# File size: 25,465 bytes

import html
import json
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Browser-tab title/icon, wide layout and an initially expanded sidebar.
st.set_page_config(
    page_title="Radon Complexity Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better styling.
# The grade colours mirror the palette returned by get_grade_color(); the
# ".grade-E" rule is included so every rank from A to F has a class.
st.markdown(
    """
    <style>
    .grade-A { color: #2ecc71; font-weight: bold; }
    .grade-B { color: #f39c12; font-weight: bold; }
    .grade-C { color: #e74c3c; font-weight: bold; }
    .grade-D { color: #e67e22; font-weight: bold; }
    .grade-E { color: #d35400; font-weight: bold; }
    .grade-F { color: #c0392b; font-weight: bold; }
    .metric-high { background-color: #ffe6e6; }
    .metric-medium { background-color: #fff3cd; }
    .metric-low { background-color: #d4edda; }
    </style>
    """,
    unsafe_allow_html=True,
)


def get_grade_color(grade: str) -> str:
    """Return the hex colour used to display a rank letter.

    Ranks "A" through "F" each have their own colour; any other value
    falls back to a neutral grey.
    """
    fallback = "#95a5a6"
    palette = dict(
        A="#2ecc71",  # Green
        B="#f39c12",  # Orange
        C="#e74c3c",  # Red
        D="#e67e22",  # Dark Orange
        E="#d35400",  # Darker Orange
        F="#c0392b",  # Dark Red
    )
    return palette.get(grade, fallback)


def get_complexity_color(complexity: int, high_threshold: int = 10) -> str:
    """Return the hex colour for a complexity score.

    The score is checked against ascending upper bounds (3, 7, then
    ``high_threshold``); the first bound it does not exceed decides the
    colour. Scores above every bound get the darkest red.
    """
    bands = (
        (3, "#2ecc71"),  # Green - Simple
        (7, "#f39c12"),  # Orange - Moderate
        (high_threshold, "#e74c3c"),  # Red - Complex
    )
    for upper_bound, colour in bands:
        if complexity <= upper_bound:
            return colour
    return "#c0392b"  # Dark Red - Very Complex


def _nested_row(
    parent_row: Dict[str, Any],
    parent_name: str,
    child: Dict[str, Any],
    default_type: str,
    default_classname: str,
) -> Dict[str, Any]:
    """Build the row for a method/closure nested under a top-level item."""
    nested = parent_row.copy()
    nested.update(
        {
            "type": child.get("type", default_type),
            "name": child.get("name", "N/A"),
            # Prefer the child's own class name; inheriting the parent's
            # value blindly left methods of a class with an empty classname.
            "classname": child.get("classname", default_classname),
            "complexity": child.get("complexity", 0),
            "rank": child.get("rank", "N/A"),
            "lineno": child.get("lineno", 0),
            "endline": child.get("endline", 0),
            "col_offset": child.get("col_offset", 0),
            "parent_name": parent_name,
        }
    )
    return nested


def flatten_report(report: Dict[str, List[Dict]]) -> pd.DataFrame:
    """Convert the nested JSON report into a flat DataFrame.

    Args:
        report: Mapping of file path to a list of item dicts. Values that
            are not lists, and list entries that are not dicts, are skipped.

    Returns:
        One row per item plus one row per entry in an item's ``"methods"``
        and ``"closures"`` lists. Columns are ``filepath``, ``type``,
        ``name``, ``classname``, ``complexity``, ``rank``, ``lineno``,
        ``endline`` and ``col_offset``; nested rows additionally carry
        ``parent_name`` (the enclosing item's name), which is therefore
        missing/NaN for top-level rows. An empty report yields an empty
        DataFrame without columns.
    """
    rows: List[Dict[str, Any]] = []

    for filepath, items in report.items():
        if not isinstance(items, list):
            continue

        for item in items:
            if not isinstance(item, dict):
                continue

            row = {
                "filepath": filepath,
                "type": item.get("type", "N/A"),
                "name": item.get("name", "N/A"),
                "classname": item.get("classname", ""),
                "complexity": item.get("complexity", 0),
                "rank": item.get("rank", "N/A"),
                "lineno": item.get("lineno", 0),
                "endline": item.get("endline", 0),
                "col_offset": item.get("col_offset", 0),
            }
            rows.append(row)

            parent_name = item.get("name", "")

            # Methods hang off a class, so the enclosing item's name is the
            # natural class name when the method does not state its own.
            for method in item.get("methods") or []:
                if isinstance(method, dict):
                    rows.append(
                        _nested_row(row, parent_name, method, "method", parent_name)
                    )

            # Closures keep the enclosing item's class name unless they
            # provide one themselves.
            for closure in item.get("closures") or []:
                if isinstance(closure, dict):
                    rows.append(
                        _nested_row(
                            row, parent_name, closure, "closure", row["classname"]
                        )
                    )

    return pd.DataFrame(rows)


def display_grade_badge(grade: str) -> str:
    """Return an HTML ``<span>`` badge showing ``grade`` on its grade colour.

    The grade text originates from the loaded report and the result is
    rendered with ``unsafe_allow_html=True``, so the text is HTML-escaped
    before being embedded. The colour comes from ``get_grade_color``.
    """
    color = get_grade_color(grade)
    safe_grade = html.escape(str(grade))
    return f'<span style="background-color: {color}; color: white; padding: 4px 8px; border-radius: 4px; font-weight: bold;">{safe_grade}</span>'


def identify_risky_items(
    df: pd.DataFrame, complexity_threshold: int = 10, risky_grades: List[str] = None
) -> pd.DataFrame:
    """Select the rows worth a closer look, most complex first.

    A row qualifies when its ``complexity`` is at least
    ``complexity_threshold`` or its ``rank`` is one of ``risky_grades``
    (``["D", "E", "F"]`` when the argument is ``None``).
    """
    grades = ["D", "E", "F"] if risky_grades is None else risky_grades

    too_complex = df["complexity"] >= complexity_threshold
    poorly_ranked = df["rank"].isin(grades)

    flagged = df.loc[too_complex | poorly_ranked]
    return flagged.sort_values(by="complexity", ascending=False)


def create_complexity_chart(df: pd.DataFrame):
    """Build a bar chart of how many items have each complexity value.

    The x axis lists the distinct ``complexity`` values in ascending order
    and the y axis the number of rows with that value.
    """
    counts_by_level = df["complexity"].value_counts().sort_index()
    bars = go.Bar(x=counts_by_level.index, y=counts_by_level.values)

    fig = go.Figure(data=[bars])
    layout_options = {
        "title": "Complexity Distribution",
        "xaxis_title": "Complexity Level",
        "yaxis_title": "Count",
        "hovermode": "x unified",
    }
    fig.update_layout(**layout_options)
    return fig


def create_grade_chart(df: pd.DataFrame):
    """Build a bar chart of item counts per grade, ordered A through F.

    Only grades that occur in ``df["rank"]`` get a bar; values outside
    A-F are left out. Each bar uses the colour from ``get_grade_color``.
    """
    counts = df["rank"].value_counts()

    # Keep the canonical A-F ordering, restricted to grades actually present.
    present_grades = [
        grade for grade in ["A", "B", "C", "D", "E", "F"] if grade in counts.index
    ]
    ordered_counts = counts.reindex(present_grades, fill_value=0)

    bar_colors = list(map(get_grade_color, ordered_counts.index))
    bars = go.Bar(
        x=ordered_counts.index, y=ordered_counts.values, marker_color=bar_colors
    )

    fig = go.Figure(data=[bars])
    layout_options = {
        "title": "Grade Distribution",
        "xaxis_title": "Grade",
        "yaxis_title": "Count",
        "hovermode": "x unified",
    }
    fig.update_layout(**layout_options)
    return fig


def create_scatter_plot(df: pd.DataFrame):
    """Build a scatter plot of complexity per file, coloured by grade.

    The x axis uses the last "/"-separated component of each ``filepath``;
    the full path stays available in the hover data. The input frame is
    not modified.
    """
    # Work on a copy that carries the short file name used on the x axis.
    short_names = df["filepath"].apply(lambda path: path.rsplit("/", 1)[-1])
    plot_frame = df.assign(filename=short_names)

    grade_palette = {
        grade: get_grade_color(grade) for grade in plot_frame["rank"].unique()
    }

    fig = px.scatter(
        plot_frame,
        x="filename",
        y="complexity",
        color="rank",
        hover_data=["name", "type", "lineno", "filepath"],
        title="Complexity by File and Grade",
        color_discrete_map=grade_palette,
        height=600,
    )
    fig.update_layout(xaxis_tickangle=-45, xaxis_title="File")
    return fig


# Initialize session state
if "report_data" not in st.session_state:
    st.session_state.report_data = None
if "df" not in st.session_state:
    st.session_state.df = None

# Sidebar for file upload
st.sidebar.title("📊 Radon Report Analyzer")

# File upload
uploaded_file = st.sidebar.file_uploader(
    "Upload JSON Report",
    type=["json"],
    help="Upload the cyclomatic complexity report from radon library",
)

if uploaded_file:
    try:
        report_data = json.load(uploaded_file)
        st.session_state.report_data = report_data
        st.session_state.df = flatten_report(report_data)
        # Reset scatter plot file selection when new file is uploaded
        if "selected_scatter_files" in st.session_state:
            del st.session_state.selected_scatter_files
        st.sidebar.success("✅ Report loaded successfully!")
    except json.JSONDecodeError:
        st.sidebar.error("❌ Invalid JSON file")
    except Exception as e:
        st.sidebar.error(f"❌ Error loading file: {str(e)}")

# Main app logic: the dashboard is rendered only when a non-empty report has
# been loaded into session state; otherwise the landing page below is shown.
if st.session_state.df is not None and len(st.session_state.df) > 0:
    # Work on a copy so the frame stored in session state is left untouched.
    df = st.session_state.df.copy()

    # Drop duplicate rows: rows sharing name, filepath and lineno are treated
    # as the same item and only the first occurrence is kept.
    # NOTE(review): the original note said the copy with a NaN parent is the
    # one dropped; keep="first" only guarantees that when the row carrying a
    # parent comes first - confirm against the report's ordering.
    # The index is not reset afterwards, so index labels may contain gaps.
    df = df.drop_duplicates(subset=["name", "filepath", "lineno"], keep="first")

    # Create tabs
    tab1, tab2, tab3, tab4 = st.tabs(
        ["📈 Overview", "🔍 Analysis", "⚠️ Warnings", "📋 Details"]
    )

    # ===== TAB 1: OVERVIEW =====
    with tab1:
        # Headline metrics for the de-duplicated report.
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Total Items", len(df))
        with col2:
            st.metric("Total Files", df["filepath"].nunique())
        with col3:
            avg_complexity = df["complexity"].mean()
            st.metric("Avg Complexity", f"{avg_complexity:.2f}")
        with col4:
            max_complexity = df["complexity"].max()
            st.metric("Max Complexity", max_complexity)

        st.divider()

        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_complexity_chart(df), use_container_width=True)
        with col2:
            st.plotly_chart(create_grade_chart(df), use_container_width=True)

        st.divider()
        st.subheader("📍 Complexity by File and Grade")

        # File filter for scatter plot - show only filename, not full path.
        # Several paths can share one file name (e.g. "__init__.py"), so each
        # name maps to every path carrying it; a one-to-one inverse mapping
        # would keep a single path and hide the other files from the plot.
        filename_to_filepaths = {}
        for fp in df["filepath"].unique():
            filename_to_filepaths.setdefault(fp.split("/")[-1], []).append(fp)
        filename_options = sorted(filename_to_filepaths)

        # Initialize selected files in session state if not exists
        if "selected_scatter_files" not in st.session_state:
            st.session_state.selected_scatter_files = list(filename_options)

        # Select all / Remove all buttons
        col_btn1, col_btn2, col_spacer = st.columns([1, 1, 6])
        with col_btn1:
            if st.button("Select All", use_container_width=True):
                st.session_state.selected_scatter_files = list(filename_options)
                st.rerun()
        with col_btn2:
            if st.button("Remove All", use_container_width=True):
                st.session_state.selected_scatter_files = []
                st.rerun()

        # Only offer remembered names that are still valid options, so a
        # selection left over from another report cannot break the widget.
        default_selection = [
            fn
            for fn in st.session_state.selected_scatter_files
            if fn in filename_to_filepaths
        ]

        st.write("**Select files to display:**")
        scatter_file_filter_display = st.pills(
            "Filter files",
            options=filename_options,
            selection_mode="multi",
            default=default_selection,
            label_visibility="collapsed",
            key="scatter_plot_file_filter",
        )

        # Update session state
        st.session_state.selected_scatter_files = (
            scatter_file_filter_display if scatter_file_filter_display else []
        )

        # Convert selected filenames back to full paths (all paths per name)
        scatter_file_filter = [
            fp
            for fn in (scatter_file_filter_display or [])
            for fp in filename_to_filepaths.get(fn, [])
        ]

        # Apply file filter for scatter plot
        if scatter_file_filter:
            scatter_df = df[df["filepath"].isin(scatter_file_filter)]
        else:
            scatter_df = pd.DataFrame()  # Empty dataframe when no files selected

        if len(scatter_df) > 0:
            st.plotly_chart(create_scatter_plot(scatter_df), use_container_width=True)
        else:
            st.info("No data to display. Please select at least one file.")

    # ===== TAB 2: ANALYSIS WITH FILTERS =====
    with tab2:
        st.subheader("Filter & Sort Data")

        # One filter widget per column; every filter starts fully open so the
        # table initially shows all items.
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            type_filter = st.multiselect(
                "Type",
                options=df["type"].unique(),
                default=df["type"].unique(),
                help="Filter by item type",
            )

        with col2:
            grade_filter = st.multiselect(
                "Grade",
                options=sorted(df["rank"].unique()),
                default=sorted(df["rank"].unique()),
                help="Filter by grade",
            )

        with col3:
            complexity_range = st.slider(
                "Complexity Range",
                min_value=int(df["complexity"].min()),
                max_value=int(df["complexity"].max()),
                value=(int(df["complexity"].min()), int(df["complexity"].max())),
                help="Filter by complexity level",
            )

        with col4:
            filepath_filter = st.multiselect(
                "Files",
                options=sorted(df["filepath"].unique()),
                default=sorted(df["filepath"].unique()),
                help="Filter by file",
            )

        # Apply filters
        filtered_df = df[
            (df["type"].isin(type_filter))
            & (df["rank"].isin(grade_filter))
            & (df["complexity"] >= complexity_range[0])
            & (df["complexity"] <= complexity_range[1])
            & (df["filepath"].isin(filepath_filter))
        ]

        col1, col2 = st.columns(2)
        with col1:
            sort_by = st.selectbox(
                "Sort by",
                options=[
                    "Complexity (High→Low)",
                    "Complexity (Low→High)",
                    "Grade (Best→Worst)",
                    "Name (A→Z)",
                    "File Path",
                    "Line Number",
                ],
                help="Sort the filtered results",
            )

        with col2:
            search_term = st.text_input(
                "Search by name", help="Search for specific function/class names"
            )

        # Apply sorting
        if sort_by == "Complexity (High→Low)":
            filtered_df = filtered_df.sort_values("complexity", ascending=False)
        elif sort_by == "Complexity (Low→High)":
            filtered_df = filtered_df.sort_values("complexity", ascending=True)
        elif sort_by == "Grade (Best→Worst)":
            # Every grade A-F needs a position: a grade missing from this map
            # becomes NaN and is sorted after all mapped grades, which used
            # to place "E" behind "F". Unknown ranks still sort last.
            grade_order = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6}
            filtered_df = filtered_df.sort_values(
                "rank", key=lambda x: x.map(grade_order)
            )
        elif sort_by == "Name (A→Z)":
            filtered_df = filtered_df.sort_values("name")
        elif sort_by == "File Path":
            filtered_df = filtered_df.sort_values("filepath")
        elif sort_by == "Line Number":
            filtered_df = filtered_df.sort_values("lineno")

        # Apply search. The term is matched as plain text: treating user
        # input as a regular expression makes characters such as "(" or "["
        # raise an error instead of searching.
        if search_term:
            filtered_df = filtered_df[
                filtered_df["name"].str.contains(
                    search_term, case=False, na=False, regex=False
                )
            ]

        st.info(f"Showing {len(filtered_df)} of {len(df)} items")

        # Display the table with a fresh 0-based index for the shown rows.
        display_df = filtered_df[
            ["filepath", "type", "name", "complexity", "rank", "lineno", "endline"]
        ].copy()
        display_df = display_df.reset_index(drop=True)

        st.dataframe(
            display_df,
            use_container_width=True,
            column_config={
                "complexity": st.column_config.NumberColumn(width="small"),
                "rank": st.column_config.TextColumn(width="small"),
                "lineno": st.column_config.NumberColumn(width="small"),
                "endline": st.column_config.NumberColumn(width="small"),
                "type": st.column_config.TextColumn(width="small"),
            },
        )

    # ===== TAB 3: WARNINGS =====
    with tab3:
        st.subheader("⚠️ Items Requiring Investigation")

        # Grades treated as poor throughout this tab, so the summary metric
        # and the per-item warning agree with each other.
        poor_grades = ["D", "E", "F"]

        col1, col2 = st.columns(2)
        with col1:
            complexity_threshold = st.slider(
                "Complexity Threshold",
                min_value=1,
                max_value=int(df["complexity"].max()),
                value=10,
                help="Items with complexity >= this value will be flagged",
            )

        with col2:
            risky_grades = st.multiselect(
                "Risky Grades",
                options=["A", "B", "C", "D", "E", "F"],
                default=["D", "E", "F"],
                help="Grades considered risky",
            )

        risky_df = identify_risky_items(df, complexity_threshold, risky_grades)

        if len(risky_df) > 0:
            st.warning(f"⚠️ Found {len(risky_df)} items that need investigation")

            # Group by severity
            col1, col2 = st.columns(2)
            with col1:
                # "High risk" means at least 5 above the chosen threshold.
                high_risk = risky_df[risky_df["complexity"] >= complexity_threshold + 5]
                st.metric("High Risk (Very High Complexity)", len(high_risk))

            with col2:
                bad_grade = risky_df[risky_df["rank"].isin(poor_grades)]
                st.metric("Bad Grade Items", len(bad_grade))

            st.divider()

            # Detailed view of risky items (at most the 20 most complex;
            # only the first expander starts open).
            for idx, (_, row) in enumerate(risky_df.head(20).iterrows(), 1):
                with st.expander(
                    f"🚨 {row['name']} (Complexity: {row['complexity']}, Grade: {row['rank']})",
                    expanded=(idx == 1),
                ):
                    col1, col2, col3, col4 = st.columns(4)
                    with col1:
                        st.metric("Complexity", row["complexity"])
                    with col2:
                        st.write(f"**Grade:** {row['rank']}")
                    with col3:
                        st.write(f"**Type:** {row['type']}")
                    with col4:
                        st.write(f"**Lines:** {row['lineno']}-{row['endline']}")

                    st.write(f"**File:** `{row['filepath']}`")
                    # Qualify methods with their class, but only when a class
                    # name is known; otherwise the result would be ".name".
                    full_name = (
                        f"{row['classname']}.{row['name']}"
                        if row["type"] == "method" and row["classname"]
                        else row["name"]
                    )
                    st.write(f"**Full Name:** `{full_name}`")

                    # Recommendation
                    if row["complexity"] >= complexity_threshold + 5:
                        st.error("🔴 **CRITICAL:** This needs immediate refactoring")
                    elif row["complexity"] >= complexity_threshold:
                        st.warning(
                            "🟠 **HIGH:** Consider breaking this into smaller functions"
                        )

                    if row["rank"] in poor_grades:
                        st.warning(
                            f"**Grade {row['rank']}:** Code quality is poor, refactoring recommended"
                        )
        else:
            st.success("✅ No risky items found! Your code looks good.")

    # ===== TAB 4: DETAILED VIEW =====
    with tab4:
        st.subheader("Detailed Item Analysis")

        # Select item to analyze. The select box offers the frame's index
        # labels and shows "<name> (<type>) - <file name>" for each of them.
        df_display = df.copy()
        df_display["display_name"] = df_display.apply(
            lambda x: f"{x['name']} ({x['type']}) - {x['filepath'].split('/')[-1]}",
            axis=1,
        )

        selected_item = st.selectbox(
            "Select an item to analyze",
            options=df_display.index,
            format_func=lambda x: df_display.loc[x, "display_name"],
        )

        if selected_item is not None:
            # The selection is an index *label*. After drop_duplicates the
            # labels are no longer consecutive positions, so a positional
            # lookup would show the wrong row or run past the end.
            item = df.loc[selected_item]

            # Header with grade badge
            col1, col2 = st.columns([3, 1])
            with col1:
                st.title(item["name"])
            with col2:
                grade_html = display_grade_badge(item["rank"])
                st.markdown(grade_html, unsafe_allow_html=True)

            st.divider()

            # Detailed metrics
            col1, col2, col3, col4, col5 = st.columns(5)
            with col1:
                st.metric("Complexity", item["complexity"])
            with col2:
                st.metric("Type", item["type"])
            with col3:
                st.metric("Start Line", int(item["lineno"]))
            with col4:
                st.metric("End Line", int(item["endline"]))
            with col5:
                # Both the start and the end line count, hence the + 1.
                st.metric("Lines of Code", int(item["endline"] - item["lineno"] + 1))

            st.divider()

            # File and location info
            col1, col2 = st.columns(2)
            with col1:
                st.write("**File Path:**")
                st.code(item["filepath"], language="text")
            with col2:
                st.write("**Location:**")
                st.code(
                    f"Line {int(item['lineno'])} to {int(item['endline'])}, Column {int(item['col_offset'])}",
                    language="text",
                )

            if item["classname"]:
                st.write("**Class Name:**")
                st.code(item["classname"], language="text")

            st.divider()

            # Recommendations, banded by complexity: <=3, <=7, <=10, above.
            st.subheader("💡 Recommendations")

            complexity = int(item["complexity"])
            if complexity <= 3:
                st.success(
                    "✅ **Simple:** This code is easy to understand and maintain."
                )
            elif complexity <= 7:
                st.info(
                    "ℹ️ **Moderate:** Code is reasonably complex. Consider breaking into smaller functions if it exceeds 7."
                )
            elif complexity <= 10:
                st.warning(
                    "⚠️ **Complex:** This code is complex and may be difficult to maintain. Consider refactoring."
                )
            else:
                st.error(
                    "🔴 **Very Complex:** This code needs immediate refactoring. Break it into smaller, testable units."
                )

            if item["rank"] in ["D", "E", "F"]:
                st.error(
                    f"📉 **Grade {item['rank']}:** Code quality needs improvement."
                )

else:
    # Landing page
    st.title("📊 Radon Complexity Analyzer")
    st.markdown(
        """
    Welcome to the Radon Cyclomatic Complexity Analyzer!

    This tool helps you analyze and visualize Python code complexity reports from the **radon** library.

    ### Features:
    - 📈 **Overview:** See complexity distribution across your codebase
    - 🔍 **Analysis:** Filter, sort, and search for specific functions/classes
    - ⚠️ **Warnings:** Identify items that need immediate attention
    - 📋 **Details:** Get detailed analysis and recommendations for each item

    ### How to use:
    1. Generate a radon complexity report as JSON:
       ```bash
       radon cc your_project/ -j > report.json
       ```
    2. Upload the JSON file using the sidebar
    3. Explore and analyze your code complexity!
    """
    )

    # Create sample data for demonstration
    st.divider()
    st.subheader("Or try with sample data:")

    if st.button("Load Sample Report"):
        sample_file_path = Path(__file__).parent / "sample_report.json"

        try:
            with open(sample_file_path, "r") as f:
                sample_report = json.load(f)

            st.session_state.report_data = sample_report
            st.session_state.df = flatten_report(sample_report)
            # Reset scatter plot file selection when sample is loaded
            if "selected_scatter_files" in st.session_state:
                del st.session_state.selected_scatter_files
            st.success("✅ Sample data loaded! Refresh the page to see the analysis.")
            st.rerun()
        except FileNotFoundError:
            st.error("❌ Sample report file not found. Please upload your own report.")
        except Exception as e:
            st.error(f"❌ Error loading sample data: {str(e)}")
        # sample_report = {
        #     "example/settings.py": [
        #         {
        #             "type": "class",
        #             "rank": "A",
        #             "lineno": 7,
        #             "complexity": 1,
        #             "endline": 8,
        #             "name": "DBSettings",
        #             "col_offset": 0,
        #             "methods": [],
        #         },
        #         {
        #             "type": "class",
        #             "rank": "B",
        #             "lineno": 11,
        #             "complexity": 5,
        #             "endline": 13,
        #             "name": "ComplexSettings",
        #             "col_offset": 0,
        #             "methods": [
        #                 {
        #                     "type": "method",
        #                     "rank": "C",
        #                     "lineno": 12,
        #                     "classname": "ComplexSettings",
        #                     "complexity": 8,
        #                     "endline": 13,
        #                     "name": "validate",
        #                     "col_offset": 4,
        #                     "closures": [],
        #                 }
        #             ],
        #         },
        #     ],
        #     "example/base.py": [
        #         {
        #             "type": "function",
        #             "rank": "F",
        #             "lineno": 1,
        #             "complexity": 15,
        #             "endline": 50,
        #             "name": "complex_function",
        #             "col_offset": 0,
        #         }
        #     ],
        # }

        # st.session_state.report_data = sample_report
        # st.session_state.df = flatten_report(sample_report)
        # st.success("✅ Sample data loaded! Refresh the page to see the analysis.")
        # st.rerun()