Spaces:

Boray
/

radon-cc-analysis

Sleeping

Boray

bugfix on upload data

8023e92 9 days ago

25.5 kB

	import json
	from pathlib import Path
	from typing import Any, Dict, List

	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import streamlit as st

	# Page-wide configuration: title, icon, wide layout and an open sidebar.
	st.set_page_config(
	    layout="wide",
	    initial_sidebar_state="expanded",
	    page_title="Radon Complexity Analyzer",
	    page_icon="📊",
	)

	# Custom CSS classes for grade text colors and metric background colors.
	_CUSTOM_CSS = """
	<style>
	.grade-A { color: #2ecc71; font-weight: bold; }
	.grade-B { color: #f39c12; font-weight: bold; }
	.grade-C { color: #e74c3c; font-weight: bold; }
	.grade-D { color: #e67e22; font-weight: bold; }
	.grade-F { color: #c0392b; font-weight: bold; }
	.metric-high { background-color: #ffe6e6; }
	.metric-medium { background-color: #fff3cd; }
	.metric-low { background-color: #d4edda; }
	</style>
	"""
	# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)


	def get_grade_color(grade: str) -> str:
	    """Return the hex color used to display a grade.

	    Grades "A" through "F" each have a fixed color; any other value maps
	    to the neutral gray ``#95a5a6``.
	    """
	    fallback = "#95a5a6"
	    palette = dict(
	        A="#2ecc71",  # Green
	        B="#f39c12",  # Orange
	        C="#e74c3c",  # Red
	        D="#e67e22",  # Dark Orange
	        E="#d35400",  # Darker Orange
	        F="#c0392b",  # Dark Red
	    )
	    try:
	        return palette[grade]
	    except KeyError:
	        return fallback


	def get_complexity_color(complexity: int, high_threshold: int = 10) -> str:
	    """Return the hex color for a complexity value.

	    The value is compared against ascending upper bounds (3, 7 and
	    ``high_threshold``); the first bound it does not exceed picks the
	    color. Values above every bound get the dark red color.
	    """
	    bands = (
	        (3, "#2ecc71"),  # Green - Simple
	        (7, "#f39c12"),  # Orange - Moderate
	        (high_threshold, "#e74c3c"),  # Red - Complex
	    )
	    for upper_bound, color in bands:
	        if complexity <= upper_bound:
	            return color
	    return "#c0392b"  # Dark Red - Very Complex


	def flatten_report(report: Dict[str, List[Dict]]) -> pd.DataFrame:
	    """Convert a nested JSON complexity report into a flat DataFrame.

	    Args:
	        report: Mapping of file path to a list of item dicts. Entries whose
	            value is not a list are skipped.

	    Returns:
	        A DataFrame with one row per item and one additional row for every
	        entry in an item's ``"methods"`` and ``"closures"`` lists. Nested
	        rows carry a ``parent_name`` column holding the enclosing item's
	        name; rows for top-level items do not set that column.
	    """

	    def _nested_row(parent_row, child, default_type, default_classname, parent_name):
	        # Start from the parent's row (keeps filepath) and overwrite every
	        # field that describes the nested item itself.
	        nested = parent_row.copy()
	        nested.update(
	            {
	                "type": child.get("type", default_type),
	                "name": child.get("name", "N/A"),
	                # Use the nested item's own class name; previously the
	                # parent's value leaked through, which is empty for a class.
	                "classname": child.get("classname") or default_classname,
	                "complexity": child.get("complexity", 0),
	                "rank": child.get("rank", "N/A"),
	                "lineno": child.get("lineno", 0),
	                "endline": child.get("endline", 0),
	                "col_offset": child.get("col_offset", 0),
	                "parent_name": parent_name,
	            }
	        )
	        return nested

	    rows = []

	    for filepath, items in report.items():
	        if not isinstance(items, list):
	            continue

	        for item in items:
	            row = {
	                "filepath": filepath,
	                "type": item.get("type", "N/A"),
	                "name": item.get("name", "N/A"),
	                "classname": item.get("classname", ""),
	                "complexity": item.get("complexity", 0),
	                "rank": item.get("rank", "N/A"),
	                "lineno": item.get("lineno", 0),
	                "endline": item.get("endline", 0),
	                "col_offset": item.get("col_offset", 0),
	            }
	            rows.append(row)

	            parent_name = item.get("name", "")

	            # Methods belong to the enclosing item, so its name is the
	            # fallback class name when a method does not state one.
	            for method in item.get("methods") or []:
	                rows.append(
	                    _nested_row(row, method, "method", parent_name, parent_name)
	                )

	            # Closures inherit whatever class name the enclosing item has.
	            for closure in item.get("closures") or []:
	                rows.append(
	                    _nested_row(
	                        row, closure, "closure", row["classname"], parent_name
	                    )
	                )

	    return pd.DataFrame(rows)


	def display_grade_badge(grade: str) -> str:
	    """Build an HTML ``<span>`` badge showing a grade on its grade color.

	    Args:
	        grade: Grade label to display. It is HTML-escaped because the
	            result is meant to be rendered as raw HTML and the label
	            originates from report data rather than from this program.

	    Returns:
	        The HTML markup for the badge.
	    """
	    import html  # function-scope import keeps the file's import block untouched

	    color = get_grade_color(grade)
	    safe_grade = html.escape(str(grade))
	    return f'<span style="background-color: {color}; color: white; padding: 4px 8px; border-radius: 4px; font-weight: bold;">{safe_grade}</span>'


	def identify_risky_items(
	    df: pd.DataFrame, complexity_threshold: int = 10, risky_grades: List[str] = None
	) -> pd.DataFrame:
	    """Select the rows that need investigation.

	    A row is selected when its ``complexity`` is at least
	    ``complexity_threshold`` or its ``rank`` is one of ``risky_grades``.

	    Args:
	        df: Frame with ``complexity`` and ``rank`` columns.
	        complexity_threshold: Inclusive lower bound for flagging by complexity.
	        risky_grades: Grades to flag; defaults to ``["D", "E", "F"]`` when None.

	    Returns:
	        The selected rows, sorted by complexity from highest to lowest.
	    """
	    if risky_grades is None:
	        risky_grades = ["D", "E", "F"]

	    too_complex = df["complexity"] >= complexity_threshold
	    bad_grade = df["rank"].isin(risky_grades)
	    # Element-wise OR of the two boolean masks.
	    risky = df[too_complex | bad_grade]
	    return risky.sort_values("complexity", ascending=False)


	def create_complexity_chart(df: pd.DataFrame):
	    """Build a bar chart of how many items have each complexity value.

	    Bars are ordered by ascending complexity value.
	    """
	    counts = df["complexity"].value_counts().sort_index()
	    bars = go.Bar(x=counts.index, y=counts.values)
	    figure = go.Figure(data=[bars])

	    layout_options = {
	        "title": "Complexity Distribution",
	        "xaxis_title": "Complexity Level",
	        "yaxis_title": "Count",
	        "hovermode": "x unified",
	    }
	    figure.update_layout(**layout_options)
	    return figure


	def create_grade_chart(df: pd.DataFrame):
	    """Build a bar chart of how many items have each grade.

	    Only grades A-F that occur in ``df["rank"]`` are shown, in A-to-F
	    order, each bar drawn in its grade color.
	    """
	    counts = df["rank"].value_counts()
	    # Keep only known grades that are present, in canonical order.
	    present_grades = [
	        grade for grade in ["A", "B", "C", "D", "E", "F"] if grade in counts.index
	    ]
	    counts = counts.reindex(present_grades, fill_value=0)

	    bar_colors = list(map(get_grade_color, counts.index))
	    bars = go.Bar(x=counts.index, y=counts.values, marker_color=bar_colors)
	    figure = go.Figure(data=[bars])

	    layout_options = {
	        "title": "Grade Distribution",
	        "xaxis_title": "Grade",
	        "yaxis_title": "Count",
	        "hovermode": "x unified",
	    }
	    figure.update_layout(**layout_options)
	    return figure


	def create_scatter_plot(df: pd.DataFrame):
	    """Build a scatter plot of item complexity per file, colored by grade.

	    The x axis uses the last "/"-separated component of each file path;
	    the full path remains available in the hover data.
	    """
	    # Work on a copy so the caller's frame does not gain the extra column.
	    plot_frame = df.copy()
	    plot_frame["filename"] = [
	        path.split("/")[-1] for path in plot_frame["filepath"]
	    ]

	    grades = plot_frame["rank"].unique()
	    grade_colors = dict(zip(grades, map(get_grade_color, grades)))

	    figure = px.scatter(
	        plot_frame,
	        x="filename",
	        y="complexity",
	        color="rank",
	        hover_data=["name", "type", "lineno", "filepath"],
	        title="Complexity by File and Grade",
	        color_discrete_map=grade_colors,
	        height=600,
	    )
	    figure.update_layout(xaxis_tickangle=-45, xaxis_title="File")
	    return figure


	# Make sure both session-state slots exist before anything reads them.
	for _state_key in ("report_data", "df"):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = None

	# Sidebar for file upload
	st.sidebar.title("📊 Radon Report Analyzer")

	# File upload
	uploaded_file = st.sidebar.file_uploader(
	    "Upload JSON Report",
	    type=["json"],
	    help="Upload the cyclomatic complexity report from radon library",
	)

	if uploaded_file:
	    # This branch runs on every script run while a file sits in the
	    # uploader. Parse each distinct upload only once; otherwise the scatter
	    # plot file selection below would be wiped on every interaction.
	    upload_signature = (
	        getattr(uploaded_file, "file_id", None),
	        uploaded_file.name,
	        uploaded_file.size,
	    )

	    if st.session_state.get("loaded_upload_signature") != upload_signature:
	        try:
	            # Rewind in case the stream was already consumed.
	            uploaded_file.seek(0)
	            report_data = json.load(uploaded_file)
	            report_df = flatten_report(report_data)
	        except json.JSONDecodeError:
	            st.sidebar.error("❌ Invalid JSON file")
	        except Exception as e:
	            st.sidebar.error(f"❌ Error loading file: {str(e)}")
	        else:
	            # Store everything only after both parsing steps succeeded.
	            st.session_state.report_data = report_data
	            st.session_state.df = report_df
	            st.session_state.loaded_upload_signature = upload_signature
	            # Reset scatter plot file selection when new file is uploaded
	            if "selected_scatter_files" in st.session_state:
	                del st.session_state.selected_scatter_files

	    if st.session_state.get("loaded_upload_signature") == upload_signature:
	        st.sidebar.success("✅ Report loaded successfully!")

	# Main app logic: show the analysis tabs when a non-empty report is loaded,
	# otherwise show the landing page.
	if st.session_state.df is not None and len(st.session_state.df) > 0:
	    df = st.session_state.df.copy()

	    # Drop duplicate rows for the same item (same name, file and start line),
	    # keeping the first occurrence. The index is reset afterwards so that row
	    # labels and row positions agree for the item lookup in the details tab.
	    df = df.drop_duplicates(
	        subset=["name", "filepath", "lineno"], keep="first"
	    ).reset_index(drop=True)

	    # Grades treated as poor quality in the warnings and details tabs.
	    poor_grades = ["D", "E", "F"]

	    # Create tabs
	    tab1, tab2, tab3, tab4 = st.tabs(
	        ["📈 Overview", "🔍 Analysis", "⚠️ Warnings", "📋 Details"]
	    )

	    # ===== TAB 1: OVERVIEW =====
	    with tab1:
	        col1, col2, col3, col4 = st.columns(4)

	        with col1:
	            st.metric("Total Items", len(df))
	        with col2:
	            st.metric("Total Files", df["filepath"].nunique())
	        with col3:
	            avg_complexity = df["complexity"].mean()
	            st.metric("Avg Complexity", f"{avg_complexity:.2f}")
	        with col4:
	            max_complexity = df["complexity"].max()
	            st.metric("Max Complexity", max_complexity)

	        st.divider()

	        col1, col2 = st.columns(2)
	        with col1:
	            st.plotly_chart(create_complexity_chart(df), use_container_width=True)
	        with col2:
	            st.plotly_chart(create_grade_chart(df), use_container_width=True)

	        st.divider()
	        st.subheader("📍 Complexity by File and Grade")

	        # File filter for scatter plot - show only filename, not full path.
	        # Several paths can share one filename, so rows are filtered by
	        # filename directly instead of mapping each filename back to a
	        # single path (which would silently drop the other paths).
	        row_filenames = df["filepath"].apply(lambda fp: fp.split("/")[-1])
	        filename_options = sorted(row_filenames.unique())

	        # Initialize selected files in session state if not exists
	        if "selected_scatter_files" not in st.session_state:
	            st.session_state.selected_scatter_files = filename_options

	        # Select all / Remove all buttons
	        col_btn1, col_btn2, col_spacer = st.columns([1, 1, 6])
	        with col_btn1:
	            if st.button("Select All", use_container_width=True):
	                st.session_state.selected_scatter_files = filename_options
	                st.rerun()
	        with col_btn2:
	            if st.button("Remove All", use_container_width=True):
	                st.session_state.selected_scatter_files = []
	                st.rerun()

	        st.write("Select files to display:")
	        scatter_file_filter_display = st.pills(
	            "Filter files",
	            options=filename_options,
	            selection_mode="multi",
	            default=st.session_state.selected_scatter_files,
	            label_visibility="collapsed",
	            key="scatter_plot_file_filter",
	        )

	        # Update session state
	        selected_filenames = (
	            scatter_file_filter_display if scatter_file_filter_display else []
	        )
	        st.session_state.selected_scatter_files = selected_filenames

	        # Apply file filter for scatter plot (empty when no files selected)
	        scatter_df = df[row_filenames.isin(selected_filenames)]

	        if len(scatter_df) > 0:
	            st.plotly_chart(create_scatter_plot(scatter_df), use_container_width=True)
	        else:
	            st.info("No data to display. Please select at least one file.")

	    # ===== TAB 2: ANALYSIS WITH FILTERS =====
	    with tab2:
	        st.subheader("Filter & Sort Data")

	        col1, col2, col3, col4 = st.columns(4)

	        with col1:
	            type_filter = st.multiselect(
	                "Type",
	                options=df["type"].unique(),
	                default=df["type"].unique(),
	                help="Filter by item type",
	            )

	        with col2:
	            grade_filter = st.multiselect(
	                "Grade",
	                options=sorted(df["rank"].unique()),
	                default=sorted(df["rank"].unique()),
	                help="Filter by grade",
	            )

	        with col3:
	            complexity_range = st.slider(
	                "Complexity Range",
	                min_value=int(df["complexity"].min()),
	                max_value=int(df["complexity"].max()),
	                value=(int(df["complexity"].min()), int(df["complexity"].max())),
	                help="Filter by complexity level",
	            )

	        with col4:
	            filepath_filter = st.multiselect(
	                "Files",
	                options=sorted(df["filepath"].unique()),
	                default=sorted(df["filepath"].unique()),
	                help="Filter by file",
	            )

	        # Apply filters
	        filtered_df = df[
	            (df["type"].isin(type_filter))
	            & (df["rank"].isin(grade_filter))
	            & (df["complexity"] >= complexity_range[0])
	            & (df["complexity"] <= complexity_range[1])
	            & (df["filepath"].isin(filepath_filter))
	        ]

	        col1, col2 = st.columns(2)
	        with col1:
	            sort_by = st.selectbox(
	                "Sort by",
	                options=[
	                    "Complexity (High→Low)",
	                    "Complexity (Low→High)",
	                    "Grade (Best→Worst)",
	                    "Name (A→Z)",
	                    "File Path",
	                    "Line Number",
	                ],
	                help="Sort the filtered results",
	            )

	        with col2:
	            search_term = st.text_input(
	                "Search by name", help="Search for specific function/class names"
	            )

	        # Apply sorting
	        if sort_by == "Complexity (High→Low)":
	            filtered_df = filtered_df.sort_values("complexity", ascending=False)
	        elif sort_by == "Complexity (Low→High)":
	            filtered_df = filtered_df.sort_values("complexity", ascending=True)
	        elif sort_by == "Grade (Best→Worst)":
	            # "E" must be listed, otherwise it maps to NaN and sorts after "F".
	            # Ranks outside A-F still map to NaN and end up last.
	            grade_order = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6}
	            filtered_df = filtered_df.sort_values(
	                "rank", key=lambda x: x.map(grade_order)
	            )
	        elif sort_by == "Name (A→Z)":
	            filtered_df = filtered_df.sort_values("name")
	        elif sort_by == "File Path":
	            filtered_df = filtered_df.sort_values("filepath")
	        elif sort_by == "Line Number":
	            filtered_df = filtered_df.sort_values("lineno")

	        # Apply search (case-insensitive match on the name column)
	        if search_term:
	            filtered_df = filtered_df[
	                filtered_df["name"].str.contains(search_term, case=False, na=False)
	            ]

	        st.info(f"Showing {len(filtered_df)} of {len(df)} items")

	        # Per-cell CSS for the rank and complexity columns.
	        # NOTE(review): this helper is not applied anywhere in this block; the
	        # table below is rendered without per-cell colors.
	        def style_dataframe(val, column):
	            if column == "rank":
	                color = get_grade_color(val)
	                return f"background-color: {color}; color: white; font-weight: bold;"
	            elif column == "complexity":
	                color = get_complexity_color(int(val))
	                return f"background-color: {color}; color: white;"
	            return ""

	        display_df = filtered_df[
	            ["filepath", "type", "name", "complexity", "rank", "lineno", "endline"]
	        ].copy()
	        display_df = display_df.reset_index(drop=True)

	        st.dataframe(
	            display_df,
	            use_container_width=True,
	            column_config={
	                "complexity": st.column_config.NumberColumn(width="small"),
	                "rank": st.column_config.TextColumn(width="small"),
	                "lineno": st.column_config.NumberColumn(width="small"),
	                "endline": st.column_config.NumberColumn(width="small"),
	                "type": st.column_config.TextColumn(width="small"),
	            },
	        )

	    # ===== TAB 3: WARNINGS =====
	    with tab3:
	        st.subheader("⚠️ Items Requiring Investigation")

	        col1, col2 = st.columns(2)
	        with col1:
	            complexity_threshold = st.slider(
	                "Complexity Threshold",
	                min_value=1,
	                max_value=int(df["complexity"].max()),
	                value=10,
	                help="Items with complexity >= this value will be flagged",
	            )

	        with col2:
	            risky_grades = st.multiselect(
	                "Risky Grades",
	                options=["A", "B", "C", "D", "E", "F"],
	                default=["D", "E", "F"],
	                help="Grades considered risky",
	            )

	        risky_df = identify_risky_items(df, complexity_threshold, risky_grades)

	        if len(risky_df) > 0:
	            st.warning(f"⚠️ Found {len(risky_df)} items that need investigation")

	            # Group by severity
	            col1, col2 = st.columns(2)
	            with col1:
	                high_risk = risky_df[risky_df["complexity"] >= complexity_threshold + 5]
	                st.metric("High Risk (Very High Complexity)", len(high_risk))

	            with col2:
	                # Counts the same grades that trigger the per-item grade
	                # warning below (previously "E" was missing here).
	                bad_grade = risky_df[risky_df["rank"].isin(poor_grades)]
	                st.metric("Bad Grade Items", len(bad_grade))

	            st.divider()

	            # Detailed view of the 20 most complex risky items; the first
	            # expander starts open.
	            for idx, (_, row) in enumerate(risky_df.head(20).iterrows(), 1):
	                with st.expander(
	                    f"🚨 {row['name']} (Complexity: {row['complexity']}, Grade: {row['rank']})",
	                    expanded=(idx == 1),
	                ):
	                    col1, col2, col3, col4 = st.columns(4)
	                    with col1:
	                        st.metric("Complexity", row["complexity"])
	                    with col2:
	                        st.write(f"Grade: {row['rank']}")
	                    with col3:
	                        st.write(f"Type: {row['type']}")
	                    with col4:
	                        st.write(f"Lines: {row['lineno']}-{row['endline']}")

	                    st.write(f"File: `{row['filepath']}`")

	                    # Qualify methods with their class. Fall back to the
	                    # parent name when the class name is empty so the
	                    # result never starts with a bare ".".
	                    owner = row["classname"]
	                    if not owner:
	                        parent = row.get("parent_name")
	                        owner = parent if isinstance(parent, str) else ""
	                    full_name = (
	                        f"{owner}.{row['name']}"
	                        if row["type"] == "method" and owner
	                        else row["name"]
	                    )
	                    st.write(f"Full Name: `{full_name}`")

	                    # Recommendation
	                    if row["complexity"] >= complexity_threshold + 5:
	                        st.error("🔴 CRITICAL: This needs immediate refactoring")
	                    elif row["complexity"] >= complexity_threshold:
	                        st.warning(
	                            "🟠 HIGH: Consider breaking this into smaller functions"
	                        )

	                    if row["rank"] in poor_grades:
	                        st.warning(
	                            f"Grade {row['rank']}: Code quality is poor, refactoring recommended"
	                        )
	        else:
	            st.success("✅ No risky items found! Your code looks good.")

	    # ===== TAB 4: DETAILED VIEW =====
	    with tab4:
	        st.subheader("Detailed Item Analysis")

	        # Select item to analyze
	        df_display = df.copy()
	        df_display["display_name"] = df_display.apply(
	            lambda x: f"{x['name']} ({x['type']}) - {x['filepath'].split('/')[-1]}",
	            axis=1,
	        )

	        selected_item = st.selectbox(
	            "Select an item to analyze",
	            options=df_display.index,
	            format_func=lambda x: df_display.loc[x, "display_name"],
	        )

	        if selected_item is not None:
	            # The selectbox returns an index label, so look the row up by
	            # label; a positional lookup picks the wrong row whenever
	            # labels and positions differ.
	            item = df.loc[selected_item]

	            # Header with grade badge
	            col1, col2 = st.columns([3, 1])
	            with col1:
	                st.title(item["name"])
	            with col2:
	                grade_html = display_grade_badge(item["rank"])
	                st.markdown(grade_html, unsafe_allow_html=True)

	            st.divider()

	            # Detailed metrics
	            col1, col2, col3, col4, col5 = st.columns(5)
	            with col1:
	                st.metric("Complexity", item["complexity"])
	            with col2:
	                st.metric("Type", item["type"])
	            with col3:
	                st.metric("Start Line", int(item["lineno"]))
	            with col4:
	                st.metric("End Line", int(item["endline"]))
	            with col5:
	                # Inclusive line span of the item.
	                st.metric("Lines of Code", int(item["endline"] - item["lineno"] + 1))

	            st.divider()

	            # File and location info
	            col1, col2 = st.columns(2)
	            with col1:
	                st.write("File Path:")
	                st.code(item["filepath"], language="text")
	            with col2:
	                st.write("Location:")
	                st.code(
	                    f"Line {int(item['lineno'])} to {int(item['endline'])}, Column {int(item['col_offset'])}",
	                    language="text",
	                )

	            if item["classname"]:
	                st.write("Class Name:")
	                st.code(item["classname"], language="text")

	            st.divider()

	            # Recommendations
	            st.subheader("💡 Recommendations")

	            complexity = int(item["complexity"])
	            if complexity <= 3:
	                st.success(
	                    "✅ Simple: This code is easy to understand and maintain."
	                )
	            elif complexity <= 7:
	                st.info(
	                    "ℹ️ Moderate: Code is reasonably complex. Consider breaking into smaller functions if it exceeds 7."
	                )
	            elif complexity <= 10:
	                st.warning(
	                    "⚠️ Complex: This code is complex and may be difficult to maintain. Consider refactoring."
	                )
	            else:
	                st.error(
	                    "🔴 Very Complex: This code needs immediate refactoring. Break it into smaller, testable units."
	                )

	            if item["rank"] in poor_grades:
	                st.error(
	                    f"📉 Grade {item['rank']}: Code quality needs improvement."
	                )

	else:
	    # Landing page
	    st.title("📊 Radon Complexity Analyzer")
	    st.markdown(
	        """
	Welcome to the Radon Cyclomatic Complexity Analyzer!

	This tool helps you analyze and visualize Python code complexity reports from the radon library.

	### Features:
	- 📈 Overview: See complexity distribution across your codebase
	- 🔍 Analysis: Filter, sort, and search for specific functions/classes
	- ⚠️ Warnings: Identify items that need immediate attention
	- 📋 Details: Get detailed analysis and recommendations for each item

	### How to use:
	1. Generate a radon complexity report as JSON:
	```bash
	radon cc your_project/ -j > report.json
	```
	2. Upload the JSON file using the sidebar
	3. Explore and analyze your code complexity!
	"""
	    )

	    # Offer the bundled sample report for demonstration
	    st.divider()
	    st.subheader("Or try with sample data:")

	    if st.button("Load Sample Report"):
	        # The sample file is expected next to this script.
	        sample_file_path = Path(__file__).parent / "sample_report.json"

	        try:
	            with open(sample_file_path, "r", encoding="utf-8") as f:
	                sample_report = json.load(f)
	            sample_df = flatten_report(sample_report)
	        except FileNotFoundError:
	            st.error("❌ Sample report file not found. Please upload your own report.")
	        except Exception as e:
	            st.error(f"❌ Error loading sample data: {str(e)}")
	        else:
	            st.session_state.report_data = sample_report
	            st.session_state.df = sample_df
	            # Reset scatter plot file selection when sample is loaded
	            if "selected_scatter_files" in st.session_state:
	                del st.session_state.selected_scatter_files
	            st.success("✅ Sample data loaded! Refresh the page to see the analysis.")
	            # Kept outside the try body so the rerun request can never be
	            # intercepted by the error handlers above.
	            st.rerun()
	# sample_report = {
	# "example/settings.py": [
	# {
	# "type": "class",
	# "rank": "A",
	# "lineno": 7,
	# "complexity": 1,
	# "endline": 8,
	# "name": "DBSettings",
	# "col_offset": 0,
	# "methods": [],
	# },
	# {
	# "type": "class",
	# "rank": "B",
	# "lineno": 11,
	# "complexity": 5,
	# "endline": 13,
	# "name": "ComplexSettings",
	# "col_offset": 0,
	# "methods": [
	# {
	# "type": "method",
	# "rank": "C",
	# "lineno": 12,
	# "classname": "ComplexSettings",
	# "complexity": 8,
	# "endline": 13,
	# "name": "validate",
	# "col_offset": 4,
	# "closures": [],
	# }
	# ],
	# },
	# ],
	# "example/base.py": [
	# {
	# "type": "function",
	# "rank": "F",
	# "lineno": 1,
	# "complexity": 15,
	# "endline": 50,
	# "name": "complex_function",
	# "col_offset": 0,
	# }
	# ],
	# }

	# st.session_state.report_data = sample_report
	# st.session_state.df = flatten_report(sample_report)
	# st.success("✅ Sample data loaded! Refresh the page to see the analysis.")
	# st.rerun()