# Source: zenith-backend/app/services/workflow/code_quality_improvement.py
# Uploaded by teoat via huggingface_hub (commit 4ae946d, verified)
#!/usr/bin/env python3
"""
Code Quality Improvement Service
Automated technical debt reduction and code smell remediation
"""
import ast
import logging
import os
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any
logger = logging.getLogger(__name__)
class CodeSmell(Enum):
    """Categories of code smells the analyzer can detect.

    Only a subset (long method, duplicate code, long parameter list,
    complex conditional) currently has automated-fix handlers registered.
    """

    LONG_METHOD = "long_method"
    LARGE_CLASS = "large_class"
    DUPLICATE_CODE = "duplicate_code"
    COMPLEX_CONDITIONAL = "complex_conditional"
    LONG_PARAMETER_LIST = "long_parameter_list"
    DATA_CLASS = "data_class"
    FEATURE_ENVY = "feature_envy"
    MESSAGE_CHAIN = "message_chain"
    MIDDLE_MAN = "middle_man"
    INAPPROPRIATE_INTIMACY = "inappropriate_intimacy"
class DebtPriority(Enum):
    """Priority levels assigned to issues and refactoring tasks."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
@dataclass
class CodeIssue:
    """Represents a code quality issue found during codebase analysis."""

    issue_id: str  # unique id, e.g. "long_method_<file_path>_<line>"
    file_path: str  # path of the analyzed file
    line_number: int  # 1-based line where the smell starts
    smell_type: CodeSmell
    severity: DebtPriority
    description: str  # human-readable explanation of the finding
    code_snippet: str  # short excerpt of the offending code
    estimated_effort: str  # "quick_fix", "refactor", "major_rework"
    automated_fix_available: bool  # True if an automated fixer is registered
    impact_score: float  # 0.0 to 1.0
    identified_at: datetime  # timestamp when the analyzer detected the issue
@dataclass
class RefactoringTask:
    """Represents a refactoring task derived from one or more issues."""

    task_id: str  # unique id; fixers use "refactor_<issue_id>"
    issue_ids: list[str]  # CodeIssue ids addressed by this task
    title: str
    description: str
    priority: DebtPriority  # inherited from the originating issue's severity
    estimated_effort_days: float
    status: str  # "pending", "in_progress", "completed", "blocked"
    assigned_to: str | None = None
    created_at: datetime = field(default_factory=datetime.now)
    completed_at: datetime | None = None
    automated: bool = False  # True if the fix was applied without human work
@dataclass
class CodeQualityMetrics:
    """Aggregate code quality metrics.

    NOTE(review): declared but never populated by the service in this
    file (``code_metrics`` stays empty) — confirm intended producer.
    """

    total_lines: int
    cyclomatic_complexity_avg: float
    duplication_percentage: float
    test_coverage: float
    technical_debt_ratio: float
    maintainability_index: float
    issues_count: int
    issues_fixed: int
class CodeQualityImprovementService:
"""Automated code quality improvement and technical debt reduction"""
def __init__(self):
self.code_issues: dict[str, CodeIssue] = {}
self.refactoring_tasks: dict[str, RefactoringTask] = {}
self.code_metrics: dict[str, CodeQualityMetrics] = {}
self.automated_fixes: dict[CodeSmell, Callable] = {}
self._initialize_automated_fixes()
self._setup_code_analysis()
def _initialize_automated_fixes(self):
"""Initialize automated code fixes"""
self.automated_fixes = {
CodeSmell.LONG_METHOD: self._fix_long_method,
CodeSmell.DUPLICATE_CODE: self._fix_duplicate_code,
CodeSmell.LONG_PARAMETER_LIST: self._fix_long_parameter_list,
CodeSmell.COMPLEX_CONDITIONAL: self._fix_complex_conditional,
}
def _setup_code_analysis(self):
"""Setup code analysis tools"""
self.analysis_rules = {
"max_method_length": 30,
"max_class_length": 300,
"max_parameters": 5,
"max_complexity": 10,
"duplicate_threshold": 0.8, # 80% similarity
}
async def analyze_codebase(
self, root_path: str = "/Users/Arief/Desktop/Zenith"
) -> dict[str, Any]:
"""Comprehensive codebase analysis"""
logger.info(f"Starting codebase analysis for: {root_path}")
analysis_results = {
"files_analyzed": 0,
"issues_found": 0,
"automated_fixes_available": 0,
"technical_debt_estimate": 0,
"issues_by_type": {},
"issues_by_severity": {},
}
# Find Python files
python_files = []
for root, dirs, files in os.walk(root_path):
# Skip certain directories
dirs[:] = [
d
for d in dirs
if not d.startswith(".")
and d not in ["node_modules", "__pycache__", ".git"]
]
for file in files:
if file.endswith(".py"):
python_files.append(os.path.join(root, file))
logger.info(f"Found {len(python_files)} Python files to analyze")
for file_path in python_files[:50]: # Limit for performance
try:
issues = await self._analyze_file(file_path)
analysis_results["files_analyzed"] += 1
for issue in issues:
self.code_issues[issue.issue_id] = issue
analysis_results["issues_found"] += 1
if issue.automated_fix_available:
analysis_results["automated_fixes_available"] += 1
# Categorize issues
issue_type = issue.smell_type.value
severity = issue.severity.value
analysis_results["issues_by_type"][issue_type] = (
analysis_results["issues_by_type"].get(issue_type, 0) + 1
)
analysis_results["issues_by_severity"][severity] = (
analysis_results["issues_by_severity"].get(severity, 0) + 1
)
# Estimate technical debt
effort_multiplier = {
"quick_fix": 0.5,
"refactor": 2,
"major_rework": 5,
}
analysis_results[
"technical_debt_estimate"
] += issue.impact_score * effort_multiplier.get(
issue.estimated_effort, 1
)
except Exception as e:
logger.error(f"Failed to analyze {file_path}: {e}")
# Calculate overall metrics
analysis_results["technical_debt_hours"] = analysis_results[
"technical_debt_estimate"
]
analysis_results["code_quality_score"] = max(
0, 100 - (analysis_results["issues_found"] * 2)
)
logger.info(
f"Analysis complete: {analysis_results['issues_found']} issues found in {analysis_results['files_analyzed']} files"
)
return analysis_results
async def _analyze_file(self, file_path: str) -> list[CodeIssue]:
"""Analyze a single Python file for code smells"""
issues = []
try:
with open(file_path, encoding="utf-8") as f:
content = f.read()
lines = content.split("\n")
tree = ast.parse(content, file_path)
# Analyze AST for various smells
issues.extend(self._detect_long_methods(tree, file_path, lines))
issues.extend(self._detect_large_classes(tree, file_path, lines))
issues.extend(self._detect_long_parameter_lists(tree, file_path, lines))
issues.extend(self._detect_complex_conditionals(tree, file_path, lines))
# Text-based analysis
issues.extend(self._detect_duplicate_code(content, file_path, lines))
except SyntaxError:
logger.warning(f"Syntax error in {file_path}, skipping AST analysis")
except Exception as e:
logger.error(f"Error analyzing {file_path}: {e}")
return issues
def _detect_long_methods(
self, tree: ast.AST, file_path: str, lines: list[str]
) -> list[CodeIssue]:
"""Detect methods that are too long"""
issues = []
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
method_length = node.end_lineno - node.lineno
if method_length > self.analysis_rules["max_method_length"]:
severity = (
DebtPriority.HIGH if method_length > 50 else DebtPriority.MEDIUM
)
issue = CodeIssue(
issue_id=f"long_method_{file_path}_{node.lineno}",
file_path=file_path,
line_number=node.lineno,
smell_type=CodeSmell.LONG_METHOD,
severity=severity,
description=f"Method '{node.name}' is {method_length} lines long (max recommended: {self.analysis_rules['max_method_length']})",
code_snippet="\n".join(
lines[node.lineno - 1 : node.lineno + 5]
),
estimated_effort="refactor",
automated_fix_available=True,
impact_score=min(1.0, method_length / 100),
identified_at=datetime.now(),
)
issues.append(issue)
return issues
def _detect_large_classes(
self, tree: ast.AST, file_path: str, lines: list[str]
) -> list[CodeIssue]:
"""Detect classes that are too large"""
issues = []
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef):
class_length = node.end_lineno - node.lineno
if class_length > self.analysis_rules["max_class_length"]:
issue = CodeIssue(
issue_id=f"large_class_{file_path}_{node.lineno}",
file_path=file_path,
line_number=node.lineno,
smell_type=CodeSmell.LARGE_CLASS,
severity=DebtPriority.HIGH,
description=f"Class '{node.name}' is {class_length} lines long (max recommended: {self.analysis_rules['max_class_length']})",
code_snippet="\n".join(
lines[node.lineno - 1 : node.lineno + 3]
),
estimated_effort="major_rework",
automated_fix_available=False,
impact_score=min(1.0, class_length / 500),
identified_at=datetime.now(),
)
issues.append(issue)
return issues
def _detect_long_parameter_lists(
self, tree: ast.AST, file_path: str, lines: list[str]
) -> list[CodeIssue]:
"""Detect functions with too many parameters"""
issues = []
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
param_count = len(node.args.args)
if param_count > self.analysis_rules["max_parameters"]:
issue = CodeIssue(
issue_id=f"long_params_{file_path}_{node.lineno}",
file_path=file_path,
line_number=node.lineno,
smell_type=CodeSmell.LONG_PARAMETER_LIST,
severity=DebtPriority.MEDIUM,
description=f"Function '{node.name}' has {param_count} parameters (max recommended: {self.analysis_rules['max_parameters']})",
code_snippet="\n".join(
lines[node.lineno - 1 : node.lineno + 2]
),
estimated_effort="refactor",
automated_fix_available=True,
impact_score=min(1.0, param_count / 10),
identified_at=datetime.now(),
)
issues.append(issue)
return issues
def _detect_complex_conditionals(
self, tree: ast.AST, file_path: str, lines: list[str]
) -> list[CodeIssue]:
"""Detect complex conditional statements"""
issues = []
for node in ast.walk(tree):
if isinstance(node, ast.If):
# Calculate complexity based on nested conditions
complexity = self._calculate_conditional_complexity(node)
if complexity > self.analysis_rules["max_complexity"]:
issue = CodeIssue(
issue_id=f"complex_conditional_{file_path}_{node.lineno}",
file_path=file_path,
line_number=node.lineno,
smell_type=CodeSmell.COMPLEX_CONDITIONAL,
severity=DebtPriority.MEDIUM,
description=f"Complex conditional with complexity score {complexity} (max recommended: {self.analysis_rules['max_complexity']})",
code_snippet="\n".join(
lines[node.lineno - 1 : node.lineno + 3]
),
estimated_effort="refactor",
automated_fix_available=True,
impact_score=min(1.0, complexity / 20),
identified_at=datetime.now(),
)
issues.append(issue)
return issues
def _calculate_conditional_complexity(self, node: ast.If, depth: int = 1) -> int:
"""Calculate complexity of conditional statement"""
complexity = depth
# Check for and/or operators
if hasattr(node.test, "left"):
complexity += 1
# Check nested if statements
if node.orelse:
for child in node.orelse:
if isinstance(child, ast.If):
complexity += self._calculate_conditional_complexity(
child, depth + 1
)
return complexity
def _detect_duplicate_code(
self, content: str, file_path: str, lines: list[str]
) -> list[CodeIssue]:
"""Detect duplicate code blocks"""
issues = []
# Simple duplicate detection - check for repeated code blocks
code_blocks = []
for i, line in enumerate(lines):
if line.strip() and not line.strip().startswith("#"):
# Extract code blocks of 3-5 lines
if i + 2 < len(lines):
block = "\n".join(lines[i : i + 3]).strip()
if len(block) > 20: # Minimum block size
code_blocks.append((i + 1, block))
# Find duplicates
seen_blocks = {}
for line_num, block in code_blocks:
if block in seen_blocks:
# Found duplicate
original_line = seen_blocks[block]
issue = CodeIssue(
issue_id=f"duplicate_code_{file_path}_{line_num}",
file_path=file_path,
line_number=line_num,
smell_type=CodeSmell.DUPLICATE_CODE,
severity=DebtPriority.MEDIUM,
description=f"Duplicate code block found (original at line {original_line})",
code_snippet=block[:100] + "..." if len(block) > 100 else block,
estimated_effort="refactor",
automated_fix_available=True,
impact_score=0.6,
identified_at=datetime.now(),
)
issues.append(issue)
else:
seen_blocks[block] = line_num
return issues
async def apply_automated_fixes(self) -> dict[str, Any]:
"""Apply all available automated fixes"""
results = {
"fixes_attempted": 0,
"fixes_successful": 0,
"fixes_failed": 0,
"issues_resolved": [],
}
for issue in self.code_issues.values():
if issue.automated_fix_available and not self._is_issue_resolved(issue):
results["fixes_attempted"] += 1
try:
success = await self._apply_fix(issue)
if success:
results["fixes_successful"] += 1
results["issues_resolved"].append(issue.issue_id)
logger.info(f"Successfully fixed issue: {issue.issue_id}")
else:
results["fixes_failed"] += 1
logger.warning(f"Failed to fix issue: {issue.issue_id}")
except Exception as e:
results["fixes_failed"] += 1
logger.error(f"Error fixing issue {issue.issue_id}: {e}")
return results
async def _apply_fix(self, issue: CodeIssue) -> bool:
"""Apply automated fix for a specific issue"""
if issue.smell_type in self.automated_fixes:
return await self.automated_fixes[issue.smell_type](issue)
return False
async def _fix_long_method(self, issue: CodeIssue) -> bool:
"""Apply automated fix for long method"""
# This would require more sophisticated code analysis and transformation
# For now, we'll create a refactoring task
task = RefactoringTask(
task_id=f"refactor_{issue.issue_id}",
issue_ids=[issue.issue_id],
title=f"Refactor long method: {issue.description}",
description="Break down long method into smaller, focused functions",
priority=issue.severity,
estimated_effort_days=2.0,
status="pending",
automated=False,
)
self.refactoring_tasks[task.task_id] = task
return True
async def _fix_duplicate_code(self, issue: CodeIssue) -> bool:
"""Apply automated fix for duplicate code"""
# Extract common functionality to a utility function
task = RefactoringTask(
task_id=f"refactor_{issue.issue_id}",
issue_ids=[issue.issue_id],
title=f"Extract duplicate code: {issue.description}",
description="Create utility function for duplicated code block",
priority=issue.severity,
estimated_effort_days=1.0,
status="pending",
automated=False,
)
self.refactoring_tasks[task.task_id] = task
return True
async def _fix_long_parameter_list(self, issue: CodeIssue) -> bool:
"""Apply automated fix for long parameter list"""
# Introduce parameter object pattern
task = RefactoringTask(
task_id=f"refactor_{issue.issue_id}",
issue_ids=[issue.issue_id],
title=f"Refactor parameter list: {issue.description}",
description="Introduce parameter object to reduce parameter count",
priority=issue.severity,
estimated_effort_days=1.5,
status="pending",
automated=False,
)
self.refactoring_tasks[task.task_id] = task
return True
async def _fix_complex_conditional(self, issue: CodeIssue) -> bool:
"""Apply automated fix for complex conditional"""
# Extract method or use strategy pattern
task = RefactoringTask(
task_id=f"refactor_{issue.issue_id}",
issue_ids=[issue.issue_id],
title=f"Simplify complex conditional: {issue.description}",
description="Extract conditional logic into separate method or use strategy pattern",
priority=issue.severity,
estimated_effort_days=1.5,
status="pending",
automated=False,
)
self.refactoring_tasks[task.task_id] = task
return True
def _is_issue_resolved(self, issue: CodeIssue) -> bool:
"""Check if an issue has been resolved"""
# In production, this would check if the issue still exists in the code
# For now, assume issues are not resolved
return False
async def generate_refactoring_plan(self) -> dict[str, Any]:
"""Generate comprehensive refactoring plan"""
plan = {
"total_issues": len(self.code_issues),
"automated_fixes": len(
[i for i in self.code_issues.values() if i.automated_fix_available]
),
"refactoring_tasks": len(self.refactoring_tasks),
"estimated_effort_days": sum(
t.estimated_effort_days for t in self.refactoring_tasks.values()
),
"tasks_by_priority": {},
"tasks_by_type": {},
}
# Group tasks by priority and type
for task in self.refactoring_tasks.values():
priority = task.priority.value
plan["tasks_by_priority"][priority] = (
plan["tasks_by_priority"].get(priority, 0) + 1
)
# Determine task type from title
if "duplicate" in task.title.lower():
task_type = "duplicate_code"
elif "parameter" in task.title.lower():
task_type = "parameter_refactor"
elif "conditional" in task.title.lower():
task_type = "conditional_simplify"
elif "method" in task.title.lower():
task_type = "method_refactor"
else:
task_type = "general_refactor"
plan["tasks_by_type"][task_type] = (
plan["tasks_by_type"].get(task_type, 0) + 1
)
return plan
def get_quality_dashboard(self) -> dict[str, Any]:
"""Get code quality dashboard"""
total_issues = len(self.code_issues)
resolved_issues = len(
[i for i in self.code_issues.values() if self._is_issue_resolved(i)]
)
return {
"total_issues": total_issues,
"resolved_issues": resolved_issues,
"resolution_rate": (
resolved_issues / total_issues if total_issues > 0 else 0
),
"refactoring_tasks": len(self.refactoring_tasks),
"issues_by_severity": self._get_issues_by_severity(),
"issues_by_type": self._get_issues_by_type(),
"estimated_debt_hours": sum(
i.impact_score * 8 for i in self.code_issues.values()
), # Rough estimate
}
def _get_issues_by_severity(self) -> dict[str, int]:
"""Get issues count by severity"""
severities = {}
for issue in self.code_issues.values():
severity = issue.severity.value
severities[severity] = severities.get(severity, 0) + 1
return severities
def _get_issues_by_type(self) -> dict[str, int]:
"""Get issues count by type"""
types = {}
for issue in self.code_issues.values():
smell_type = issue.smell_type.value
types[smell_type] = types.get(smell_type, 0) + 1
return types
# Global singleton instance shared by importers of this module
code_quality_improvement = CodeQualityImprovementService()