NajmiHassan1 committed on
Commit
0c1b49c
·
verified ·
1 Parent(s): c4304c8

Create smell_detector.py

Browse files
Files changed (1) hide show
  1. smell_detector.py +179 -0
smell_detector.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import hashlib
3
+ import datetime
4
+
5
class CodeSmellDetector:
    """Detect code smells in Python source code.

    Combines an AST pass over function definitions (length and parameter
    count, delegated to ``FunctionSmellVisitor``) with a line-based search
    for repeated blocks, and can render the findings as a Markdown report.
    """

    def __init__(self, function_length_limit=10, param_limit=5):
        """
        Initialize the code smell detector with configurable thresholds.

        Args:
            function_length_limit (int): Maximum acceptable function length in lines
            param_limit (int): Maximum acceptable number of parameters for a function
        """
        self.function_length_limit = function_length_limit
        self.param_limit = param_limit

    def analyze_code(self, code_string, block_size=3):
        """
        Analyze Python code for various code smells.

        Args:
            code_string (str): Python code as a string
            block_size (int): Size of code blocks to check for duplication

        Returns:
            dict: ``function_smells`` (list of str), ``duplicate_blocks``
                (list of tuples, see ``find_duplicate_blocks``) and
                ``block_size`` (the value used for this run). When the code
                cannot be parsed, an ``error`` message is included and both
                lists are empty.
        """
        try:
            tree = ast.parse(code_string)
        except (SyntaxError, ValueError) as e:
            # ValueError covers sources ast.parse rejects before parsing
            # (e.g. null bytes on older Python versions).
            return {
                "error": f"Syntax error in the provided code: {e}",
                "function_smells": [],
                "duplicate_blocks": [],
                "block_size": block_size,
            }

        # Detect function-level code smells
        function_visitor = FunctionSmellVisitor(self.function_length_limit, self.param_limit)
        function_visitor.visit(tree)

        return {
            "function_smells": function_visitor.smells,
            "duplicate_blocks": self.find_duplicate_blocks(code_string, block_size),
            # Recorded so the report can state the setting even when no
            # duplicates were found.
            "block_size": block_size,
        }

    def find_duplicate_blocks(self, source_code, block_size):
        """
        Find duplicate blocks of code.

        Every window of ``block_size`` consecutive lines is compared after
        stripping leading/trailing whitespace from each line, so blocks that
        differ only in indentation are treated as equal. Windows made up
        entirely of blank lines are ignored.

        Args:
            source_code (str): The source code to analyze
            block_size (int): Size of blocks to check for duplication

        Returns:
            list: Tuples of (line of this occurrence, line of the first
                occurrence, block content), with 1-based line numbers.
                Empty when ``block_size`` is less than 1.
        """
        if block_size < 1:
            # A non-positive window has no meaningful blocks to compare.
            return []

        lines = [line.strip() for line in source_code.splitlines()]
        first_seen = {}  # block text -> 0-based index of its first occurrence
        duplicates = []

        for i in range(len(lines) - block_size + 1):
            block = "\n".join(lines[i:i + block_size])
            # Skip empty blocks
            if not block.strip():
                continue

            # The block text itself is the dictionary key; no digest needed.
            if block in first_seen:
                duplicates.append((i + 1, first_seen[block] + 1, block))
            else:
                first_seen[block] = i

        return duplicates

    def generate_report(self, filename, results):
        """
        Generate a markdown report for the analysis results.

        Args:
            filename (str): The name of the analyzed file
            results (dict): Analysis results as returned by ``analyze_code``;
                missing keys are treated as "nothing found"

        Returns:
            str: Markdown formatted report
        """
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        function_smells = results.get("function_smells") or []
        duplicates = results.get("duplicate_blocks") or []

        # Prefer the recorded setting; for results that lack it, infer the
        # size from the first duplicate. split("\n") keeps trailing blank
        # lines, which splitlines() would drop and under-count.
        block_size = results.get("block_size")
        if block_size is None:
            block_size = len(duplicates[0][2].split("\n")) if duplicates else "N/A"

        parts = [
            "# Code Smell Analysis Report\n",
            f"Generated: {now}\n",
            f"File: {filename}\n",
            "\n",
            "## Analysis Settings\n",
            f"- Function length limit: {self.function_length_limit} lines\n",
            f"- Parameter limit: {self.param_limit}\n",
            f"- Duplicate block size: {block_size} lines\n",
            "\n",
        ]

        # Surface a failed analysis instead of reporting a clean result.
        error = results.get("error")
        if error:
            parts.append("## Error\n")
            parts.append(f"- {error}\n\n")

        parts.append("## Function-level Code Smells\n")
        if function_smells:
            parts.extend(f"- {smell}\n" for smell in function_smells)
        else:
            parts.append("No function-level code smells detected.\n")

        parts.append("\n## Duplicate Code Blocks\n")
        if duplicates:
            for idx, dup in enumerate(duplicates, start=1):
                parts.append(f"### Duplicate Block #{idx}\n")
                parts.append(f"- Found at lines {dup[0]} and {dup[1]}\n")
                parts.append("```python\n")
                parts.append(dup[2] + "\n")
                parts.append("```\n\n")
        else:
            parts.append("No duplicate blocks detected.\n")

        return "".join(parts)
124
+
125
+
126
class FunctionSmellVisitor(ast.NodeVisitor):
    """AST visitor to detect function-related code smells.

    Findings are collected as human-readable strings in ``self.smells`` in
    the order the functions are visited. Both ``def`` and ``async def``
    functions are checked, including nested ones.
    """

    def __init__(self, function_length_limit, param_limit):
        """
        Initialize with thresholds for function smells.

        Args:
            function_length_limit (int): Maximum acceptable function length
            param_limit (int): Maximum acceptable parameter count
        """
        self.function_length_limit = function_length_limit
        self.param_limit = param_limit
        self.smells = []

    def visit_FunctionDef(self, node):
        """
        Visit a function definition and check for smells.

        Args:
            node (ast.FunctionDef | ast.AsyncFunctionDef): The function node
                to analyze
        """
        start_line = node.lineno

        # Length runs from the ``def`` line to the last line of the body.
        length = self._end_line(node) - start_line + 1
        if length > self.function_length_limit:
            self.smells.append(
                f"🔴 Long function '{node.name}' ({length} lines) at line {start_line}"
            )

        param_count = self._count_parameters(node.args)
        if param_count > self.param_limit:
            self.smells.append(
                f"🟠 Function '{node.name}' has too many parameters ({param_count}) at line {start_line}"
            )

        # Continue visiting child nodes so nested functions are checked too
        self.generic_visit(node)

    # ``async def`` is a distinct node type; without this alias coroutine
    # functions would never be checked.
    visit_AsyncFunctionDef = visit_FunctionDef

    @staticmethod
    def _end_line(node):
        """Return the last source line covered by the function's body."""
        end_line = 0
        for item in node.body:
            # end_lineno exists on Python 3.8+; fall back to the statement's
            # starting line when it is unavailable.
            last = getattr(item, "end_lineno", None) or getattr(item, "lineno", 0)
            end_line = max(end_line, last)

        # Fall back if we couldn't determine the end line
        if end_line == 0:
            end_line = node.lineno + len(node.body)
        return end_line

    @staticmethod
    def _count_parameters(arguments):
        """Count positional-only, regular and keyword-only parameters."""
        return (
            len(getattr(arguments, "posonlyargs", []))
            + len(arguments.args)
            + len(arguments.kwonlyargs)
        )