diff --git "a/visualization_module.py" "b/visualization_module.py" --- "a/visualization_module.py" +++ "b/visualization_module.py" @@ -1,4 +1,3 @@ -# visualization_module.py import os import json import networkx as nx @@ -12,16 +11,22 @@ import google.generativeai as genai class RepositoryVisualizer: - """Handles visualization of GitHub repository data using Pyvis""" + """Handles visualization of GitHub repository data using Enhanced PyVis""" - def __init__(self, max_nodes: int = 150): + def __init__(self, config: Any = None, max_nodes: int = 150): """ Initialize the repository visualizer Args: - max_nodes: Maximum number of nodes to include in visualizations + config: Configuration object (optional) + max_nodes: Maximum number of nodes to include in visualizations (if config not provided) """ - self.max_nodes = max_nodes + # Handle both config object and direct parameters + if config is not None: + self.max_nodes = getattr(config, 'visualization_node_limit', 150) + else: + self.max_nodes = max_nodes + self.node_colors = { 'file': { 'py': '#3572A5', # Python (blue) @@ -43,10 +48,185 @@ class RepositoryVisualizer: 'issue': '#3498db', # Issue (blue) 'directory': '#2ecc71' # Directory (green) } + # Add group definitions for visualization + self.groups = { + 'files': {"color": {"background": "#3498db"}, "shape": "dot"}, + 'contributors': {"color": {"background": "#e74c3c"}, "shape": "diamond"}, + 'directories': {"color": {"background": "#2ecc71"}, "shape": "triangle"}, + 'issues': {"color": {"background": "#9b59b6"}, "shape": "star"} + } + + def _get_important_subgraph(self, graph: nx.Graph, max_nodes: int) -> nx.Graph: + """ + Get a subgraph containing the most important nodes + + Args: + graph: Input graph + max_nodes: Maximum number of nodes to include + + Returns: + Subgraph with most important nodes + """ + # Return original graph if it's already small enough + if len(graph.nodes) <= max_nodes: + return graph + + # Try different centrality measures + try: + # First try degree centrality + centrality = nx.degree_centrality(graph) + except: + # Fall back to simpler degree if that fails + centrality = {node: graph.degree(node) for node in graph.nodes()} + + # Sort nodes by importance + sorted_nodes = sorted(centrality.items(), key=lambda x: x[1], reverse=True) + + # Take top nodes + top_nodes = [node for node, _ in sorted_nodes[:max_nodes]] + + # Create subgraph + return graph.subgraph(top_nodes) + + def _extract_dependencies(self, file_contents: Dict) -> Dict[str, List[str]]: + """ + Extract file dependencies based on imports and includes + + Args: + file_contents: Dictionary of file contents + + Returns: + Dictionary mapping files to their dependencies + """ + dependencies = defaultdict(list) + + # Map of common import patterns by language + import_patterns = { + 'py': [ + r'^\s*import\s+(\w+)', # import module + r'^\s*from\s+(\w+)', # from module import + r'^\s*import\s+([\w.]+)' # import module.submodule + ], + 'js': [ + r'^\s*import.*from\s+[\'"](.+)[\'"]', # ES6 import + r'^\s*require\([\'"](.+)[\'"]\)', # CommonJS require + r'^\s*import\s+[\'"](.+)[\'"]' # Side-effect import + ], + 'java': [ + r'^\s*import\s+([\w.]+)' # Java import + ], + 'cpp': [ + r'^\s*#include\s+[<"](.+)[>"]' # C/C++ include + ], + 'go': [ + r'^\s*import\s+[\'"](.+)[\'"]', # Go single import + r'^\s*import\s+\(\s*[\'"](.+)[\'"]' # Go multiple imports + ] + } + + # Process each file + for filename, file_data in file_contents.items(): + # Get file extension + _, ext = os.path.splitext(filename) + ext = ext.lstrip('.').lower() if ext else '' + + # Skip if we don't have patterns for this language + if ext not in import_patterns: + continue + + # Get content + content = file_data.get('content', '') + if not content: + continue + + # Search for imports + lines = content.split('\n') + patterns = import_patterns[ext] + + for line in lines: + for pattern in patterns: + # Find imports + import_match = re.search(pattern, line) + if import_match: + imported = import_match.group(1) + + # Look for matching files + for target_file in file_contents.keys(): + target_name = os.path.basename(target_file) + target_module = os.path.splitext(target_name)[0] + + # Check if this might be the imported file + if imported == target_module or imported.endswith('.' + target_module): + dependencies[filename].append(target_file) + break + + return dependencies + + def _format_size(self, size_bytes: int) -> str: + """ + Format file size in human-readable format + + Args: + size_bytes: Size in bytes + + Returns: + Formatted size string + """ + if size_bytes < 1024: + return f"{size_bytes} bytes" + elif size_bytes < 1024 * 1024: + return f"{size_bytes / 1024:.1f} KB" + else: + return f"{size_bytes / (1024 * 1024):.1f} MB" + + def _add_directory_nodes(self, graph: nx.Graph) -> None: + """ + Add directory nodes to graph for hierarchical structure + + Args: + graph: NetworkX graph to modify + """ + file_nodes = [node for node, data in graph.nodes(data=True) + if data.get('type') == 'file'] + + # Extract unique directories + directories = set() + for filepath in file_nodes: + path_parts = os.path.dirname(filepath).split('/') + current_path = "" + + for part in path_parts: + if not part: # Skip empty parts + continue + + if current_path: + current_path = f"{current_path}/{part}" + else: + current_path = part + + directories.add(current_path) + + # Add directory nodes + for directory in directories: + if directory not in graph: + graph.add_node(directory, type='directory') + + # Connect files to their parent directories + for filepath in file_nodes: + parent_dir = os.path.dirname(filepath) + if parent_dir and parent_dir in graph: + graph.add_edge(filepath, parent_dir, type='parent') + + # Connect directories to their parents + for directory in directories: + parent_dir = os.path.dirname(directory) + if parent_dir and parent_dir in graph: + graph.add_edge(directory, parent_dir, type='parent') def create_repository_graph(self, knowledge_graph: nx.Graph, output_path: str = "repo_graph.html") -> str: """ Create an interactive visualization of the repository structure + Enhanced with better physics, filtering, and groups Args: knowledge_graph: NetworkX graph of repository data @@ -63,15 +243,22 @@ class RepositoryVisualizer: print(f"Graph has {len(graph.nodes())} nodes, limiting to {self.max_nodes} most important nodes") graph = self._get_important_subgraph(graph, self.max_nodes) - # Create Pyvis network + # Extract directories from file paths for hierarchical structure + self._add_directory_nodes(graph) + + # Create PyVis network with improved settings net = Network(height="750px", width="100%", notebook=False, directed=False, - bgcolor="#222222", font_color="white") + bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) + + # Add custom groups for better filtering + for group_name, group_props in self.groups.items(): + net.add_node(f"group_{group_name}", hidden=True, **group_props) # Customize physics for better visualization net.barnes_hut(gravity=-80000, central_gravity=0.3, spring_length=250, spring_strength=0.001, damping=0.09, overlap=0) - # Add nodes with appropriate styling + # Add nodes with appropriate styling and interactive features for node_id in graph.nodes(): node_data = graph.nodes[node_id] node_type = node_data.get('type', 'unknown') @@ -81,6 +268,7 @@ class RepositoryVisualizer: color = self.node_colors.get(node_type, {}).get('default', "#7F7F7F") shape = "dot" size = 15 + group = None if node_type == 'file': # Get file extension @@ -96,7 +284,10 @@ class RepositoryVisualizer: # Set title with additional info file_type = node_data.get('file_type', 'unknown') file_size = node_data.get('size', 0) - title = f"File: {node_id}
Type: {file_type}
Size: {self._format_size(file_size)}" + title = f"

{label}


Path: {node_id}
Type: {file_type}
Size: {self._format_size(file_size)}
" + + # Set group for filtering + group = 'files' elif node_type == 'contributor': # Contributor styling @@ -108,28 +299,36 @@ class RepositoryVisualizer: size = min(30, 15 + contributions / 20) label = node_id - title = f"Contributor: {node_id}
Contributions: {contributions}" + title = f"

Contributor: {node_id}


Contributions: {contributions}
" + + # Set group for filtering + group = 'contributors' elif node_type == 'directory': # Directory styling color = self.node_colors['directory'] shape = "triangle" label = os.path.basename(node_id) if node_id else "/" - title = f"Directory: {node_id}" + title = f"

Directory: {label}


Path: {node_id}
" + + # Set group for filtering + group = 'directories' else: # Default styling label = node_id - # Add node to network - net.add_node(node_id, label=label, title=title, color=color, shape=shape, size=size) + # Add node to network with searchable property and group + net.add_node(node_id, label=label, title=title, color=color, shape=shape, size=size, + group=group, searchable=True) - # Add edges with appropriate styling + # Add edges with appropriate styling and information for source, target, data in graph.edges(data=True): # Default edge properties width = 1 color = "#ffffff80" # Semi-transparent white title = f"{source} → {target}" + smooth = True # Enable smooth edges # Adjust based on edge data edge_type = data.get('type', 'default') @@ -139,23 +338,23 @@ class RepositoryVisualizer: width = min(10, 1 + weight / 5) if edge_type == 'co-occurrence': - title = f"Co-occurred in {weight} commits" + title = f"
Co-occurred in {weight} commits
Files modified together frequently
" color = "#9b59b680" # Semi-transparent purple elif edge_type == 'contribution': - title = f"Modified {weight} times" + title = f"
Modified {weight} times
By this contributor
" color = "#e74c3c80" # Semi-transparent red elif edge_type == 'imports': - title = f"Imports" + title = f"
Imports
This file imports the target
" color = "#3498db80" # Semi-transparent blue elif edge_type == 'parent': - title = f"Parent directory" + title = f"
Parent directory
" color = "#2ecc7180" # Semi-transparent green width = 1 # Fixed width for parent relationships - # Add edge to network - net.add_edge(source, target, title=title, width=width, color=color) + # Add edge to network with additional properties + net.add_edge(source, target, title=title, width=width, color=color, smooth=smooth, selectionWidth=width*1.5) - # Configure network options + # Configure network options with improved UI and interactivity options = """ var options = { "nodes": { @@ -201,21 +400,271 @@ class RepositoryVisualizer: "interaction": { "tooltipDelay": 200, "hideEdgesOnDrag": true, - "multiselect": true + "multiselect": true, + "hover": true, + "navigationButtons": true, + "keyboard": { + "enabled": true, + "speed": { + "x": 10, + "y": 10, + "zoom": 0.1 + }, + "bindToWindow": true + } + }, + "configure": { + "enabled": true, + "filter": ["physics", "nodes", "edges"], + "showButton": true + }, + "groups": { + "files": {"color": {"background": "#3498db"}, "shape": "dot"}, + "contributors": {"color": {"background": "#e74c3c"}, "shape": "diamond"}, + "directories": {"color": {"background": "#2ecc71"}, "shape": "triangle"}, + "issues": {"color": {"background": "#9b59b6"}, "shape": "star"} } } """ net.set_options(options) + + # Add search functionality and control buttons to the HTML + html_before = """ + + + + + Repository Visualization + + + +
+ + + + + +
+
+

Legend

+
Files
+
Contributors
+
Directories
+
Issues
+
Co-occurrence
+
Contribution
+
Imports
+
Parent
+
+ """ + + html_after = """ + + + + """ - # Save network visualization to HTML file + # Convert file_stats to JSON for the template + file_stats_json = json.dumps(file_stats) + + # Replace placeholder with actual data + html = html.replace('FILE_STATS', file_stats_json) + + # Save to file + with open(output_path, 'w', encoding='utf-8') as f: + f.write(html) + + return output_path + + # Save network visualization to HTML file with custom HTML net.save_graph(output_path) + + # Read the generated file + with open(output_path, 'r', encoding='utf-8') as f: + net_html = f.read() + + # Insert our custom HTML + net_html = net_html.replace('', html_before).replace('', html_after) + + # Write the modified file + with open(output_path, 'w', encoding='utf-8') as f: + f.write(net_html) return output_path def create_contributor_network(self, contributors: Dict, commits: List[Dict], output_path: str = "contributor_network.html") -> str: """ - Create a network visualization of contributor relationships + Create an enhanced network visualization of contributor relationships Args: contributors: Dictionary of contributor data @@ -254,14 +703,14 @@ class RepositoryVisualizer: else: graph.add_edge(author1, author2, weight=1, files={filename}, type='collaboration') - # Create Pyvis network + # Create Pyvis network with enhanced settings net = Network(height="750px", width="100%", notebook=False, directed=False, - bgcolor="#222222", font_color="white") + bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) # Configure physics net.barnes_hut(gravity=-5000, central_gravity=0.3, spring_length=150, spring_strength=0.05) - # Add nodes + # Add nodes with improved styling for login in graph.nodes(): # Get node data node_data = graph.nodes[login] @@ -270,11 +719,22 @@ class RepositoryVisualizer: # Scale size based on contributions size = 15 + min(20, contributions / 10) - # Add node - net.add_node(login, label=login, title=f"Contributor: {login}
Contributions: {contributions}", - color=self.node_colors['contributor'], shape="dot", size=size) + # Create detailed HTML tooltip + tooltip = f""" +
+

Contributor: {login}

+
+ Contributions: {contributions}
+ Activity Level: {"High" if contributions > 50 else "Medium" if contributions > 20 else "Low"} +
+ """ + + # Add node with improved metadata + net.add_node(login, label=login, title=tooltip, + color=self.node_colors['contributor'], shape="dot", size=size, + group='contributors', searchable=True) - # Add edges + # Add edges with enhanced information for source, target, data in graph.edges(data=True): weight = data.get('weight', 1) files = data.get('files', set()) @@ -282,18 +742,29 @@ class RepositoryVisualizer: # Scale width based on collaboration strength width = min(10, 1 + weight / 2) - # Create title with file information + # Create a better-formatted tooltip with file information file_list = "
".join(list(files)[:5]) if len(files) > 5: file_list += f"
...and {len(files) - 5} more" - title = f"Collaborated on {weight} files

Including:
{file_list}" + tooltip = f""" +
+

Collaboration

+
+ Contributors: {source} & {target}
+ Shared Files: {weight}
+ Collaboration Strength: {"Strong" if weight > 5 else "Medium" if weight > 2 else "Light"}
+
+ Example Files:
+ {file_list} +
+ """ - # Add edge - color = "#3498db80" # Semi-transparent blue - net.add_edge(source, target, title=title, width=width, color=color) + # Add edge with enhanced styling + color = "#3498db" + hex(min(255, 80 + (weight * 10)))[2:].zfill(2) # Vary opacity by weight + net.add_edge(source, target, title=tooltip, width=width, color=color, smooth=True) - # Configure options + # Configure options with enhanced UI options = """ var options = { "nodes": { @@ -311,9 +782,11 @@ class RepositoryVisualizer: "inherit": false }, "smooth": { - "type": "continuous" + "type": "continuous", + "forceDirection": "horizontal" }, - "shadow": true + "shadow": true, + "selectionWidth": 3 }, "physics": { "barnesHut": { @@ -328,19 +801,270 @@ class RepositoryVisualizer: "enabled": true, "iterations": 1000 } + }, + "interaction": { + "hover": true, + "tooltipDelay": 200, + "hideEdgesOnDrag": true, + "multiselect": true, + "navigationButtons": true + }, + "configure": { + "enabled": true, + "filter": ["physics", "nodes", "edges"], + "showButton": true } } """ net.set_options(options) - # Save to HTML file + # Add search and controls similar to repository graph + html_before = """ + + + + + Contributor Network + + + +
+ + + + + +
+
+

Network Statistics

+

Contributors: 0

+

Collaborations: 0

+

Avg. Collaborations: 0

+

Click on a contributor to see their relationships

+
+ """ + + html_after = """ + + + + """ + + # Save to HTML file with custom HTML net.save_graph(output_path) + + # Read the generated file + with open(output_path, 'r', encoding='utf-8') as f: + net_html = f.read() + + # Insert our custom HTML + net_html = net_html.replace('', html_before).replace('', html_after) + + # Write the modified file + with open(output_path, 'w', encoding='utf-8') as f: + f.write(net_html) return output_path def create_file_dependency_graph(self, file_contents: Dict, output_path: str = "dependency_graph.html") -> str: """ - Create a graph of file dependencies based on imports and references + Create an enhanced graph of file dependencies based on imports and references + Using direct PyVis implementation without relying on NetworkX Args: file_contents: Dictionary of file contents @@ -349,63 +1073,99 @@ class RepositoryVisualizer: Returns: Path to the saved HTML file """ - # Create graph for dependencies - graph = nx.DiGraph() - - # Process files to find dependencies - dependencies = self._extract_dependencies(file_contents) - - # Add file nodes - for filename in file_contents.keys(): - # Get file extension - _, ext = os.path.splitext(filename) - ext = ext.lstrip('.').lower() if ext else 'default' - - # Get file type - file_data = file_contents[filename] - file_type = file_data.get('type', 'unknown') - - # Add node - graph.add_node(filename, type='file', file_type=file_type, extension=ext) - - # Add dependency edges - for source, targets in dependencies.items(): - for target in targets: - graph.add_edge(source, target, type='imports') - - # Create Pyvis network + # Create PyVis network directly net = Network(height="750px", width="100%", notebook=False, directed=True, - bgcolor="#222222", font_color="white") + bgcolor="#222222", font_color="white", select_menu=True, filter_menu=True) # Customize physics net.barnes_hut(gravity=-10000, central_gravity=0.3, spring_length=200) - # Add nodes with styling - for filename in graph.nodes(): - # Get node data - node_data = graph.nodes[filename] - node_type = node_data.get('type', 'unknown') - - # Get file extension for color - ext = node_data.get('extension', 'default') - color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) - - # Use filename as label - label = os.path.basename(filename) - - # Set title - file_type = node_data.get('file_type', 'unknown') - title = f"File: {filename}
Type: {file_type}" - - # Add node - net.add_node(filename, label=label, title=title, color=color, shape="dot", size=15) - - # Add edges - for source, target in graph.edges(): - net.add_edge(source, target, title=f"{source} imports {target}", - color="#2ecc7180", arrows="to") - - # Configure options + # Process files to find dependencies + dependencies = self._extract_dependencies(file_contents) + + # Keep track of added nodes to avoid duplicates + added_nodes = set() + + # Add file nodes with improved styling + for filename, targets in dependencies.items(): + if filename not in added_nodes: + # Get file extension for color + _, ext = os.path.splitext(filename) + ext = ext.lstrip('.').lower() if ext else 'default' + color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) + + # Use filename as label + label = os.path.basename(filename) + + # Enhanced tooltip with file information + file_data = file_contents.get(filename, {}) + file_type = file_data.get('type', 'unknown') + file_size = file_data.get('size', 0) + + tooltip = f""" +
+

{label}

+
+ Path: {filename}
+ Type: {file_type}
+ Size: {self._format_size(file_size)}
+ Dependencies: {len(targets)} +
+ """ + + # Add node with improved styling and metadata + net.add_node(filename, label=label, title=tooltip, color=color, + shape="dot", size=15, group=ext, searchable=True) + added_nodes.add(filename) + + # Add target nodes if not already added + for target in targets: + if target not in added_nodes: + # Get file extension for color + _, ext = os.path.splitext(target) + ext = ext.lstrip('.').lower() if ext else 'default' + color = self.node_colors['file'].get(ext, self.node_colors['file']['default']) + + # Use filename as label + label = os.path.basename(target) + + # Enhanced tooltip with file information + file_data = file_contents.get(target, {}) + file_type = file_data.get('type', 'unknown') + file_size = file_data.get('size', 0) + + tooltip = f""" +
+

{label}

+
+ Path: {target}
+ Type: {file_type}
+ Size: {self._format_size(file_size)} +
+ """ + + # Add node with improved styling and metadata + net.add_node(target, label=label, title=tooltip, color=color, + shape="dot", size=15, group=ext, searchable=True) + added_nodes.add(target) + + # Add edges with improved styling + for source, targets in dependencies.items(): + for target in targets: + # Enhanced tooltip with relationship information + tooltip = f""" +
+

Dependency

+
+ {os.path.basename(source)} imports {os.path.basename(target)} +
+ """ + + # Add edge with improved styling + net.add_edge(source, target, title=tooltip, arrows="to", + color="#2ecc7180", smooth=True, width=1.5) + + # Configure options with improved UI for dependencies options = """ var options = { "nodes": { @@ -430,7 +1190,8 @@ class RepositoryVisualizer: "enabled": true, "scaleFactor": 0.5 } - } + }, + "shadow": true }, "layout": { "hierarchical": { @@ -443,158 +1204,328 @@ class RepositoryVisualizer: }, "physics": { "enabled": false + }, + "interaction": { + "hover": true, + "tooltipDelay": 200, + "hideEdgesOnDrag": true, + "navigationButtons": true + }, + "configure": { + "enabled": true, + "filter": ["layout", "nodes", "edges"], + "showButton": true } } """ net.set_options(options) - # Save to HTML file - net.save_graph(output_path) - - return output_path - - def create_commit_activity_chart(self, commits: List[Dict], output_path: str = "commit_activity.html") -> str: - """ - Create an interactive chart showing commit activity over time - - Args: - commits: List of commit data - output_path: Path to save the HTML visualization - - Returns: - Path to the saved HTML file - """ - # Prepare commit data by month - monthly_data = defaultdict(int) - - for commit in commits: - date = commit.get('date') - if date: - # Format as year-month - month_key = date.strftime('%Y-%m') - monthly_data[month_key] += 1 - - # Sort by date - sorted_data = sorted(monthly_data.items()) - - # Create HTML with Chart.js - html = """ + # Add search and controls similar to previous graphs + html_before = """ - Commit Activity - + + File Dependency Graph -
-

Repository Commit Activity

- +
+ + + +
- +
+

Dependency Statistics

+

Files: 0

+

Dependencies: 0

+

Click a file to see its dependencies

+
+ """ + + html_after = """ """ - # Replace placeholders with actual data - labels_json = json.dumps([d[0] for d in sorted_data]) - data_json = json.dumps([d[1] for d in sorted_data]) - - html = html.replace('CHART_LABELS', labels_json) - html = html.replace('CHART_DATA', data_json) - - # Save to file + # Save to HTML file with custom HTML + net.save_graph(output_path) + + # Read the generated file + with open(output_path, 'r', encoding='utf-8') as f: + net_html = f.read() + + # Insert our custom HTML + net_html = net_html.replace('', html_before).replace('', html_after) + + # Write the modified file with open(output_path, 'w', encoding='utf-8') as f: - f.write(html) + f.write(net_html) return output_path - def create_code_change_heatmap(self, commits: List[Dict], output_path: str = "code_changes.html") -> str: + def create_commit_activity_chart(self, commits: List[Dict], output_path: str = "commit_activity.html") -> str: """ - Create a heatmap showing which files are changed most frequently + Create an enhanced interactive chart showing commit activity over time Args: commits: List of commit data @@ -603,220 +1534,830 @@ class RepositoryVisualizer: Returns: Path to the saved HTML file """ - # Count file modifications - file_changes = Counter() + # Prepare commit data by month + monthly_data = defaultdict(int) + author_data = defaultdict(lambda: defaultdict(int)) + file_type_data = defaultdict(lambda: defaultdict(int)) for commit in commits: - for file_data in commit.get('files', []): - filename = file_data.get('filename', '') - if filename: - file_changes[filename] += 1 - - # Get top files - top_files = file_changes.most_common(20) + date = commit.get('date') + author = commit.get('author', 'Unknown') + + if date: + # Format as year-month + month_key = date.strftime('%Y-%m') + monthly_data[month_key] += 1 + author_data[author][month_key] += 1 + + # Count file types in this commit + for file in commit.get('files', []): + filename = file.get('filename', '') + ext = os.path.splitext(filename)[1].lower() + if ext: + file_type_data[ext][month_key] += 1 - # Create HTML with Chart.js + # Sort by date + sorted_data = sorted(monthly_data.items()) + + # Prepare author data for chart + authors = list(author_data.keys()) + author_datasets = [] + + # Generate colors for authors + author_colors = [ + '#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6', + '#1abc9c', '#d35400', '#34495e', '#16a085', '#c0392b' + ] + + for i, author in enumerate(authors[:10]): # Limit to top 10 authors + color = author_colors[i % len(author_colors)] + author_data_points = [] + + for month_key, _ in sorted_data: + author_data_points.append(author_data[author].get(month_key, 0)) + + author_datasets.append({ + 'label': author, + 'data': author_data_points, + 'backgroundColor': color + '80', + 'borderColor': color, + 'borderWidth': 1 + }) + + # Create HTML with Chart.js and custom UI html = """ - + - Code Change Heatmap - + + + Repository Activity Analysis + + -
-

Most Frequently Modified Files

- +
+

Repository Commit Activity

+ +
+
+
0
+
Total Commits
+
+
+
0
+
Active Months
+
+
+
0
+
Avg. Commits per Month
+
+
+
0
+
Contributors
+
+
+ +
+
Activity Overview
+
By Contributor
+
By File Type
+
+ +
+
+
+ + +
+
+ + +
+
+ +
+
+ +
+ +
+
+ +
+
+
+ + +
+
+ + +
+
+ +
+ +
+ +

Contributor Commit Summary

+ + + + + + + + + + + + + +
ContributorCommitsPercentageFirst CommitLast Commit
+
+ +
+
+
+ + +
+
+ +
+ +
+ +

File Type Statistics

+ + + + + + + + + + + +
File TypeChangesPercentage
+
""" - # Prepare data for chart - labels = [os.path.basename(f[0]) for f in top_files] - data = [f[1] for f in top_files] + # Replace placeholders with actual data + labels_json = json.dumps([d[0] for d in sorted_data]) + data_json = json.dumps([d[1] for d in sorted_data]) + + # Author data for chart + author_data_json = json.dumps(author_data) + author_datasets_json = json.dumps(author_datasets) + + # File type data for chart + file_type_data_json = json.dumps(file_type_data) - # Replace placeholders - html = html.replace('FILE_LABELS', json.dumps(labels)) - html = html.replace('FILE_DATA', json.dumps(data)) + html = html.replace('CHART_LABELS', labels_json) + html = html.replace('CHART_DATA', data_json) + html = html.replace('AUTHOR_DATA', author_data_json) + html = html.replace('AUTHOR_DATASETS', author_datasets_json) + html = html.replace('FILE_TYPE_DATA', file_type_data_json) # Save to file with open(output_path, 'w', encoding='utf-8') as f: f.write(html) return output_path - def _get_important_subgraph(self, graph: nx.Graph, max_nodes: int) -> nx.Graph: - """Get a subgraph containing the most important nodes""" - # Calculate node importance - if len(graph.nodes) <= max_nodes: - return graph - - # Try different centrality measures - try: - # First try degree centrality - centrality = nx.degree_centrality(graph) - except: - # Fall back to simpler degree if that fails - centrality = {node: graph.degree(node) for node in graph.nodes()} - - # Sort nodes by importance - sorted_nodes = sorted(centrality.items(), key=lambda x: x[1], reverse=True) - # Take top nodes - top_nodes = [node for node, _ in sorted_nodes[:max_nodes]] - - # Create subgraph - return graph.subgraph(top_nodes) - - def _extract_dependencies(self, file_contents: Dict) -> Dict[str, List[str]]: - """Extract file dependencies based on imports and includes""" - dependencies = defaultdict(list) - - # Map of common import patterns by language - import_patterns = { - 'py': [ - r'^\s*import\s+(\w+)', # import module - r'^\s*from\s+(\w+)', # from module import - r'^\s*import\s+([\w.]+)' # import module.submodule - ], - 'js': [ - r'^\s*import.*from\s+[\'"](.+)[\'"]', # ES6 import - r'^\s*require\([\'"](.+)[\'"]\)', # CommonJS require - r'^\s*import\s+[\'"](.+)[\'"]' # Side-effect import - ], - 'java': [ - r'^\s*import\s+([\w.]+)' # Java import - ], - 'cpp': [ - r'^\s*#include\s+[<"](.+)[>"]' # C/C++ include - ], - 'go': [ - r'^\s*import\s+[\'"](.+)[\'"]', # Go single import - r'^\s*import\s+\(\s*[\'"](.+)[\'"]' # Go multiple imports - ] - } - - # Process each file - for filename, file_data in file_contents.items(): - # Get file extension - _, ext = os.path.splitext(filename) - ext = ext.lstrip('.').lower() if ext else '' - - # Skip if we don't have patterns for this language - if ext not in import_patterns: - continue - - # Get content - content = file_data.get('content', '') - if not content: - continue - - # Search for imports - lines = content.split('\n') - patterns = import_patterns[ext] - - for line in lines: - for pattern in patterns: - # Find imports - import_match = re.search(pattern, line) - if import_match: - imported = import_match.group(1) - - # Look for matching files - for target_file in file_contents.keys(): - target_name = os.path.basename(target_file) - target_module = os.path.splitext(target_name)[0] - - # Check if this might be the imported file - if imported == target_module or imported.endswith('.' + target_module): - dependencies[filename].append(target_file) - break + def create_code_change_heatmap(self, commits: List[Dict], output_path: str = "code_changes.html") -> str: + """ + Create an enhanced heatmap showing which files are changed most frequently - return dependencies + Args: + commits: List of commit data + output_path: Path to save the HTML visualization - def _format_size(self, size_bytes: int) -> str: - """Format file size in human-readable format""" - if size_bytes < 1024: - return f"{size_bytes} bytes" - elif size_bytes < 1024 * 1024: - return f"{size_bytes / 1024:.1f} KB" - else: - return f"{size_bytes / (1024 * 1024):.1f} MB" \ No newline at end of file + Returns: + Path to the saved HTML file + """ + # Count file modifications + file_changes = Counter() + file_authors = defaultdict(Counter) + file_dates = defaultdict(list) + + for commit in commits: + author = commit.get('author', 'Unknown') + date = commit.get('date') + + for file_data in commit.get('files', []): + filename = file_data.get('filename', '') + if filename: + file_changes[filename] += 1 + file_authors[filename][author] += 1 + if date: + file_dates[filename].append(date) \ No newline at end of file