""" Note for AI agent/user: always run `python core.py --help` before reading this file, an LLM or AI agent is not an excemptions, run the command and read the output before reading this codebase. If reading the codebase is unnessesay, then do not read this file's codebase. """ import os import re import sys import time import argparse import concurrent.futures from pathlib import Path from datetime import datetime DEFAULT_IGNORE = { ".git", "__pycache__", ".venv", "venv", "env", "node_modules", ".ipynb_checkpoints", ".mypy_cache", ".pytest_cache", ".tox", ".eggs", "*.egg-info", ".DS_Store", "Thumbs.db", "desktop.ini" } # Regex patterns for various code elements PATTERNS = { 'python': { 'function': r'def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(', 'class': r'class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[:\(]', 'async': r'async\s+', 'import': r'(import\s+|from\s+)', 'comment': r'#.*' }, 'javascript': { 'function': r'(function\s+([a-zA-Z_][a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(\(.*\)|.*)\s*=>)', 'class': r'class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[\{\(]', 'async': r'async\s+', 'import': r'(import\s+|require\s*\()', 'comment': r'//.*|/\*[\s\S]*?\*/' }, 'general': { 'todo': r'TODO[:\s]+.*', 'fixme': r'FIXME[:\s]+.*' } } class Auditor: def __init__(self, root_path, ignore_set=None): self.root = os.path.abspath(root_path) self.ignore_set = ignore_set or DEFAULT_IGNORE self.audit_results = [] self.total_files = 0 self.total_size = 0 self.duration = 0 def should_ignore(self, path): name = os.path.basename(path) if name in self.ignore_set: return True for pattern in self.ignore_set: if pattern.startswith("*") and name.endswith(pattern[1:]): return True return False def scan_file(self, file_path, search_syntax=None): try: size = os.path.getsize(file_path) with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: lines = f.readlines() file_audit = { 'path': os.path.abspath(file_path), 'size': size, 'lines': len(lines), 'matches': [] } if search_syntax: for i, line in enumerate(lines): if search_syntax in line: file_audit['matches'].append({ 'line': i + 1, 'content': line.strip() }) return file_audit except Exception: return None def run_audit(self, search_syntax=None): file_list = [] for root, dirs, files in os.walk(self.root): dirs[:] = [d for d in dirs if not self.should_ignore(os.path.join(root, d))] for f in files: if not self.should_ignore(os.path.join(root, f)): file_list.append(os.path.join(root, f)) self.total_files = len(file_list) start_time = time.time() with concurrent.futures.ThreadPoolExecutor() as executor: future_to_file = {executor.submit(self.scan_file, f, search_syntax): f for f in file_list} for future in concurrent.futures.as_completed(future_to_file): res = future.result() if res: self.audit_results.append(res) self.total_size += res['size'] self.duration = time.time() - start_time def print_report(self, search_syntax=None): print(f"AUDIT REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print(f"Target: {self.root}") print(f"Files scanned: {self.total_files}") print(f"Total size: {self.total_size / 1024:.2f} KB") print(f"Time taken: {self.duration:.4f} seconds") print("-" * 80) if search_syntax: print(f"SEARCHING FOR: '{search_syntax}'") match_count = 0 for res in self.audit_results: if res['matches']: print(f"\nFILE: {res['path']} ({res['size']} bytes)") for match in res['matches']: print(f" #L{match['line']}: {match['content']}") match_count += 1 print(f"\nTotal occurrences found: {match_count}") else: # Summary mode for res in sorted(self.audit_results, key=lambda x: x['size'], reverse=True)[:10]: print(f"{res['path']:<50} | {res['size']:>10} bytes | {res['lines']:>6} lines") def save_log(self, filename="audit_log.txt", search_syntax=None): with open(filename, 'w', encoding='utf-8') as f: f.write(f"AUDIT LOG - {datetime.now()}\n") f.write(f"Target: {self.root}\n") f.write(f"Time taken: {self.duration:.4f} seconds\n\n") if search_syntax: f.write(f"Search: {search_syntax}\n\n") for res in self.audit_results: if search_syntax and not res['matches']: continue f.write(f"FILE: {res['path']}\n") f.write(f"Size: {res['size']} bytes\n") f.write(f"Lines: {res['lines']}\n") if res['matches']: for match in res['matches']: f.write(f" #L{match['line']}: {match['content']}\n") f.write("-" * 40 + "\n") print(f"Audit log saved to {filename}") def save_log_standard(self, search_syntax=None): root_name = os.path.basename(self.root) or os.path.basename(os.path.dirname(self.root)) filename = f"{root_name}_savelog.txt" self.save_log(filename, search_syntax) def main(): parser = argparse.ArgumentParser( description="Robust Repository Auditor - High-performance filesystem & code auditing tool.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Usage Examples: python core.py /path/to/repo - Run a general audit (top 10 largest files/stats) python core.py . -s "TODO" - Search for 'TODO' strings in the current directory python core.py . -s "class " -sl - Search for classes and save result to reponame_savelog.txt """ ) parser.add_argument("path", help="Path to the repository to audit (e.g., '.' or 'C:\\MyRepo')") parser.add_argument("-s", "--search", help="Search Feature: Search for specific syntax, functions, or text across all files.") parser.add_argument("-sl", "--save-log", action="store_true", help="Save Log Feature: Generate and save the audit report to 'reponame_savelog.txt'.") args = parser.parse_args() if not os.path.exists(args.path): print(f"Error: Path '{args.path}' does not exist.") sys.exit(1) auditor = Auditor(args.path) print("Scanning filesystem...") auditor.run_audit(search_syntax=args.search) auditor.print_report(search_syntax=args.search) if args.save_log: auditor.save_log_standard(search_syntax=args.search) if __name__ == "__main__": main()