File size: 4,905 Bytes

708f4a3

#!/usr/bin/env python3
"""
CRAYON Codebase Exporter

Exports all source code files (.py, .c, .cpp, .h, .hpp, .cu, .cuh, .hip) from
the repository into a single consolidated .txt file for documentation or
analysis purposes.
"""

import os
from datetime import datetime
from fnmatch import fnmatchcase
from pathlib import Path

# --- Configuration ----------------------------------------------------------

# The directory holding this script is used as the root of the scan, and the
# consolidated export is written directly inside it.
REPO_ROOT = Path(__file__).parent
OUTPUT_FILE = REPO_ROOT.joinpath("CRAYON_Full_Codebase.txt")

# Suffixes of the files that get exported.
EXTENSIONS = {
    ".c",
    ".cpp",
    ".cu",
    ".cuh",
    ".h",
    ".hip",
    ".hpp",
    ".py",
}

# Directory names that are left out of the export.
EXCLUDE_DIRS = {
    # virtual environments
    "venv",
    ".venv",
    "env",
    ".env",
    "site-packages",
    # caches, VCS and editor metadata
    "__pycache__",
    ".git",
    ".idea",
    ".vscode",
    # dependency and build artefacts
    "node_modules",
    "build",
    "dist",
    "egg-info",
    ".eggs",
    "*.egg-info",
}

# File names that are left out of the export.
EXCLUDE_FILES = {
    "export_codebase.py",  # keep the exporter script itself out of the export
}


def should_exclude_dir(dir_name: str) -> bool:
    """Return True if a directory with this name should be skipped.

    A directory is excluded when its name starts with a dot or when it
    matches an entry of ``EXCLUDE_DIRS``. Entries are interpreted as
    case-sensitive shell-style patterns, so an entry such as ``'*.egg-info'``
    matches ``'pkg.egg-info'``; entries without wildcards match only the
    identical name.

    Args:
        dir_name: A single path component (not a full path).
    """
    # Cheap checks first: dot-prefixed names and exact set hits.
    if dir_name.startswith('.') or dir_name in EXCLUDE_DIRS:
        return True
    # Plain membership can never satisfy wildcard entries, so fall back to
    # pattern matching. fnmatchcase keeps the comparison case-sensitive on
    # every platform, like the exact-name check above.
    return any(fnmatchcase(dir_name, pattern) for pattern in EXCLUDE_DIRS)


def should_include_file(file_path: Path) -> bool:
    """Decide whether *file_path* belongs in the export.

    The file is accepted only when all of the following hold:

    * its name is not listed in ``EXCLUDE_FILES``;
    * its suffix, lower-cased, is one of ``EXTENSIONS``;
    * no component of the path is rejected by ``should_exclude_dir``.

    Note that every component of the path as given is screened, including
    the file name itself.
    """
    name_rejected = file_path.name in EXCLUDE_FILES
    if name_rejected or file_path.suffix.lower() not in EXTENSIONS:
        return False
    return not any(should_exclude_dir(component) for component in file_path.parts)


def get_file_header(file_path: Path, relative_path: Path) -> str:
    """Return the banner that introduces one file in the export.

    The banner is a blank line, an 80-character ``=`` rule, a
    ``FILE: <relative_path>`` line and a second rule, ending with a newline.
    ``file_path`` is accepted but does not influence the result.
    """
    rule = "=" * 80
    banner_lines = ["", rule, f"FILE: {relative_path}", rule, ""]
    return "\n".join(banner_lines)


def collect_files(root: Path) -> list:
    """Collect all matching files below *root*.

    Walks the tree under ``root``, skipping every directory rejected by
    ``should_exclude_dir``, and keeps the files accepted by
    ``should_include_file``.

    Args:
        root: Directory to scan.

    Returns:
        A sorted list of paths, each formed by joining the walked directory
        with the file name (so they carry the same prefix as ``root``).
    """
    root = Path(root)
    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into excluded directories.
        dirnames[:] = [d for d in dirnames if not should_exclude_dir(d)]

        current_dir = Path(dirpath)
        # Screen the path relative to the scan root. Screening the full path
        # would also test the components leading up to ``root``, so a
        # checkout located under e.g. ``build/`` or a dot-directory (or a
        # root given as ``..``) would have every file rejected.
        relative_dir = current_dir.relative_to(root)

        for filename in filenames:
            if should_include_file(relative_dir / filename):
                found.append(current_dir / filename)

    # Sort files for consistent output
    return sorted(found)


def _count_lines(text: str) -> int:
    """Return the number of lines in *text*.

    A trailing newline ends the last line instead of starting another one,
    so newline-terminated text is not over-counted and empty text has zero
    lines.
    """
    newline_count = text.count('\n')
    if text and not text.endswith('\n'):
        # The final line has no terminator but still counts.
        return newline_count + 1
    return newline_count


def _build_export_header(relative_paths: list) -> str:
    """Build the banner, table of contents and divider that open the export."""
    extensions = ', '.join(sorted(EXTENSIONS))
    banner = f"""{'#' * 80}
#
# XERV CRAYON - Complete Codebase Export
#
# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
# Total Files: {len(relative_paths)}
# Extensions: {extensions}
#
{'#' * 80}

TABLE OF CONTENTS
{'=' * 40}
"""
    toc = "\n".join(
        f"{i:4d}. {relative}" for i, relative in enumerate(relative_paths, 1)
    )
    divider = f"\n\n{'=' * 80}\nFILE CONTENTS\n{'=' * 80}\n"
    return banner + toc + divider


def export_codebase():
    """Export every matching source file under ``REPO_ROOT`` to ``OUTPUT_FILE``.

    Scans the repository with ``collect_files``, reads each file as UTF-8
    (undecodable bytes are replaced), and writes one text file consisting of
    a banner, a table of contents and the content of each file preceded by
    its ``get_file_header`` banner. Progress and a summary are printed to
    stdout.

    Files that cannot be read are reported with ``[ERR]`` and left out of
    the table of contents, the file count and the output. If no matching
    files are found, nothing is written.

    Raises:
        OSError: If the output file cannot be written.
    """
    print("=" * 60)
    print("CRAYON Codebase Exporter")
    print("=" * 60)
    print(f"\nScanning: {REPO_ROOT}")
    print(f"Extensions: {', '.join(sorted(EXTENSIONS))}")
    print()

    # Collect all files
    files = collect_files(REPO_ROOT)

    if not files:
        print("No matching files found!")
        return

    print(f"Found {len(files)} source files\n")

    total_lines = 0
    exported = []  # relative paths of the files that were actually read
    sections = []  # one "banner + content" string per exported file

    # Read everything first so the header and table of contents only list
    # files that really made it into the export.
    for file_path in files:
        relative = file_path.relative_to(REPO_ROOT)

        try:
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                file_content = f.read()
        except OSError as e:
            # Best effort: report the unreadable file and carry on.
            print(f"  [ERR] {relative} - Error: {e}")
            continue

        lines = _count_lines(file_content)
        total_lines += lines

        section = get_file_header(file_path, relative) + file_content
        if not file_content.endswith('\n'):
            # Keep the next file's banner from sticking to the last line.
            section += '\n'

        sections.append(section)
        exported.append(relative)
        print(f"  [OK] {relative} ({lines} lines)")

    # Write output file
    print(f"\nWriting to: {OUTPUT_FILE}")

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(_build_export_header(exported))
        f.writelines(sections)

    # Summary
    output_size = OUTPUT_FILE.stat().st_size
    skipped = len(files) - len(exported)
    print("\n" + "=" * 60)
    print("EXPORT COMPLETE")
    print("=" * 60)
    print(f"  Files exported:  {len(exported)}")
    if skipped:
        print(f"  Files skipped:   {skipped}")
    print(f"  Total lines:     {total_lines:,}")
    print(f"  Output size:     {output_size / 1024:.2f} KB")
    print(f"  Output file:     {OUTPUT_FILE.name}")
    print("=" * 60)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    export_codebase()