# NOTE(review): removed Hugging Face Spaces page-scrape artifacts that were
# interleaved here (status lines, "File size" footer, commit hash, and a
# line-number gutter) — they were not part of the Python source and made the
# file syntactically invalid.
import ast
import logging
import os
import re
from typing import Any, Dict, List

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

from .file_scanner import quick_scan_project
class AIReadmeGenerator:
    """Generate a README.md for a project by feeding a structural analysis
    of the project's files (contents, docstrings, requirements) to a
    Groq-hosted LLM via LangChain.
    """

    # Cap on characters read from any single project file, to keep memory
    # use and the LLM prompt size bounded.
    MAX_FILE_CHARS = 10000

    def __init__(self, api_key: str):
        """
        Args:
            api_key (str): Groq API key used to authenticate the chat model.
        """
        self.llm = ChatGroq(
            api_key=api_key,
            model_name="llama3-70b-8192"
        )

    def read_file_content(self, file_path: str) -> str:
        """
        Read the content of a file, truncated to MAX_FILE_CHARS characters.

        Args:
            file_path (str): Path to the file to read

        Returns:
            str: File content (possibly truncated), or an inline error
            message if the file cannot be read.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                # Truncate so one huge file cannot overwhelm memory or the
                # prompt later assembled from these contents.
                return file.read(self.MAX_FILE_CHARS)
        except Exception as e:
            # Best-effort: unreadable files are reported inline rather than
            # aborting the whole project analysis.
            return f"Error reading file {file_path}: {str(e)}"

    def analyze_project_structure(self, project_path: str) -> Dict[str, Any]:
        """
        Perform a comprehensive analysis of the project.

        Args:
            project_path (str): Path to the project directory

        Returns:
            Dict: Keys 'project_structure' (quick_scan_project output),
            'file_contents' (file -> truncated text), 'code_insights'
            (docstring summaries of well-known entry files), and
            'requirements' (list of requirement lines).
        """
        # Use the existing scanner for the structural overview.
        project_info = quick_scan_project(project_path)

        # Collect (truncated) contents of every file the scanner reported.
        file_contents = {}
        for file in project_info.get('files', []):
            full_path = os.path.join(project_path, file)
            if os.path.isfile(full_path):
                file_contents[file] = self.read_file_content(full_path)

        # Extract docstrings from conventional entry-point files, if present.
        code_insights = {}
        main_files = ['app.py', 'main.py', 'src/ui.py', 'src/generator.py']
        for main_file in main_files:
            full_path = os.path.join(project_path, main_file)
            if os.path.exists(full_path):
                code_insights[main_file] = self.extract_code_insights(full_path)

        return {
            "project_structure": project_info,
            "file_contents": file_contents,
            "code_insights": code_insights,
            "requirements": self.read_requirements(project_path)
        }

    def extract_code_insights(self, file_path: str) -> Dict[str, Any]:
        """
        Extract docstring-based insights from a Python source file.

        Parses the file with the `ast` module instead of regular
        expressions, so single-quoted docstrings, async functions, and
        docstrings containing quotes are all handled correctly.

        Args:
            file_path (str): Path to the Python file

        Returns:
            Dict: 'module_docstring' (str), plus 'key_functions' and
            'key_classes' (lists of "name: docstring" strings for
            documented definitions). An 'error' key is added if the file
            cannot be read or parsed as Python.
        """
        insights: Dict[str, Any] = {
            "module_docstring": "",
            "key_functions": [],
            "key_classes": []
        }
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                tree = ast.parse(file.read())

            insights['module_docstring'] = (ast.get_docstring(tree) or "").strip()

            for node in ast.walk(tree):
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    target = 'key_functions'
                elif isinstance(node, ast.ClassDef):
                    target = 'key_classes'
                else:
                    continue
                # Only documented definitions are "key" — the point is to
                # surface human-written descriptions for the LLM.
                doc = ast.get_docstring(node)
                if doc:
                    insights[target].append(f"{node.name}: {doc.strip()}")
        except Exception as e:
            # Includes SyntaxError for files that are not valid Python.
            insights['error'] = str(e)
        return insights

    def read_requirements(self, project_path: str) -> List[str]:
        """
        Read the project's requirements.txt.

        Args:
            project_path (str): Path to the project directory

        Returns:
            List[str]: Non-empty, non-comment requirement lines; an empty
            list if the file is absent or unreadable.
        """
        try:
            req_path = os.path.join(project_path, 'requirements.txt')
            if os.path.exists(req_path):
                with open(req_path, 'r') as f:
                    # Strip before the comment check so indented '#' lines
                    # are skipped too (previously only column-0 comments were).
                    stripped = (line.strip() for line in f)
                    return [line for line in stripped
                            if line and not line.startswith('#')]
            return []
        except Exception:
            # Missing/unreadable requirements are not fatal to generation.
            return []

    def generate_concise_readme(self, project_path: str) -> str:
        """
        Generate a README based on comprehensive project analysis.

        Args:
            project_path (str): Path to the project directory

        Returns:
            str: Generated README content, or a fallback README containing
            the error message if the LLM call fails.
        """
        project_analysis = self.analyze_project_structure(project_path)

        # Prompt template: the four analysis sections are interpolated by
        # LangChain, so the {placeholders} must match the context keys below.
        template = """
        Generate a comprehensive README.md based on the following project analysis:
        PROJECT STRUCTURE:
        {project_structure}
        FILE CONTENTS:
        {file_contents}
        CODE INSIGHTS:
        {code_insights}
        REQUIREMENTS:
        {requirements}
        Based on this information, create a professional, detailed README.md that:
        - Explains the project's purpose and functionality
        - Describes key features and components
        - Provides clear setup and usage instructions
        - Highlights technical details
        - Includes any relevant dependencies or prerequisites
        Ensure the README is informative, well-structured, and tailored to the specific project.
        """

        # Flatten the analysis into plain strings for the prompt.
        context = {
            "project_structure": str(project_analysis.get('project_structure', 'No structure information')),
            "file_contents": '\n'.join([f"{k}:\n{v}" for k, v in project_analysis.get('file_contents', {}).items()]),
            "code_insights": '\n'.join([f"{k}:\n{str(v)}" for k, v in project_analysis.get('code_insights', {}).items()]),
            "requirements": '\n'.join(project_analysis.get('requirements', []))
        }

        try:
            # prompt -> model -> plain-string output.
            prompt = ChatPromptTemplate.from_template(template)
            chain = prompt | self.llm | StrOutputParser()
            readme_content = chain.invoke(context)
            return readme_content.strip()
        except Exception as e:
            logging.error(f"README generation error: {str(e)}")
            # Degrade gracefully: return a stub README instead of raising.
            return f"# Project README\n\nUnable to generate README automatically.\n\nError: {str(e)}"