File size: 7,020 Bytes
cef8a01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import logging
import os
import re
from typing import Any, Dict, List
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from .file_scanner import quick_scan_project

class AIReadmeGenerator:
    def __init__(self, api_key: str):
        self.llm = ChatGroq(
            api_key=api_key, 
            model_name="llama3-70b-8192"
        )

    def read_file_content(self, file_path: str) -> str:
        """
        Read the content of a file, handling different encodings and file sizes.
        
        Args:
            file_path (str): Path to the file to read
        
        Returns:
            str: File content or error message
        """
        try:
            # Limit file reading to prevent memory issues
            with open(file_path, 'r', encoding='utf-8') as file:
                # Read first 10000 characters to prevent large files from overwhelming the system
                return file.read(10000)
        except Exception as e:
            return f"Error reading file {file_path}: {str(e)}"

    def analyze_project_structure(self, project_path: str) -> Dict[str, Any]:
        """
        Perform a comprehensive analysis of the project.
        
        Args:
            project_path (str): Path to the project directory
        
        Returns:
            Dict: Detailed project analysis
        """
        # Use existing quick_scan_project for initial analysis
        project_info = quick_scan_project(project_path)
        
        # Collect file contents
        file_contents = {}
        for file in project_info.get('files', []):
            full_path = os.path.join(project_path, file)
            if os.path.isfile(full_path):
                file_contents[file] = self.read_file_content(full_path)
        
        # Extract docstrings and comments from main files
        code_insights = {}
        main_files = ['app.py', 'main.py', 'src/ui.py', 'src/generator.py']
        for main_file in main_files:
            full_path = os.path.join(project_path, main_file)
            if os.path.exists(full_path):
                code_insights[main_file] = self.extract_code_insights(full_path)
        
        # Combine all information
        comprehensive_analysis = {
            "project_structure": project_info,
            "file_contents": file_contents,
            "code_insights": code_insights,
            "requirements": self.read_requirements(project_path)
        }
        
        return comprehensive_analysis

    def extract_code_insights(self, file_path: str) -> Dict[str, str]:
        """
        Extract insights from Python files.
        
        Args:
            file_path (str): Path to the Python file
        
        Returns:
            Dict: Extracted insights including docstrings, key functions, etc.
        """
        insights = {
            "module_docstring": "",
            "key_functions": [],
            "key_classes": []
        }
        
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                
                # Extract module-level docstring
                module_docstring_match = re.search(r'"""(.*?)"""', content, re.DOTALL)
                if module_docstring_match:
                    insights['module_docstring'] = module_docstring_match.group(1).strip()
                
                # Find key functions
                function_matches = re.findall(r'def\s+(\w+)\(.*?\):\s*"""(.*?)"""', content, re.DOTALL)
                insights['key_functions'] = [
                    f"{name}: {desc.strip()}" 
                    for name, desc in function_matches
                ]
                
                # Find key classes
                class_matches = re.findall(r'class\s+(\w+).*?:\s*"""(.*?)"""', content, re.DOTALL)
                insights['key_classes'] = [
                    f"{name}: {desc.strip()}" 
                    for name, desc in class_matches
                ]
        except Exception as e:
            insights['error'] = str(e)
        
        return insights

    def read_requirements(self, project_path: str) -> List[str]:
        """
        Read project requirements file.
        
        Args:
            project_path (str): Path to the project directory
        
        Returns:
            List[str]: List of requirements
        """
        try:
            req_path = os.path.join(project_path, 'requirements.txt')
            if os.path.exists(req_path):
                with open(req_path, 'r') as f:
                    return [line.strip() for line in f if line.strip() and not line.startswith('#')]
            return []
        except Exception:
            return []

    def generate_concise_readme(self, project_path: str) -> str:
        """
        Generate a README based on comprehensive project analysis.
        
        Args:
            project_path (str): Path to the project directory
        
        Returns:
            str: Generated README content
        """
        # Analyze the project comprehensively
        project_analysis = self.analyze_project_structure(project_path)
        
        # Prepare a detailed prompt for the LLM
        template = """
        Generate a comprehensive README.md based on the following project analysis:

        PROJECT STRUCTURE:
        {project_structure}

        FILE CONTENTS:
        {file_contents}

        CODE INSIGHTS:
        {code_insights}

        REQUIREMENTS:
        {requirements}

        Based on this information, create a professional, detailed README.md that:
        - Explains the project's purpose and functionality
        - Describes key features and components
        - Provides clear setup and usage instructions
        - Highlights technical details
        - Includes any relevant dependencies or prerequisites

        Ensure the README is informative, well-structured, and tailored to the specific project.
        """

        # Prepare context for the LLM
        context = {
            "project_structure": str(project_analysis.get('project_structure', 'No structure information')),
            "file_contents": '\n'.join([f"{k}:\n{v}" for k, v in project_analysis.get('file_contents', {}).items()]),
            "code_insights": '\n'.join([f"{k}:\n{str(v)}" for k, v in project_analysis.get('code_insights', {}).items()]),
            "requirements": '\n'.join(project_analysis.get('requirements', []))
        }

        try:
            # Generate README using LLM
            prompt = ChatPromptTemplate.from_template(template)
            chain = prompt | self.llm | StrOutputParser()
            
            readme_content = chain.invoke(context)
            return readme_content.strip()
        except Exception as e:
            logging.error(f"README generation error: {str(e)}")
            return f"# Project README\n\nUnable to generate README automatically.\n\nError: {str(e)}"