File size: 12,580 Bytes
3e802a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# codescribe/readme_generator.py

import os
import shutil
import ast
from pathlib import Path
from typing import List, Callable

from . import scanner
from .llm_handler import LLMHandler

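# Prompt templates: two for generating a new README (subdirectory / project root)
# and two for updating an existing one. Each is filled in with `str.format`.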
# Prompt for a non-root directory whose existing README.md content is missing
# or empty (selected in `ReadmeGenerator._build_prompt`).
# Placeholders: {current_dir_relative}, {dir_name}, {project_description},
# {file_summaries}, {subdirectory_readmes}.
SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating a README.md file for a specific directory within a larger project. Your tone should be informative and concise.

USER:
You are generating a `README.md` for the directory: `{current_dir_relative}`

The overall project description is:
"{project_description}"

---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---

This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---

TASK:
Write a `README.md` for the `{current_dir_relative}` directory.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Briefly explain the purpose of this directory based on the files it contains.
- If there are subdirectories, provide a section summarizing what each one does, using the context from their READMEs.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""

# Prompt for the project root when its existing README.md content is missing
# or empty (selected in `ReadmeGenerator._build_prompt`).
# Placeholders: {project_name}, {project_description}, {file_summaries},
# {subdirectory_readmes}.
ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive.

USER:
You are generating the main `README.md` for a project.

The user-provided project description is:
"{project_description}"

---
The project's root directory contains the following source code files:
{file_summaries}
---

The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---

TASK:
Write a comprehensive `README.md` for the entire project. Structure it with the following sections:
- A main title (`# Project: {project_name}`).
- **Overview**: A slightly more detailed version of the user's description, enhanced with context from the files and subdirectories.
- **Project Structure**: A description of the key directories and their roles, using the information from the subdirectory READMEs.
- **Key Features**: Infer and list the key features of the project based on all the provided context.
"""

# Prompt for a non-root directory that already has non-empty README.md content
# (selected in `ReadmeGenerator._build_prompt`); asks for a rewrite of the old
# README guided by the user's note.
# Placeholders: {current_dir_relative}, {dir_name}, {user_note},
# {project_description}, {file_summaries}, {subdirectory_readmes},
# {existing_readme}.
UPDATE_SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating a README.md file for a specific directory. Your tone should be informative and concise. A user has provided a note with instructions.

USER:
You are updating the `README.md` for the directory: `{current_dir_relative}`

The user-provided note with instructions for this update is:
"{user_note}"

The overall project description is:
"{project_description}"
---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---
This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---
Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---
TASK:
Rewrite the `README.md` for the `{current_dir_relative}` directory, incorporating the user's note and any new information from the files and subdirectories.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Use the existing content as a base, but modify it as needed.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""

# Prompt for the project root when it already has non-empty README.md content
# (selected in `ReadmeGenerator._build_prompt`); asks for a rewrite of the old
# README guided by the user's note.
# Placeholders: {project_name}, {user_note}, {project_description},
# {file_summaries}, {subdirectory_readmes}, {existing_readme}.
UPDATE_ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive. A user has provided a note with instructions.

USER:
You are updating the main `README.md` for a project.

The user-provided note with instructions for this update is:
"{user_note}"

The user-provided project description is:
"{project_description}"
---
The project's root directory contains the following source code files:
{file_summaries}
---
The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---
Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---
TASK:
Rewrite a comprehensive `README.md` for the entire project. Structure it with the following sections, using the old README as a base but incorporating changes based on the user's note and new context.
- A main title (`# Project: {project_name}`).
- **Overview**: An updated version of the user's description, enhanced with context.
- **Project Structure**: A description of the key directories and their roles.
- **Key Features**: Infer and list key features based on all the provided context.
"""

def no_op_callback(event: str, data: dict):
    """Default progress callback: accept an event name and payload and ignore both."""
    return None

class ReadmeGenerator:
    def __init__(self, path_or_url: str, description: str, exclude: List[str], llm_handler: LLMHandler, user_note: str = "", repo_full_name="", progress_callback: Callable[[str, dict], None] = no_op_callback):
        self.path_or_url = path_or_url
        self.description = description
        self.exclude = exclude + ["README.md"]
        self.llm_handler = llm_handler
        self.user_note = user_note
        self.progress_callback = progress_callback
        self.project_path = None
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name  = repo_full_name
        
        def llm_log_wrapper(message: str):
            self.progress_callback("log", {"message": message})
        
        self.llm_handler.progress_callback = llm_log_wrapper

    def run(self):
        """
        Resolve the project path, generate all READMEs and report the phase
        outcome.

        Emits a "phase" event with status "in-progress" before generation and
        "success" afterwards. On any exception a "log" event with the error
        text and a "phase" event with status "error" are emitted, then the
        exception is re-raised. When the input started with "http", the
        resolved project directory is removed afterwards, on success or failure.
        """
        phase_id = "readmes"
        try:
            self.project_path = scanner.get_project_path(self.path_or_url)
            self.progress_callback(
                "phase",
                {"id": phase_id, "name": "Generating READMEs", "status": "in-progress"},
            )
            self.run_with_structured_logging()
            self.progress_callback("phase", {"id": phase_id, "status": "success"})
        except Exception as exc:
            failure_note = f"An unexpected error occurred in README generation: {exc}"
            self.progress_callback("log", {"message": failure_note})
            self.progress_callback("phase", {"id": phase_id, "status": "error"})
            raise
        finally:
            # project_path is still None if path resolution itself failed.
            cleanup_target = self.project_path
            if self.is_temp_dir and cleanup_target and cleanup_target.exists():
                shutil.rmtree(cleanup_target, ignore_errors=True)

    def _summarize_py_file(self, file_path: Path) -> str:
        """
        Extracts the module-level docstring, or a list of function/class names
        as a fallback, to summarize a Python file.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            tree = ast.parse(content)
            
            # 1. Prioritize the module-level docstring
            docstring = ast.get_docstring(tree)
            if docstring:
                return f"`{file_path.name}`: {docstring.strip().splitlines()[0]}"

            # 2. Fallback: Find function and class names
            definitions = []
            for node in tree.body:
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                    definitions.append(node.name)
            
            if definitions:
                summary = f"Contains definitions for: `{', '.join(definitions)}`."
                return f"`{file_path.name}`: {summary}"

        except Exception as e:
            self.progress_callback("log", {"message": f"Could not parse {file_path.name} for summary: {e}"})
        
        # 3. Final fallback
        return f"`{file_path.name}`: A Python source file."

    def run_with_structured_logging(self):
        """
        Generate a README.md for every non-excluded directory, deepest first.

        Directories are walked bottom-up (`topdown=False`) so that the READMEs
        written for child directories already exist when their parent's prompt
        is built and can be embedded in it.

        Events emitted through `progress_callback` for each directory:
          - "subtask" with status "in-progress" when work on it starts;
          - "subtask" with status "success" once its README is written;
          - on failure, a "log" event with the error text followed by a
            "subtask" event with status "error".

        A failure in one directory is reported and the walk continues with the
        next directory; nothing is raised for per-directory errors.
        """
        if not self.project_path:
            self.project_path = scanner.get_project_path(self.path_or_url)

        for dir_path, subdir_names, file_names in os.walk(self.project_path, topdown=False):
            current_dir = Path(dir_path)

            if scanner.is_excluded(current_dir, self.exclude, self.project_path):
                continue

            rel_path = current_dir.relative_to(self.project_path).as_posix()
            dir_id = rel_path if rel_path != "." else "root"
            dir_name_display = rel_path if rel_path != "." else "Project Root"

            self.progress_callback("subtask", {"parentId": "readmes", "listId": "readme-dir-list", "id": dir_id, "name": f"Directory: {dir_name_display}", "status": "in-progress"})

            try:
                file_summaries = self._gather_file_summaries(current_dir, file_names)
                subdirectory_readmes = self._gather_subdirectory_readmes(current_dir, subdir_names)

                readme_path = current_dir / "README.md"
                existing_readme_content = None
                if readme_path.exists():
                    with open(readme_path, "r", encoding="utf-8") as f:
                        existing_readme_content = f.read()

                prompt = self._build_prompt(current_dir, file_summaries, subdirectory_readmes, existing_readme_content)
                generated_content = self.llm_handler.generate_text_response(prompt)

                # Validate BEFORE opening the file: open(..., "w") truncates
                # immediately, so a None or blank response would otherwise wipe
                # an existing README (and a blank one would count as success).
                if not isinstance(generated_content, str) or not generated_content.strip():
                    raise ValueError("LLM returned no README content")

                with open(readme_path, "w", encoding="utf-8") as f:
                    f.write(generated_content)

                self.progress_callback("subtask", {"parentId": "readmes", "id": dir_id, "status": "success"})

            except Exception as e:
                # Best effort per directory: report the failure and keep walking.
                self.progress_callback("log", {"message": f"Failed to generate README for {dir_name_display}: {e}"})
                self.progress_callback("subtask", {"parentId": "readmes", "id": dir_id, "status": "error"})

    # Prompt-input helpers: _gather_file_summaries, _gather_subdirectory_readmes, _build_prompt.
    def _gather_file_summaries(self, current_dir: Path, file_names: List[str]) -> str:
        file_summaries_list = []
        for fname in file_names:
            if fname.endswith(".py"):
               file_path = current_dir / fname
               if not scanner.is_excluded(file_path, self.exclude, self.project_path):
                   file_summaries_list.append(self._summarize_py_file(file_path))
        return "\n".join(file_summaries_list) or "No Python source files in this directory."

    def _gather_subdirectory_readmes(self, current_dir: Path, subdir_names: List[str]) -> str:
        subdir_readmes_list = []
        for sub_name in subdir_names:
            readme_path = current_dir / sub_name / "README.md"
            if readme_path.exists():
                with open(readme_path, "r", encoding="utf-8") as f:
                    content = f.read()
                subdir_readmes_list.append(f"--- Subdirectory: `{sub_name}` ---\n{content}\n")
        return "\n".join(subdir_readmes_list) or "No subdirectories with READMEs."

    def _build_prompt(self, current_dir: Path, file_summaries: str, subdirectory_readmes: str, existing_readme: str | None) -> str:
        """
        Pick the prompt template for a directory and fill it in.

        The root directory uses the root templates and every other directory
        the subdirectory templates. The "update" variant is used when
        `existing_readme` is non-empty; otherwise the "new README" variant.

        Args:
            current_dir: Directory the README is being written for.
            file_summaries: Text produced by `_gather_file_summaries`.
            subdirectory_readmes: Text produced by
                `_gather_subdirectory_readmes`.
            existing_readme: Current README.md content, or None if absent.

        Returns:
            The fully formatted prompt string.
        """
        updating = bool(existing_readme)

        # Fields shared by all templates; `str.format` ignores the ones a
        # given template does not reference (e.g. user_note in the non-update
        # templates).
        fields = {
            "project_description": self.description,
            "file_summaries": file_summaries,
            "subdirectory_readmes": subdirectory_readmes,
            "user_note": self.user_note or "No specific instructions provided.",
        }
        if updating:
            fields["existing_readme"] = existing_readme

        if current_dir == self.project_path:
            chosen = UPDATE_ROOT_PROMPT_TEMPLATE if updating else ROOT_PROMPT_TEMPLATE
            fields["project_name"] = self.repo_full_name or self.project_path.name
        else:
            chosen = UPDATE_SUBDIR_PROMPT_TEMPLATE if updating else SUBDIR_PROMPT_TEMPLATE
            fields["current_dir_relative"] = current_dir.relative_to(self.project_path).as_posix()
            fields["dir_name"] = current_dir.name

        return chosen.format(**fields)