# codescribe/readme_generator.py
"""Generate README.md files for a project, one directory at a time.

Directories are visited bottom-up so that the README of every subdirectory
already exists on disk when its parent directory is processed and can be
quoted in the parent's prompt.
"""
import ast
import os
import shutil
from pathlib import Path
from typing import Callable, List, Optional

from . import scanner
from .llm_handler import LLMHandler

# --- Prompt templates -------------------------------------------------------
# Each template is filled with ``str.format``; the placeholders in braces must
# match the keys assembled in ``ReadmeGenerator._build_prompt``.

SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating a README.md file for a specific directory within a larger project. Your tone should be informative and concise.

USER:
You are generating a `README.md` for the directory: `{current_dir_relative}`

The overall project description is:
"{project_description}"

---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---

This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---

TASK:
Write a `README.md` for the `{current_dir_relative}` directory.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Briefly explain the purpose of this directory based on the files it contains.
- If there are subdirectories, provide a section summarizing what each one does, using the context from their READMEs.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""

ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive.

USER:
You are generating the main `README.md` for a project.

The user-provided project description is:
"{project_description}"

---
The project's root directory contains the following source code files:
{file_summaries}
---

The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---

TASK:
Write a comprehensive `README.md` for the entire project. Structure it with the following sections:
- A main title (`# Project: {project_name}`).
- **Overview**: A slightly more detailed version of the user's description, enhanced with context from the files and subdirectories.
- **Project Structure**: A description of the key directories and their roles, using the information from the subdirectory READMEs.
- **Key Features**: Infer and list the key features of the project based on all the provided context.
"""

UPDATE_SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating a README.md file for a specific directory. Your tone should be informative and concise. A user has provided a note with instructions.

USER:
You are updating the `README.md` for the directory: `{current_dir_relative}`

The user-provided note with instructions for this update is:
"{user_note}"

The overall project description is:
"{project_description}"

---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---

This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---

Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---

TASK:
Rewrite the `README.md` for the `{current_dir_relative}` directory, incorporating the user's note and any new information from the files and subdirectories.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Use the existing content as a base, but modify it as needed.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""

UPDATE_ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive. A user has provided a note with instructions.

USER:
You are updating the main `README.md` for a project.

The user-provided note with instructions for this update is:
"{user_note}"

The user-provided project description is:
"{project_description}"

---
The project's root directory contains the following source code files:
{file_summaries}
---

The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---

Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---

TASK:
Rewrite a comprehensive `README.md` for the entire project. Structure it with the following sections, using the old README as a base but incorporating changes based on the user's note and new context.
- A main title (`# Project: {project_name}`).
- **Overview**: An updated version of the user's description, enhanced with context.
- **Project Structure**: A description of the key directories and their roles.
- **Key Features**: Infer and list key features based on all the provided context.
"""


def no_op_callback(event: str, data: dict) -> None:
    """Default progress callback: accept an event and do nothing with it."""
    pass


class ReadmeGenerator:
    """Walk a project bottom-up and write an LLM-generated README.md per directory.

    Progress is reported through ``progress_callback(event, data)`` using the
    event names ``"phase"``, ``"subtask"`` and ``"log"``.
    """

    def __init__(self, path_or_url: str, description: str, exclude: List[str], llm_handler: LLMHandler,
                 user_note: str = "", repo_full_name: str = "",
                 progress_callback: Callable[[str, dict], None] = no_op_callback):
        """Store the configuration and route the LLM handler's messages to the callback.

        Args:
            path_or_url: Location handed to ``scanner.get_project_path``. A value
                starting with ``"http"`` marks the resolved path as temporary.
            description: User-provided project description inserted into prompts.
            exclude: Exclusion patterns passed to ``scanner.is_excluded``.
                ``None`` is treated as an empty list.
            llm_handler: Object whose ``generate_text_response(prompt)`` produces
                the README text. Its ``progress_callback`` attribute is replaced.
            user_note: Optional instructions used by the update templates.
            repo_full_name: Optional project name for the root README title;
                the project directory name is used when empty.
            progress_callback: Receiver for ``(event, data)`` progress events.
        """
        self.path_or_url = path_or_url
        self.description = description
        # Build a new list so the caller's sequence is never mutated; README.md
        # itself is always added to the exclusion patterns.
        self.exclude = [*(exclude or []), "README.md"]
        self.llm_handler = llm_handler
        self.user_note = user_note
        self.progress_callback = progress_callback
        self.project_path: Optional[Path] = None
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name = repo_full_name

        def llm_log_wrapper(message: str):
            # Forward plain-text handler messages as structured "log" events.
            self.progress_callback("log", {"message": message})

        self.llm_handler.progress_callback = llm_log_wrapper

    def run(self):
        """Public method to start README generation.

        Resolves the project path, emits the ``"readmes"`` phase events around
        the generation pass, and re-raises any exception after reporting it.
        When the input started with ``"http"`` the resolved project directory
        is removed afterwards, whether or not generation succeeded.
        """
        try:
            self.project_path = scanner.get_project_path(self.path_or_url)
            self.progress_callback("phase", {"id": "readmes", "name": "Generating READMEs", "status": "in-progress"})
            self.run_with_structured_logging()
            self.progress_callback("phase", {"id": "readmes", "status": "success"})
        except Exception as e:
            self.progress_callback("log", {"message": f"An unexpected error occurred in README generation: {e}"})
            self.progress_callback("phase", {"id": "readmes", "status": "error"})
            raise
        finally:
            if self.is_temp_dir and self.project_path and self.project_path.exists():
                shutil.rmtree(self.project_path, ignore_errors=True)

    def _summarize_py_file(self, file_path: Path) -> str:
        """Return a one-line Markdown summary of a Python file.

        Preference order: first line of the module docstring, then the names of
        top-level functions and classes, then a generic placeholder. Read or
        parse failures are logged through the progress callback and fall
        through to the placeholder.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                tree = ast.parse(f.read())

            # 1. Prioritize the module-level docstring. Strip before indexing:
            # a whitespace-only docstring would otherwise yield no lines at all
            # and abort the definitions fallback below with an IndexError.
            docstring = (ast.get_docstring(tree) or "").strip()
            if docstring:
                return f"`{file_path.name}`: {docstring.splitlines()[0]}"

            # 2. Fallback: names of top-level functions and classes.
            definitions = [
                node.name
                for node in tree.body
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
            ]
            if definitions:
                summary = f"Contains definitions for: `{', '.join(definitions)}`."
                return f"`{file_path.name}`: {summary}"
        except Exception as e:
            # Best effort: an unreadable or unparsable file must not abort the run.
            self.progress_callback("log", {"message": f"Could not parse {file_path.name} for summary: {e}"})

        # 3. Final fallback
        return f"`{file_path.name}`: A Python source file."

    def run_with_structured_logging(self):
        """Generate a README.md for every non-excluded directory, bottom-up.

        For each directory an ``"in-progress"`` subtask event is emitted,
        followed by ``"success"`` or ``"error"``. A failure in one directory is
        logged and does not stop the remaining directories.
        """
        if not self.project_path:
            self.project_path = scanner.get_project_path(self.path_or_url)

        # topdown=False: children are visited before their parent, so the
        # README files of subdirectories exist when the parent prompt is built.
        for dir_path, subdir_names, file_names in os.walk(self.project_path, topdown=False):
            current_dir = Path(dir_path)
            if scanner.is_excluded(current_dir, self.exclude, self.project_path):
                continue

            rel_path = current_dir.relative_to(self.project_path).as_posix()
            is_root = rel_path == "."
            dir_id = "root" if is_root else rel_path
            dir_name_display = "Project Root" if is_root else rel_path

            self.progress_callback("subtask", {"parentId": "readmes", "listId": "readme-dir-list", "id": dir_id, "name": f"Directory: {dir_name_display}", "status": "in-progress"})

            try:
                file_summaries = self._gather_file_summaries(current_dir, file_names)
                subdirectory_readmes = self._gather_subdirectory_readmes(current_dir, subdir_names)

                readme_path = current_dir / "README.md"
                existing_readme_content = None
                if readme_path.exists():
                    with open(readme_path, "r", encoding="utf-8") as f:
                        existing_readme_content = f.read()

                prompt = self._build_prompt(current_dir, file_summaries, subdirectory_readmes, existing_readme_content)
                generated_content = self.llm_handler.generate_text_response(prompt)

                # Validate BEFORE opening the file: mode "w" truncates on open,
                # so an empty or non-string response would destroy an existing
                # README without producing a replacement.
                if not isinstance(generated_content, str) or not generated_content.strip():
                    raise ValueError("LLM returned no content; README.md was left untouched.")

                with open(readme_path, "w", encoding="utf-8") as f:
                    f.write(generated_content)

                self.progress_callback("subtask", {"parentId": "readmes", "id": dir_id, "status": "success"})

            except Exception as e:
                self.progress_callback("log", {"message": f"Failed to generate README for {dir_name_display}: {e}"})
                self.progress_callback("subtask", {"parentId": "readmes", "id": dir_id, "status": "error"})

    def _gather_file_summaries(self, current_dir: Path, file_names: List[str]) -> str:
        """Return newline-joined summaries of the non-excluded ``.py`` files in a directory.

        Names are processed in sorted order so the resulting prompt does not
        depend on filesystem enumeration order. A fixed placeholder sentence is
        returned when there is nothing to summarize.
        """
        summaries = []
        for fname in sorted(file_names):
            if not fname.endswith(".py"):
                continue
            file_path = current_dir / fname
            if scanner.is_excluded(file_path, self.exclude, self.project_path):
                continue
            summaries.append(self._summarize_py_file(file_path))
        return "\n".join(summaries) or "No Python source files in this directory."

    def _gather_subdirectory_readmes(self, current_dir: Path, subdir_names: List[str]) -> str:
        """Return the README.md contents of the direct subdirectories, one labelled section each.

        Subdirectories are processed in sorted order. Subdirectories that
        ``scanner.is_excluded`` rejects are skipped, mirroring the check applied
        to directories in the walk and to files in ``_gather_file_summaries``.
        A fixed placeholder sentence is returned when no README is found.
        """
        sections = []
        for sub_name in sorted(subdir_names):
            sub_dir = current_dir / sub_name
            if scanner.is_excluded(sub_dir, self.exclude, self.project_path):
                continue
            readme_path = sub_dir / "README.md"
            if not readme_path.exists():
                continue
            with open(readme_path, "r", encoding="utf-8") as f:
                content = f.read()
            sections.append(f"--- Subdirectory: `{sub_name}` ---\n{content}\n")
        return "\n".join(sections) or "No subdirectories with READMEs."

    def _build_prompt(self, current_dir: Path, file_summaries: str, subdirectory_readmes: str,
                      existing_readme: Optional[str]) -> str:
        """Pick the matching template and fill it in.

        The root directory uses the ROOT templates and every other directory
        the SUBDIR templates; the UPDATE variant is chosen when
        ``existing_readme`` is non-empty.
        """
        args = {
            "project_description": self.description,
            "file_summaries": file_summaries,
            "subdirectory_readmes": subdirectory_readmes,
            # Only the UPDATE templates reference user_note; str.format ignores
            # keyword arguments a template does not use.
            "user_note": self.user_note or "No specific instructions provided.",
        }
        if existing_readme:
            args["existing_readme"] = existing_readme

        if current_dir == self.project_path:
            template = UPDATE_ROOT_PROMPT_TEMPLATE if existing_readme else ROOT_PROMPT_TEMPLATE
            args["project_name"] = self.repo_full_name or self.project_path.name
        else:
            template = UPDATE_SUBDIR_PROMPT_TEMPLATE if existing_readme else SUBDIR_PROMPT_TEMPLATE
            args["current_dir_relative"] = current_dir.relative_to(self.project_path).as_posix()
            args["dir_name"] = current_dir.name

        return template.format(**args)