# CodeScribe / codescribe /readme_generator.py
# Rahul-Samedavar's picture
# allset
# 3e802a5
# codescribe/readme_generator.py
import os
import shutil
import ast
from pathlib import Path
from typing import List, Callable
from . import scanner
from .llm_handler import LLMHandler
# Prompt templates used to generate new READMEs and to update existing ones.
# Prompt for writing a new README.md for a non-root directory.
# Selected by ReadmeGenerator._build_prompt when the directory is not the
# project root and has no existing README content.
# str.format placeholders: current_dir_relative, project_description,
# file_summaries, subdirectory_readmes, dir_name.
SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating a README.md file for a specific directory within a larger project. Your tone should be informative and concise.
USER:
You are generating a `README.md` for the directory: `{current_dir_relative}`
The overall project description is:
"{project_description}"
---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---
This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---
TASK:
Write a `README.md` for the `{current_dir_relative}` directory.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Briefly explain the purpose of this directory based on the files it contains.
- If there are subdirectories, provide a section summarizing what each one does, using the context from their READMEs.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""
# Prompt for writing a new top-level README.md for the whole project.
# Selected by ReadmeGenerator._build_prompt when the directory is the project
# root and has no existing README content.
# str.format placeholders: project_description, file_summaries,
# subdirectory_readmes, project_name.
ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer creating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive.
USER:
You are generating the main `README.md` for a project.
The user-provided project description is:
"{project_description}"
---
The project's root directory contains the following source code files:
{file_summaries}
---
The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---
TASK:
Write a comprehensive `README.md` for the entire project. Structure it with the following sections:
- A main title (`# Project: {project_name}`).
- **Overview**: A slightly more detailed version of the user's description, enhanced with context from the files and subdirectories.
- **Project Structure**: A description of the key directories and their roles, using the information from the subdirectory READMEs.
- **Key Features**: Infer and list the key features of the project based on all the provided context.
"""
# Prompt for rewriting an existing README.md of a non-root directory.
# Selected by ReadmeGenerator._build_prompt when the directory is not the
# project root and existing README content was found.
# str.format placeholders: current_dir_relative, user_note,
# project_description, file_summaries, subdirectory_readmes, existing_readme,
# dir_name.
UPDATE_SUBDIR_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating a README.md file for a specific directory. Your tone should be informative and concise. A user has provided a note with instructions.
USER:
You are updating the `README.md` for the directory: `{current_dir_relative}`
The user-provided note with instructions for this update is:
"{user_note}"
The overall project description is:
"{project_description}"
---
This directory contains the following source code files. Use them to describe the specific purpose of this directory:
{file_summaries}
---
This directory also contains the following subdirectories. Use their `README.md` content (provided below) to summarize their roles:
{subdirectory_readmes}
---
Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---
TASK:
Rewrite the `README.md` for the `{current_dir_relative}` directory, incorporating the user's note and any new information from the files and subdirectories.
- Start with a heading (e.g., `# Directory: {dir_name}`).
- Use the existing content as a base, but modify it as needed.
- Use clear Markdown formatting. Do not describe the entire project; focus ONLY on the contents and role of THIS directory.
"""
# Prompt for rewriting an existing top-level README.md for the whole project.
# Selected by ReadmeGenerator._build_prompt when the directory is the project
# root and existing README content was found.
# str.format placeholders: user_note, project_description, file_summaries,
# subdirectory_readmes, existing_readme, project_name.
UPDATE_ROOT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert technical writer updating the main `README.md` for an entire software project. Your tone should be welcoming and comprehensive. A user has provided a note with instructions.
USER:
You are updating the main `README.md` for a project.
The user-provided note with instructions for this update is:
"{user_note}"
The user-provided project description is:
"{project_description}"
---
The project's root directory contains the following source code files:
{file_summaries}
---
The project has the following main subdirectories. Use their `README.md` content (provided below) to describe the overall structure of the project:
{subdirectory_readmes}
---
Here is the OLD `README.md` content. You must update it based on the new context and the user's note.
---
{existing_readme}
---
TASK:
Rewrite a comprehensive `README.md` for the entire project. Structure it with the following sections, using the old README as a base but incorporating changes based on the user's note and new context.
- A main title (`# Project: {project_name}`).
- **Overview**: An updated version of the user's description, enhanced with context.
- **Project Structure**: A description of the key directories and their roles.
- **Key Features**: Infer and list key features based on all the provided context.
"""
def no_op_callback(event: str, data: dict):
    """Ignore a progress event; used as the default progress callback."""
    return None
class ReadmeGenerator:
    """Generate or update a ``README.md`` in every directory of a project.

    Directories are visited bottom-up so that the prompt for a directory can
    embed the READMEs already written for its subdirectories. Progress is
    reported through ``progress_callback(event, data)``; the events emitted
    by this class are ``"phase"``, ``"subtask"`` and ``"log"``.
    """

    def __init__(
        self,
        path_or_url: str,
        description: str,
        exclude: List[str],
        llm_handler: LLMHandler,
        user_note: str = "",
        repo_full_name: str = "",
        progress_callback: Callable[[str, dict], None] = no_op_callback,
    ):
        """Store the generation settings and hook up the LLM handler's logging.

        Args:
            path_or_url: Value handed to ``scanner.get_project_path``. When it
                starts with ``"http"`` the resolved project directory is
                treated as temporary and removed at the end of :meth:`run`.
            description: Project description inserted into every prompt.
            exclude: Exclusion entries forwarded to ``scanner.is_excluded``.
                ``"README.md"`` is always appended; ``None`` counts as empty.
            llm_handler: Object whose ``generate_text_response(prompt)``
                returns the README text. Its ``progress_callback`` attribute
                is overwritten so its messages surface as ``"log"`` events.
            user_note: Optional instructions inserted into the update prompts.
            repo_full_name: Optional project title for the root prompt; the
                project directory name is used when it is empty.
            progress_callback: Called as ``progress_callback(event, data)``.
        """
        self.path_or_url = path_or_url
        self.description = description
        # Build a fresh list so None or a tuple is accepted and the caller's
        # own sequence is never modified.
        self.exclude = [*(exclude or ()), "README.md"]
        self.llm_handler = llm_handler
        self.user_note = user_note
        self.progress_callback = progress_callback
        # Resolved lazily by run() / run_with_structured_logging().
        self.project_path = None
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name = repo_full_name

        def llm_log_wrapper(message: str):
            self.progress_callback("log", {"message": message})

        self.llm_handler.progress_callback = llm_log_wrapper

    def run(self):
        """Public method to start README generation.

        Resolves the project path, runs the per-directory generation and
        reports the ``"readmes"`` phase as in-progress, then success or error.

        Raises:
            Exception: Anything raised while resolving the path or generating
                is logged, reported as a phase error and re-raised.
        """
        try:
            self.project_path = scanner.get_project_path(self.path_or_url)
            self.progress_callback(
                "phase",
                {"id": "readmes", "name": "Generating READMEs", "status": "in-progress"},
            )
            self.run_with_structured_logging()
            self.progress_callback("phase", {"id": "readmes", "status": "success"})
        except Exception as e:
            self.progress_callback(
                "log",
                {"message": f"An unexpected error occurred in README generation: {e}"},
            )
            self.progress_callback("phase", {"id": "readmes", "status": "error"})
            raise
        finally:
            # Projects resolved from an http(s) input are cleaned up no matter
            # how the run ended.
            if self.is_temp_dir and self.project_path and self.project_path.exists():
                shutil.rmtree(self.project_path, ignore_errors=True)

    def _summarize_py_file(self, file_path: Path) -> str:
        """Return a one-line summary of a Python file.

        The first line of the module docstring is preferred. Without a
        docstring, the names of top-level functions and classes are listed.
        If the file cannot be read or parsed, or defines nothing, a generic
        description is returned; read/parse failures are logged.
        """
        try:
            # utf-8-sig drops a leading byte-order mark so it never reaches
            # ast.parse as a stray U+FEFF character.
            content = file_path.read_text(encoding="utf-8-sig")
            tree = ast.parse(content)

            # 1. Prioritize the module-level docstring.
            docstring = ast.get_docstring(tree)
            if docstring:
                return f"`{file_path.name}`: {docstring.strip().splitlines()[0]}"

            # 2. Fallback: names of top-level functions and classes.
            definitions = [
                node.name
                for node in tree.body
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
            ]
            if definitions:
                summary = f"Contains definitions for: `{', '.join(definitions)}`."
                return f"`{file_path.name}`: {summary}"
        except Exception as e:
            # Best effort: an unreadable or unparsable file must not abort the
            # README for the whole directory.
            self.progress_callback(
                "log",
                {"message": f"Could not parse {file_path.name} for summary: {e}"},
            )

        # 3. Final fallback.
        return f"`{file_path.name}`: A Python source file."

    def run_with_structured_logging(self):
        """
        Generates README files for each directory from the bottom up,
        emitting structured events for the UI.

        A failure in one directory is logged and reported as a subtask error;
        the remaining directories are still processed.
        """
        if not self.project_path:
            self.project_path = scanner.get_project_path(self.path_or_url)

        # topdown=False visits children first, so their READMEs exist by the
        # time the parent's prompt is assembled.
        for dir_path, subdir_names, file_names in os.walk(self.project_path, topdown=False):
            current_dir = Path(dir_path)
            if scanner.is_excluded(current_dir, self.exclude, self.project_path):
                continue

            rel_path = current_dir.relative_to(self.project_path).as_posix()
            is_root = rel_path == "."
            dir_id = "root" if is_root else rel_path
            dir_name_display = "Project Root" if is_root else rel_path
            self.progress_callback(
                "subtask",
                {
                    "parentId": "readmes",
                    "listId": "readme-dir-list",
                    "id": dir_id,
                    "name": f"Directory: {dir_name_display}",
                    "status": "in-progress",
                },
            )
            try:
                file_summaries = self._gather_file_summaries(current_dir, file_names)
                subdirectory_readmes = self._gather_subdirectory_readmes(current_dir, subdir_names)

                readme_path = current_dir / "README.md"
                existing_readme_content = None
                if readme_path.exists():
                    existing_readme_content = readme_path.read_text(encoding="utf-8")

                prompt = self._build_prompt(
                    current_dir, file_summaries, subdirectory_readmes, existing_readme_content
                )
                generated_content = self.llm_handler.generate_text_response(prompt)

                # Validate BEFORE opening the file: opening in "w" mode
                # truncates, so a None/empty response would otherwise wipe an
                # existing README.
                if not isinstance(generated_content, str) or not generated_content.strip():
                    raise ValueError("LLM returned no README content")

                readme_path.write_text(generated_content, encoding="utf-8")
                self.progress_callback(
                    "subtask", {"parentId": "readmes", "id": dir_id, "status": "success"}
                )
            except Exception as e:
                self.progress_callback(
                    "log",
                    {"message": f"Failed to generate README for {dir_name_display}: {e}"},
                )
                self.progress_callback(
                    "subtask", {"parentId": "readmes", "id": dir_id, "status": "error"}
                )

    def _gather_file_summaries(self, current_dir: Path, file_names: List[str]) -> str:
        """Return newline-joined summaries of the non-excluded ``.py`` files.

        Files are processed in sorted name order so the prompt does not depend
        on filesystem listing order. A placeholder sentence is returned when
        no file qualifies.
        """
        summaries = []
        for fname in sorted(file_names):
            if not fname.endswith(".py"):
                continue
            file_path = current_dir / fname
            if scanner.is_excluded(file_path, self.exclude, self.project_path):
                continue
            summaries.append(self._summarize_py_file(file_path))
        return "\n".join(summaries) or "No Python source files in this directory."

    def _gather_subdirectory_readmes(self, current_dir: Path, subdir_names: List[str]) -> str:
        """Return the README contents of the direct, non-excluded subdirectories.

        Each README is prefixed with a ``--- Subdirectory: `name` ---`` header.
        Subdirectories are processed in sorted name order. A placeholder
        sentence is returned when none of them has a README.
        """
        sections = []
        for sub_name in sorted(subdir_names):
            sub_dir = current_dir / sub_name
            # Same check the directory walk applies: an excluded directory
            # gets no generated README, so a pre-existing one must not leak
            # into the parent's prompt either.
            if scanner.is_excluded(sub_dir, self.exclude, self.project_path):
                continue
            readme_path = sub_dir / "README.md"
            if readme_path.exists():
                content = readme_path.read_text(encoding="utf-8")
                sections.append(f"--- Subdirectory: `{sub_name}` ---\n{content}\n")
        return "\n".join(sections) or "No subdirectories with READMEs."

    def _build_prompt(self, current_dir: Path, file_summaries: str, subdirectory_readmes: str, existing_readme: str | None) -> str:
        """Fill the prompt template that matches this directory.

        The root directory uses the ROOT templates and every other directory
        the SUBDIR templates; the UPDATE variant is chosen when
        ``existing_readme`` is non-empty.
        """
        is_root = current_dir == self.project_path
        is_update = bool(existing_readme)

        # str.format ignores unused keyword arguments, so user_note can be
        # supplied even to the templates that have no such placeholder.
        args = {
            "project_description": self.description,
            "file_summaries": file_summaries,
            "subdirectory_readmes": subdirectory_readmes,
            "user_note": self.user_note or "No specific instructions provided.",
        }
        if is_root:
            template = UPDATE_ROOT_PROMPT_TEMPLATE if is_update else ROOT_PROMPT_TEMPLATE
            args["project_name"] = self.repo_full_name or self.project_path.name
        else:
            template = UPDATE_SUBDIR_PROMPT_TEMPLATE if is_update else SUBDIR_PROMPT_TEMPLATE
            args["current_dir_relative"] = current_dir.relative_to(self.project_path).as_posix()
            args["dir_name"] = current_dir.name
        if is_update:
            args["existing_readme"] = existing_readme
        return template.format(**args)