import json
import shutil
from collections import defaultdict
from pathlib import Path
from typing import Callable, Dict, List, Optional

import networkx as nx

from . import scanner, parser, updater
from .llm_handler import LLMHandler

# Prompt for documenting one source file. The LLM is asked for a single JSON
# object mapping "__module__" and each function/class name to a docstring.
COMBINED_DOCSTRING_PROMPT_TEMPLATE = """
SYSTEM: You are an expert programmer writing high-quality, comprehensive Python docstrings in reStructuredText (reST) format. Your output MUST be a single JSON object.

USER:
Project Description:
\"\"\"
{project_description}
\"\"\"

---
CONTEXT FROM DEPENDENCIES:
This file depends on other modules. Here is their documentation for context:

{dependency_context}

---
DOCUMENT THE FOLLOWING SOURCE FILE:

File Path: `{file_path}`

```python
{file_content}
```

INSTRUCTIONS:
Provide a single JSON object as your response.
1. The JSON object MUST have a special key `\"__module__\"`. The value for this key should be a concise, single-paragraph docstring that summarizes the purpose of the entire file.
2. The other keys in the JSON object should be the function or class names (e.g., "my_function", "MyClass", "MyClass.my_method").
3. The values for these other keys should be their complete docstrings.
4. Do NOT include the original code in your response. Only generate the JSON containing the docstrings.
"""

# Prompt for summarising a whole package from the module summaries produced
# by the per-file pass; the answer becomes the package's __init__.py docstring.
PACKAGE_INIT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert programmer writing a high-level, one-paragraph summary for a Python package. This summary will be the main docstring for the package's `__init__.py` file.

USER:
Project Description:
\"\"\"
{project_description}
\"\"\"

You are writing the docstring for the `__init__.py` of the `{package_name}` package. This package contains the following modules. Their summaries are provided below:

{module_summaries}

INSTRUCTIONS:
Write a concise, single-paragraph docstring that summarizes the overall purpose and responsibility of the `{package_name}` package, based on the modules it contains. This docstring will be placed in the `__init__.py` file.
"""


def no_op_callback(event: str, data: dict):
    """Default progress callback: print the event and its JSON payload.

    Despite the name this is not silent; it writes one entry per event to
    stdout so that a run without a UI still shows progress.

    :param event: Event kind (this module emits "log", "phase", "subtask").
    :param data: JSON-serialisable payload describing the event.
    """
    print(f"{event}: {json.dumps(data, indent=2)}")


class DocstringOrchestrator:
    """Drive the end-to-end docstring generation for one project.

    The orchestrator scans the project, orders files by their dependency
    graph, asks the LLM for docstrings file by file, writes them back via
    ``updater``, and finally generates package-level ``__init__.py``
    docstrings from the collected module summaries. Progress is reported
    through ``progress_callback`` as ``(event, data)`` pairs.
    """

    def __init__(self, path_or_url: str, description: str, exclude: List[str],
                 llm_handler: LLMHandler,
                 progress_callback: Callable[[str, dict], None] = no_op_callback,
                 repo_full_name: Optional[str] = None):
        """Store the run configuration and route LLM log output to the UI.

        :param path_or_url: Local path or URL of the project; resolved by
            ``scanner.get_project_path`` when :meth:`run` starts.
        :param description: Free-text project description fed into prompts.
        :param exclude: Exclusion list passed through to ``scanner.scan_project``.
        :param llm_handler: Handler used for all LLM calls. Its
            ``progress_callback`` attribute is overwritten here.
        :param progress_callback: Receiver for ``(event, data)`` notifications.
        :param repo_full_name: Optional name used instead of the directory
            name when summarising the project's root package.
        """
        self.path_or_url = path_or_url
        self.description = description
        self.exclude = exclude
        self.llm_handler = llm_handler
        self.progress_callback = progress_callback
        self.project_path = None
        # Anything addressed by URL is treated as a temporary checkout that
        # run() deletes once it is finished.
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name = repo_full_name

        # The handler reports plain messages; wrap them as "log" events.
        self.llm_handler.progress_callback = self._log

    def _log(self, message: str) -> None:
        """Forward a plain text message to the progress callback as a log event."""
        self.progress_callback("log", {"message": message})

    def run(self):
        """Execute the scan, per-file documentation and package summary passes.

        Failures on an individual file or package are logged and reported as
        an "error" subtask without stopping the run. Exceptions raised while
        resolving the project path, scanning, or building the dependency
        graph propagate to the caller. A temporary project directory is
        removed in every case.
        """
        try:
            self.project_path = scanner.get_project_path(self.path_or_url, log_callback=self._log)

            self.progress_callback("phase", {"id": "scan", "name": "Scanning Project", "status": "in-progress"})
            files = scanner.scan_project(self.project_path, self.exclude)
            self._log(f"Found {len(files)} Python files to document.")
            self.progress_callback("phase", {"id": "scan", "status": "success"})

            graph = parser.build_dependency_graph(files, self.project_path, log_callback=self._log)

            self.progress_callback("phase", {"id": "docstrings", "name": "Generating Docstrings", "status": "in-progress"})

            # A topological order only exists for acyclic graphs; with import
            # cycles fall back to the graph's own node order.
            if nx.is_directed_acyclic_graph(graph):
                doc_order = list(nx.topological_sort(graph))
            else:
                doc_order = list(graph.nodes)

            # Function/class docstrings per file, reused as context for later files.
            documented_context: Dict[Path, dict] = {}
            # Module-level summaries per file, consumed by the package pass.
            module_docstrings: Dict[Path, str] = {}

            for file_path in doc_order:
                self._document_file(file_path, graph, documented_context, module_docstrings)

            self._document_packages(module_docstrings)

            self.progress_callback("phase", {"id": "docstrings", "status": "success"})
        finally:
            if self.is_temp_dir and self.project_path and self.project_path.exists():
                shutil.rmtree(self.project_path, ignore_errors=True)

    def _build_file_prompt(self, file_path: Path, rel_path: str, graph,
                           documented_context: Dict[Path, dict]) -> str:
        """Read ``file_path`` and render the combined docstring prompt for it."""
        # Predecessors in the graph are used as this file's dependencies;
        # ones not documented yet contribute an empty JSON object.
        dep_sections = [
            f"File: `{dep.relative_to(self.project_path).as_posix()}`\n"
            f"{json.dumps(documented_context.get(dep, {}), indent=2)}\n"
            for dep in graph.predecessors(file_path)
        ]
        dep_context_str = "\n".join(dep_sections) or "No internal dependencies have been documented yet."

        with open(file_path, "r", encoding="utf-8") as f:
            file_content = f.read()

        return COMBINED_DOCSTRING_PROMPT_TEMPLATE.format(
            project_description=self.description,
            dependency_context=dep_context_str,
            file_path=rel_path,
            file_content=file_content,
        )

    def _document_file(self, file_path: Path, graph,
                       documented_context: Dict[Path, dict],
                       module_docstrings: Dict[Path, str]) -> None:
        """Generate and write all docstrings for one file with a single LLM call."""
        rel_path = file_path.relative_to(self.project_path).as_posix()
        self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-file-list", "id": f"doc-{rel_path}", "name": f"Documenting {rel_path}", "status": "in-progress"})

        try:
            # Reading the file happens inside the try so that an unreadable or
            # non-UTF-8 file is reported as a failed subtask instead of
            # aborting the whole run.
            prompt = self._build_file_prompt(file_path, rel_path, graph, documented_context)
            combined_docs = self.llm_handler.generate_documentation(prompt)

            # Split the module summary from the function/class docstrings;
            # what remains in the dict after the pop is the latter.
            module_summary = combined_docs.pop("__module__", None)
            function_class_docs = combined_docs

            updater.update_file_with_docstrings(file_path, function_class_docs, log_callback=self._log)

            if module_summary:
                updater.update_module_docstring(file_path, module_summary, log_callback=self._log)
                module_docstrings[file_path] = module_summary

            self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "success"})
            documented_context[file_path] = function_class_docs
        except Exception as e:
            self._log(f"Error processing docstrings for {rel_path}: {e}")
            self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "error"})

    def _document_packages(self, module_docstrings: Dict[Path, str]) -> None:
        """Group module summaries by directory and summarise each package."""
        packages = defaultdict(list)
        for file_path, docstring in module_docstrings.items():
            # __init__.py is the target of the summary, not an input to it.
            if file_path.name != "__init__.py":
                packages[file_path.parent].append(f"- `{file_path.name}`: {docstring}")

        for package_path, summaries in packages.items():
            self._document_package(package_path, summaries)

    def _document_package(self, package_path: Path, summaries: List[str]) -> None:
        """Generate the ``__init__.py`` docstring for one package directory."""
        rel_path = package_path.relative_to(self.project_path).as_posix()
        init_file = package_path / "__init__.py"
        self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-package-list", "id": f"pkg-{rel_path}", "name": f"Package summary for {rel_path}", "status": "in-progress"})

        try:
            is_root_package = (package_path == self.project_path)
            if is_root_package and self.repo_full_name:
                package_name = self.repo_full_name
            else:
                package_name = package_path.name

            prompt = PACKAGE_INIT_PROMPT_TEMPLATE.format(
                project_description=self.description,
                package_name=package_name,
                module_summaries="\n".join(summaries),
            )
            raw_summary = self.llm_handler.generate_text_response(prompt)
            # str.strip() takes a set of characters, so stripping '"' and then
            # "'" removes any run of wrapping quotes (including triple quotes).
            package_summary = raw_summary.strip().strip('"').strip("'").strip()
            if not package_summary:
                # Mirror the file pass, which never writes an empty module
                # docstring; report the package as failed instead.
                raise ValueError("LLM returned an empty package summary")

            if not init_file.exists():
                init_file.touch()

            updater.update_module_docstring(init_file, package_summary, log_callback=self._log)
            self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "success"})
        except Exception as e:
            self._log(f"Error generating package docstring for {rel_path}: {e}")
            self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "error"})