Spaces:
Sleeping
Sleeping
File size: 8,847 Bytes
3e802a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import shutil
from pathlib import Path
from typing import List, Callable, Dict
import networkx as nx
import json
from collections import defaultdict
from . import scanner, parser, updater
from .llm_handler import LLMHandler
# Prompt for the single combined LLM pass over one source file: the model must
# return ONE JSON object holding the module-level docstring (under the reserved
# key "__module__") plus one docstring per function/class/method in the file.
COMBINED_DOCSTRING_PROMPT_TEMPLATE: str = """
SYSTEM: You are an expert programmer writing high-quality, comprehensive Python docstrings in reStructuredText (reST) format. Your output MUST be a single JSON object.
USER:
Project Description:
\"\"\"
{project_description}
\"\"\"
---
CONTEXT FROM DEPENDENCIES:
This file depends on other modules. Here is their documentation for context:
{dependency_context}
---
DOCUMENT THE FOLLOWING SOURCE FILE:
File Path: `{file_path}`
```python
{file_content}
```
INSTRUCTIONS:
Provide a single JSON object as your response.
1. The JSON object MUST have a special key `\"__module__\"`. The value for this key should be a concise, single-paragraph docstring that summarizes the purpose of the entire file.
2. The other keys in the JSON object should be the function or class names (e.g., "my_function", "MyClass", "MyClass.my_method").
3. The values for these other keys should be their complete docstrings.
4. Do NOT include the original code in your response. Only generate the JSON containing the docstrings.
"""
# Prompt for the package-summary pass: given the per-module summaries gathered
# above, ask the LLM for a one-paragraph docstring for the package __init__.py.
PACKAGE_INIT_PROMPT_TEMPLATE: str = """
SYSTEM: You are an expert programmer writing a high-level, one-paragraph summary for a Python package. This summary will be the main docstring for the package's `__init__.py` file.
USER:
Project Description:
\"\"\"
{project_description}
\"\"\"
You are writing the docstring for the `__init__.py` of the `{package_name}` package.
This package contains the following modules. Their summaries are provided below:
{module_summaries}
INSTRUCTIONS:
Write a concise, single-paragraph docstring that summarizes the overall purpose and responsibility of the `{package_name}` package, based on the modules it contains. This docstring will be placed in the `__init__.py` file.
"""
def no_op_callback(event: str, data: dict):
    """Default progress callback: pretty-print each progress event to stdout."""
    payload = json.dumps(data, indent=2)
    print(f"{event}: {payload}")
class DocstringOrchestrator:
    """Drives the end-to-end docstring generation pipeline for a project.

    The orchestrator resolves a local path or remote (http) repository,
    scans it for Python files, builds an import dependency graph, asks the
    LLM for all docstrings of each file in one combined call (feeding
    already-documented dependencies back in as context), writes the results
    into the source files, and finally generates a package-level summary for
    each ``__init__.py``.

    Progress is reported through ``progress_callback`` as ``(event, data)``
    pairs; the events emitted here are ``"log"``, ``"phase"`` and
    ``"subtask"``.
    """

    def __init__(self, path_or_url: str, description: str, exclude: List[str], llm_handler: LLMHandler, progress_callback: Callable[[str, dict], None] = no_op_callback, repo_full_name: str = None):
        """
        :param path_or_url: Local project directory or an http(s) repository URL.
        :param description: Human-written project description embedded into prompts.
        :param exclude: Patterns of files/directories to skip while scanning.
        :param llm_handler: Handler performing the actual LLM calls.
        :param progress_callback: Receives ``(event, data)`` updates; defaults to stdout logging.
        :param repo_full_name: Optional repository name used for the root package summary.
        """
        self.path_or_url = path_or_url
        self.description = description
        self.exclude = exclude
        self.llm_handler = llm_handler
        self.progress_callback = progress_callback
        self.project_path = None  # resolved by run(); may be a temporary checkout
        # Remote URLs are fetched into a temp directory that run() must clean up.
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name = repo_full_name

        # Route the LLM handler's own log messages through the UI callback.
        def llm_log_wrapper(message: str):
            self.progress_callback("log", {"message": message})
        self.llm_handler.progress_callback = llm_log_wrapper

    @staticmethod
    def _strip_triple_quote_fence(text: str) -> str:
        """Remove a matched triple-quote fence the LLM may wrap a summary in.

        Fixes the former ``.strip('\"\"\"').strip("'''")`` approach: ``str.strip``
        treats its argument as a *set of characters*, so it removed any leading
        or trailing quote characters and could mangle a summary that
        legitimately starts or ends with a quote. Only a matched ``\"\"\"`` or
        ``'''`` pair wrapping the whole text is removed here.
        """
        cleaned = text.strip()
        for fence in ('"""', "'''"):
            if (cleaned.startswith(fence) and cleaned.endswith(fence)
                    and len(cleaned) >= 2 * len(fence)):
                return cleaned[len(fence):-len(fence)].strip()
        return cleaned

    def run(self):
        """Execute the full pipeline: scan, document files, document packages.

        Exceptions propagate to the caller, but a temporary checkout is always
        removed by the ``finally`` clause.
        """
        def log_to_ui(message: str):
            self.progress_callback("log", {"message": message})

        try:
            self.project_path = scanner.get_project_path(self.path_or_url, log_callback=log_to_ui)

            self.progress_callback("phase", {"id": "scan", "name": "Scanning Project", "status": "in-progress"})
            files = scanner.scan_project(self.project_path, self.exclude)
            log_to_ui(f"Found {len(files)} Python files to document.")
            self.progress_callback("phase", {"id": "scan", "status": "success"})

            graph = parser.build_dependency_graph(files, self.project_path, log_callback=log_to_ui)

            self.progress_callback("phase", {"id": "docstrings", "name": "Generating Docstrings", "status": "in-progress"})
            # Document dependencies before dependents when possible, so each
            # file's prompt can include its dependencies' fresh docstrings.
            if nx.is_directed_acyclic_graph(graph):
                doc_order = list(nx.topological_sort(graph))
            else:
                doc_order = list(graph.nodes)

            module_docstrings = self._document_files(graph, doc_order, log_to_ui)
            self._document_packages(module_docstrings, log_to_ui)

            self.progress_callback("phase", {"id": "docstrings", "status": "success"})
        finally:
            # Remove a temporary clone even when a phase above failed.
            if self.is_temp_dir and self.project_path and self.project_path.exists():
                shutil.rmtree(self.project_path, ignore_errors=True)

    def _document_files(self, graph, doc_order, log_to_ui) -> Dict:
        """Combined pass 1+2: one LLM call per file, then write results to disk.

        :param graph: Dependency graph whose nodes are file paths.
        :param doc_order: Files in the order they should be documented.
        :param log_to_ui: Callable forwarding a log message to the UI.
        :return: Mapping of file path -> module-level summary for files whose
                 summary was generated (consumed by :meth:`_document_packages`).
        """
        documented_context = {}   # file path -> {symbol: docstring}, fed to dependents
        module_docstrings = {}    # file path -> module-level summary
        for file_path in doc_order:
            rel_path = file_path.relative_to(self.project_path).as_posix()
            # One subtask covers the whole per-file documentation process.
            self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-file-list", "id": f"doc-{rel_path}", "name": f"Documenting {rel_path}", "status": "in-progress"})
            deps = graph.predecessors(file_path)
            dep_context_str = "\n".join([f"File: `{dep.relative_to(self.project_path)}`\n{json.dumps(documented_context.get(dep, {}), indent=2)}\n" for dep in deps]) or "No internal dependencies have been documented yet."
            with open(file_path, "r", encoding="utf-8") as f:
                file_content = f.read()
            prompt = COMBINED_DOCSTRING_PROMPT_TEMPLATE.format(project_description=self.description, dependency_context=dep_context_str, file_path=rel_path, file_content=file_content)
            try:
                # Single LLM call returns every docstring for the file as one JSON object.
                combined_docs = self.llm_handler.generate_documentation(prompt)
                # "__module__" is reserved for the file-level summary; the
                # remaining keys are function/class/method names.
                module_summary = combined_docs.pop("__module__", None)
                function_class_docs = combined_docs
                updater.update_file_with_docstrings(file_path, function_class_docs, log_callback=log_to_ui)
                if module_summary:
                    updater.update_module_docstring(file_path, module_summary, log_callback=log_to_ui)
                    module_docstrings[file_path] = module_summary
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "success"})
                documented_context[file_path] = function_class_docs
            except Exception as e:
                # Best-effort: one failed file must not abort the whole run.
                log_to_ui(f"Error processing docstrings for {rel_path}: {e}")
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "error"})
        return module_docstrings

    def _document_packages(self, module_docstrings: Dict, log_to_ui):
        """Pass 3: write a summary docstring into each package's ``__init__.py``.

        :param module_docstrings: Mapping of file path -> module summary
            produced by :meth:`_document_files`.
        :param log_to_ui: Callable forwarding a log message to the UI.
        """
        # Group per-module summaries by their containing package directory.
        packages = defaultdict(list)
        for file_path, docstring in module_docstrings.items():
            if file_path.name != "__init__.py":
                packages[file_path.parent].append(f"- `{file_path.name}`: {docstring}")
        for package_path, summaries in packages.items():
            rel_path = package_path.relative_to(self.project_path).as_posix()
            init_file = package_path / "__init__.py"
            self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-package-list", "id": f"pkg-{rel_path}", "name": f"Package summary for {rel_path}", "status": "in-progress"})
            try:
                # The project root package may be named after the repository itself.
                is_root_package = (package_path == self.project_path)
                package_name = self.repo_full_name if is_root_package and self.repo_full_name else package_path.name
                prompt = PACKAGE_INIT_PROMPT_TEMPLATE.format(
                    project_description=self.description,
                    package_name=package_name,
                    module_summaries="\n".join(summaries)
                )
                package_summary = self._strip_triple_quote_fence(self.llm_handler.generate_text_response(prompt))
                if not init_file.exists():
                    init_file.touch()
                updater.update_module_docstring(init_file, package_summary, log_callback=log_to_ui)
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "success"})
            except Exception as e:
                log_to_ui(f"Error generating package docstring for {rel_path}: {e}")
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "error"})