File size: 8,847 Bytes
3e802a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import json
import shutil
from collections import defaultdict
from pathlib import Path
from typing import Callable, Dict, List, Optional

import networkx as nx

from . import scanner, parser, updater
from .llm_handler import LLMHandler

# Prompt sent once per source file. Asks the LLM for a single JSON object
# whose "__module__" key holds the file-level summary and whose remaining
# keys ("func", "Class", "Class.method") hold the individual docstrings.
# Placeholders {project_description}, {dependency_context}, {file_path} and
# {file_content} are filled via str.format() in DocstringOrchestrator.run().
COMBINED_DOCSTRING_PROMPT_TEMPLATE = """
SYSTEM: You are an expert programmer writing high-quality, comprehensive Python docstrings in reStructuredText (reST) format. Your output MUST be a single JSON object.

USER:
Project Description:
\"\"\"
{project_description}
\"\"\"

---
CONTEXT FROM DEPENDENCIES:
This file depends on other modules. Here is their documentation for context:

{dependency_context}
---

DOCUMENT THE FOLLOWING SOURCE FILE:

File Path: `{file_path}`

```python
{file_content}
```
INSTRUCTIONS:
Provide a single JSON object as your response.
1.  The JSON object MUST have a special key `\"__module__\"`. The value for this key should be a concise, single-paragraph docstring that summarizes the purpose of the entire file.
2.  The other keys in the JSON object should be the function or class names (e.g., "my_function", "MyClass", "MyClass.my_method").
3.  The values for these other keys should be their complete docstrings.
4.  Do NOT include the original code in your response. Only generate the JSON containing the docstrings.
"""

# Prompt used once per package after all modules are documented. Aggregates
# the generated module summaries into a single-paragraph docstring destined
# for the package's __init__.py. Placeholders {project_description},
# {package_name} and {module_summaries} are filled via str.format().
PACKAGE_INIT_PROMPT_TEMPLATE = """
SYSTEM: You are an expert programmer writing a high-level, one-paragraph summary for a Python package. This summary will be the main docstring for the package's `__init__.py` file.

USER:
Project Description:
\"\"\"
{project_description}
\"\"\"

You are writing the docstring for the `__init__.py` of the `{package_name}` package.

This package contains the following modules. Their summaries are provided below:
{module_summaries}

INSTRUCTIONS:
Write a concise, single-paragraph docstring that summarizes the overall purpose and responsibility of the `{package_name}` package, based on the modules it contains. This docstring will be placed in the `__init__.py` file.
"""

def no_op_callback(event: str, data: dict):
    """Default progress callback: dump each event name and its JSON payload to stdout."""
    payload = json.dumps(data, indent=2)
    print(f"{event}: {payload}")

class DocstringOrchestrator:
    """Drives the end-to-end docstring generation pipeline.

    The orchestrator scans a local path or remote repository for Python
    files, builds an import dependency graph, asks the LLM for docstrings
    one file at a time (dependencies first, so their freshly generated
    docs can be fed back in as context), writes the results into the
    source files, and finally generates a summary docstring for each
    package's ``__init__.py``. Progress is streamed through a callback
    as ``(event, data)`` pairs.
    """

    def __init__(self, path_or_url: str, description: str, exclude: List[str], llm_handler: LLMHandler, progress_callback: Callable[[str, dict], None] = no_op_callback, repo_full_name: Optional[str] = None):
        """Store configuration and wire the LLM handler's logging into the
        progress callback.

        :param path_or_url: Local project directory or an http(s) repository
            URL (remote sources are checked out to a temporary directory).
        :param description: Human-written project description inserted into
            every prompt.
        :param exclude: Patterns of files/directories to skip when scanning.
        :param llm_handler: Handler performing the actual LLM calls.
        :param progress_callback: Receives ``(event, data)`` progress
            updates; defaults to printing to stdout.
        :param repo_full_name: Optional ``owner/repo`` name used in the root
            package summary instead of the directory name.
        """
        self.path_or_url = path_or_url
        self.description = description
        self.exclude = exclude
        self.llm_handler = llm_handler
        self.progress_callback = progress_callback
        self.project_path: Optional[Path] = None
        # A URL source means get_project_path() creates a temporary checkout
        # that must be removed when run() finishes.
        self.is_temp_dir = path_or_url.startswith("http")
        self.repo_full_name = repo_full_name

        def llm_log_wrapper(message: str):
            self.progress_callback("log", {"message": message})

        # Route the handler's internal log messages through our callback so
        # the UI sees a single unified stream.
        self.llm_handler.progress_callback = llm_log_wrapper

    def run(self):
        """Execute the pipeline: resolve the project path, scan it, build the
        dependency graph, document every file and package, and always clean
        up any temporary checkout (even on failure)."""
        def log_to_ui(message: str):
            self.progress_callback("log", {"message": message})

        try:
            self.project_path = scanner.get_project_path(self.path_or_url, log_callback=log_to_ui)

            self.progress_callback("phase", {"id": "scan", "name": "Scanning Project", "status": "in-progress"})
            files = scanner.scan_project(self.project_path, self.exclude)
            log_to_ui(f"Found {len(files)} Python files to document.")
            self.progress_callback("phase", {"id": "scan", "status": "success"})

            graph = parser.build_dependency_graph(files, self.project_path, log_callback=log_to_ui)

            self.progress_callback("phase", {"id": "docstrings", "name": "Generating Docstrings", "status": "in-progress"})
            # Document dependencies before their dependents so the context
            # is available; fall back to arbitrary order if the import graph
            # has cycles (topological sort is undefined then).
            doc_order = list(nx.topological_sort(graph)) if nx.is_directed_acyclic_graph(graph) else list(graph.nodes)

            module_docstrings = self._document_files(graph, doc_order, log_to_ui)
            self._generate_package_summaries(module_docstrings, log_to_ui)

            self.progress_callback("phase", {"id": "docstrings", "status": "success"})

        finally:
            # Remove the temporary clone regardless of success or failure.
            if self.is_temp_dir and self.project_path and self.project_path.exists():
                shutil.rmtree(self.project_path, ignore_errors=True)

    def _document_files(self, graph, doc_order, log_to_ui) -> Dict[Path, str]:
        """Generate and apply docstrings for every file in ``doc_order``.

        Each file gets a single LLM call that returns all of its docstrings
        as one JSON object (see COMBINED_DOCSTRING_PROMPT_TEMPLATE). A
        failure on one file is logged and reported but does not stop the
        remaining files (best-effort loop).

        :returns: Mapping of file path -> generated module-level docstring,
            for files where the LLM produced an ``"__module__"`` entry.
        """
        documented_context: Dict[Path, dict] = {}
        module_docstrings: Dict[Path, str] = {}
        for file_path in doc_order:
            rel_path = file_path.relative_to(self.project_path).as_posix()
            # One subtask covers the entire documentation of this file.
            self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-file-list", "id": f"doc-{rel_path}", "name": f"Documenting {rel_path}", "status": "in-progress"})

            deps = graph.predecessors(file_path)
            dep_context_str = "\n".join([f"File: `{dep.relative_to(self.project_path)}`\n{json.dumps(documented_context.get(dep, {}), indent=2)}\n" for dep in deps]) or "No internal dependencies have been documented yet."

            with open(file_path, "r", encoding="utf-8") as f:
                file_content = f.read()

            prompt = COMBINED_DOCSTRING_PROMPT_TEMPLATE.format(project_description=self.description, dependency_context=dep_context_str, file_path=rel_path, file_content=file_content)

            try:
                # Single LLM call returns every docstring for the file.
                combined_docs = self.llm_handler.generate_documentation(prompt)

                # "__module__" is the file-level summary; the remaining keys
                # are function/class/method docstrings by qualified name.
                module_summary = combined_docs.pop("__module__", None)
                function_class_docs = combined_docs

                updater.update_file_with_docstrings(file_path, function_class_docs, log_callback=log_to_ui)

                if module_summary:
                    updater.update_module_docstring(file_path, module_summary, log_callback=log_to_ui)
                    module_docstrings[file_path] = module_summary

                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "success"})
                # Stored so dependent files can cite this file's docs.
                documented_context[file_path] = function_class_docs
            except Exception as e:
                # Deliberate best-effort: report the failure, keep going.
                log_to_ui(f"Error processing docstrings for {rel_path}: {e}")
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"doc-{rel_path}", "status": "error"})
        return module_docstrings

    def _generate_package_summaries(self, module_docstrings: Dict[Path, str], log_to_ui) -> None:
        """Write a summary docstring into each package's ``__init__.py``,
        derived from the module docstrings generated by _document_files().

        The ``__init__.py`` is created if missing; failures per package are
        logged and reported without aborting the remaining packages.
        """
        # Group module summaries by their parent directory (= package).
        packages = defaultdict(list)
        for file_path, docstring in module_docstrings.items():
            if file_path.name != "__init__.py":
                packages[file_path.parent].append(f"- `{file_path.name}`: {docstring}")

        for package_path, summaries in packages.items():
            rel_path = package_path.relative_to(self.project_path).as_posix()
            init_file = package_path / "__init__.py"
            self.progress_callback("subtask", {"parentId": "docstrings", "listId": "docstring-package-list", "id": f"pkg-{rel_path}", "name": f"Package summary for {rel_path}", "status": "in-progress"})

            try:
                is_root_package = (package_path == self.project_path)
                package_name = self.repo_full_name if is_root_package and self.repo_full_name else package_path.name

                prompt = PACKAGE_INIT_PROMPT_TEMPLATE.format(
                    project_description=self.description,
                    package_name=package_name,
                    module_summaries="\n".join(summaries)
                )
                raw_summary = self.llm_handler.generate_text_response(prompt).strip()
                # BUGFIX: the old code used .strip('"""').strip("'''"), but
                # str.strip() treats its argument as a *set* of characters,
                # so it stripped every leading/trailing quote character —
                # including quotes that were legitimately part of the text.
                # Remove at most one surrounding triple-quote fence instead.
                for fence in ('"""', "'''"):
                    if raw_summary.startswith(fence):
                        raw_summary = raw_summary[len(fence):]
                    if raw_summary.endswith(fence):
                        raw_summary = raw_summary[:-len(fence)]
                package_summary = raw_summary.strip()

                if not init_file.exists():
                    init_file.touch()

                updater.update_module_docstring(init_file, package_summary, log_callback=log_to_ui)
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "success"})
            except Exception as e:
                log_to_ui(f"Error generating package docstring for {rel_path}: {e}")
                self.progress_callback("subtask", {"parentId": "docstrings", "id": f"pkg-{rel_path}", "status": "error"})