# Source: python_project_explainer / prompt_generator.py
# Last commit: "i" (933c2fa) by lafifi-24
"""
Prompt Generator Module
Generates structured prompts for LLM-based code explanation organized by node level.
"""
import re
import ast
import copy
import networkx as nx
from structlog import get_logger
from modal_client import ModalClient
# Module-level structlog logger, named after this module.
logger = get_logger(__name__)
# Instruction header placed at the top of every generated prompt.
_PROMPT_HEADER = """You are a Python code analysis expert.
**CRITICAL RULES:**
1. ONLY use information directly visible in the "TARGET CODE" section
2. For methods marked as "[SUMMARIZED]", reference them by their actual name shown
3. If a method body is replaced with a summary, DO NOT invent details about its implementation
4. State "implementation details not shown" for summarized methods
Your explanation must be brief and cover:
- Purpose: What this code does (1-2 sentences)
- Inputs: Parameters (only those visible)
- Outputs: Return values (only those visible)
- Exceptions: Only exceptions explicitly raised in the visible code (1 sentence)
"""

# Nodes whose unparsed source is shorter than this are not sent to the LLM.
_MIN_SOURCE_CHARS = 1000


def _is_explainable(node) -> bool:
    """Return True when *node* has a namespace, an AST, and is not a lambda."""
    return (
        node.namespace is not None
        and node.get_short_name() != "lambda"
        and node.ast_node is not None
    )


def _summary_marker(used_node) -> str:
    """Build the placeholder text that replaces an already-explained child."""
    definition = used_node.ast_node
    if isinstance(definition, (ast.FunctionDef, ast.AsyncFunctionDef)):
        # Keep the signature visible even though the body is dropped.
        keyword = "async def" if isinstance(definition, ast.AsyncFunctionDef) else "def"
        signature = f"{keyword} {definition.name}({ast.unparse(definition.args)})"
        if definition.returns is not None:
            signature += f" -> {ast.unparse(definition.returns)}"
        return (
            "[SUMMARIZED METHOD]\n"
            f"Method: {used_node.name}\n"
            f"Signature: {signature}\n"
            f"Summary: {used_node.explination}\n"
            "Note: Full implementation replaced for brevity"
        )
    if isinstance(definition, ast.ClassDef):
        return (
            "[SUMMARIZED CLASS]\n"
            f"Class: {used_node.name}\n"
            f"Summary: {used_node.explination}\n"
            "Note: Full implementation replaced for brevity"
        )
    return (
        "[SUMMARIZED]\n"
        f"Name: {used_node.name}\n"
        f"Summary: {used_node.explination}"
    )


def _format_dependency(dependency) -> str:
    """Render one "use" dependency as a bullet entry (no trailing newline)."""
    if hasattr(dependency, "explination"):
        return (
            f"- **{dependency.name}** [EXPLAINED]\n"
            f"- File: {dependency.filename}\n"
            f"- Explanation: {dependency.explination}"
        )
    # Fallback for a dependency that carries no explanation: show its source.
    return (
        f"- **{dependency.name}**\n"
        f"- File: {dependency.filename}\n"
        f"- Python Code: {ast.unparse(dependency.ast_node)}"
    )


def _build_prompt(graph, node) -> str:
    """Assemble the full LLM prompt for a single graph node."""
    # Only the AST is rewritten, so copy just that and leave the node untouched.
    target_ast = copy.deepcopy(node.ast_node)
    original_body = getattr(node.ast_node, "body", None)
    if not isinstance(original_body, list):
        original_body = []

    dependencies = []
    summarized = []
    for used_node in graph.successors(node):
        if not _is_explainable(used_node):
            continue
        label = graph.get_edge_data(node, used_node).get("label")
        explained = hasattr(used_node, "explination")
        if label == "contains":
            if not explained:
                continue
            # Swap the child's definition for its summary at the same index
            # in the copied body; AST nodes are matched by identity.
            for position, child in enumerate(original_body):
                if child is used_node.ast_node:
                    marker = ast.Constant(value=_summary_marker(used_node))
                    target_ast.body[position] = ast.Expr(value=marker)
                    summarized.append(used_node.name)
                    break
        elif label == "use" and explained:
            # NOTE(review): "use" dependencies without an explanation are
            # left out, following the original branch order -- confirm that
            # this is intended rather than inlining their source.
            dependencies.append(used_node)

    logger.info(f"used modules numers {len(dependencies)}")

    parts = [_PROMPT_HEADER, f"**Target File Path:** {node.filename}\n\n"]
    if dependencies:
        parts.append("**External Dependencies Used:**\n")
        # Separate entries with newlines so they do not run together.
        parts.append("\n".join(_format_dependency(dep) for dep in dependencies))
        parts.append("\n\n")
    if summarized:
        parts.append(
            "**Note:** The following methods are summarized in the code below: "
            f"{', '.join(summarized)}\n\n"
        )
    parts.append(f"""**TARGET CODE:**
```python
{ast.unparse(target_ast)}
```
Explain the TARGET CODE above and Brief and precise
""")
    return "".join(parts)


def generate_explaination_by_level(graph: nx.DiGraph, levels: dict) -> dict[int, dict]:
    """
    Generate LLM prompts level by level and attach the resulting explanations.

    Levels are handled in ascending order. After a level's prompts are sent to
    the LLM, each result is stored on its node as ``node.explination``, so
    prompts built for later levels can reference those explanations instead of
    the full source of already-explained children and dependencies.

    Each prompt contains:
    - File path
    - Used modules (successors reached through a "use" edge)
    - Node content (unparsed AST, with explained "contains" children
      replaced by a summary marker)

    Nodes without a namespace, without an AST, named "lambda", or whose
    unparsed source is shorter than ``_MIN_SOURCE_CHARS`` are skipped.

    Args:
        graph: NetworkX directed graph with code nodes
        levels: Dictionary mapping level → iterable of nodes at that level

    Returns:
        Dictionary mapping level → {node: prompt_string}. Levels for which
        no node qualified are omitted; an empty ``levels`` yields ``{}``.
    """
    prompts_by_level = {}
    for level in sorted(levels):
        batch = {}
        for node in levels[level]:
            if not _is_explainable(node):
                continue
            if len(ast.unparse(node.ast_node)) < _MIN_SOURCE_CHARS:
                continue
            batch[node] = _build_prompt(graph, node)

        if not batch:
            continue
        prompts_by_level[level] = batch

        # Pass a concrete list rather than a dict view to the inference client.
        results = ModalClient.infer_llm(list(batch.values()))
        for index, node in enumerate(batch):
            node.explination = results[index]
    return prompts_by_level