File size: 11,408 Bytes
5374a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
import os
from typing import Optional, List, Dict
from pydantic import Field
from ..models.base_model import BaseLLM, LLMOutputParser
from .action import Action, ActionInput, ActionOutput
from ..prompts.code_extraction import CODE_EXTRACTION
class CodeExtractionInput(ActionInput):
    """
    Input parameters for the CodeExtraction action.

    Attributes:
        code_string: Text that contains the code blocks to extract (required).
        target_directory: Directory under which the extracted files are saved
            (required).
        project_name: Optional project folder name; defaults to ``None``.
    """

    code_string: str = Field(
        description="The string containing code blocks to extract",
    )
    target_directory: str = Field(
        description="The directory path where extracted code files will be saved",
    )
    project_name: Optional[str] = Field(
        default=None,
        description="Optional name for the project folder",
    )
class CodeExtractionOutput(ActionOutput):
    """
    Output of the CodeExtraction action.

    Attributes:
        extracted_files: Mapping from each saved filename to the path it was
            written to (required).
        main_file: Path of the file identified as the main one, or ``None``
            when none was identified.
        error: Message describing a failed operation, or ``None`` when no
            error was recorded.
    """

    extracted_files: Dict[str, str] = Field(
        description="Map of filename to file path of saved files",
    )
    main_file: Optional[str] = Field(
        default=None,
        description="Path to the main file if identified",
    )
    error: Optional[str] = Field(
        default=None,
        description="Error message if any operation failed",
    )
class CodeBlockInfo(LLMOutputParser):
    """
    Information about an extracted code block.

    Attributes:
        language: Programming language of the block.
        filename: Filename suggested for the block.
        content: The code itself.
    """

    language: str = Field(
        description="Programming language of the code block",
    )
    filename: str = Field(
        description="Suggested filename for the code block",
    )
    content: str = Field(
        description="The actual code content",
    )
class CodeBlockList(LLMOutputParser):
    """
    List of code blocks extracted from text.

    Attributes:
        code_blocks: The extracted blocks, one ``CodeBlockInfo`` per block.
    """

    code_blocks: List[CodeBlockInfo] = Field(
        description="List of code blocks",
    )
class CodeExtraction(Action):
    """
    An action that extracts and organizes code blocks from text.

    This action uses an LLM to analyze text containing code blocks, extract them,
    suggest appropriate filenames, and save them to a specified directory. It can
    also identify which file is likely the main entry point based on heuristics.

    Attributes:
        name: The name of the action.
        description: A description of what the action does.
        prompt: The prompt template used by the action.
        inputs_format: The expected format of inputs to this action.
        outputs_format: The format of the action's output.
    """

    def __init__(self, **kwargs):
        """Initialize the action, filling unset fields from ``CODE_EXTRACTION``.

        Args:
            **kwargs: May contain ``name``, ``description``, ``prompt``,
                ``inputs_format`` and ``outputs_format`` overrides. Any other
                keyword is forwarded unchanged to the parent initializer.
        """
        name = kwargs.pop("name", CODE_EXTRACTION["name"])
        description = kwargs.pop("description", CODE_EXTRACTION["description"])
        prompt = kwargs.pop("prompt", CODE_EXTRACTION["prompt"])
        # Unlike the fields above, a falsy value (e.g. an explicit None) for the
        # formats also falls back to the default class.
        inputs_format = kwargs.pop("inputs_format", None) or CodeExtractionInput
        outputs_format = kwargs.pop("outputs_format", None) or CodeExtractionOutput
        super().__init__(
            name=name,
            description=description,
            prompt=prompt,
            inputs_format=inputs_format,
            outputs_format=outputs_format,
            **kwargs,
        )

    @staticmethod
    def _first_file_containing(paths, markers) -> Optional[str]:
        """Return the first path whose text contains any marker, else ``None``."""
        for path in paths:
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError):
                # An unreadable candidate must not abort the whole detection;
                # it simply cannot be confirmed as an entry point.
                continue
            if any(marker in content for marker in markers):
                return path
        return None

    @staticmethod
    def _sanitize_filename(raw_name, default: str = "unknown.txt") -> str:
        """Turn a suggested filename into a relative path without ``..`` parts.

        Drive prefixes, leading separators and ``.``/``..`` components are
        removed so that joining the result onto a directory stays inside that
        directory. Non-string or empty names yield ``default``.
        """
        if not isinstance(raw_name, str):
            return default
        _, tail = os.path.splitdrive(raw_name.strip())
        # Accept both separator styles, since the name comes from model output.
        parts = [
            part
            for part in tail.replace("\\", "/").split("/")
            if part not in ("", ".", "..")
        ]
        return os.path.join(*parts) if parts else default

    def identify_main_file(self, saved_files: Dict[str, str]) -> Optional[str]:
        """Identify the main file from the saved files based on content and file type.

        This method uses a combination of common filename conventions and content
        analysis to determine which file is likely the main entry point of a project.

        The checks run in this order: well-known entry-point names, the first
        HTML file, a Python file with a ``__main__`` guard (else the first
        Python file), a Java file with a ``main`` method (else the first Java
        file), the first JavaScript file, and finally the first file of any kind.

        Args:
            saved_files: Dictionary mapping filenames to their full paths

        Returns:
            Path to the main file if found, None otherwise
        """
        # Priority lookup for common main files by language
        main_file_priorities = [
            # HTML files
            "index.html",
            # Python files
            "main.py",
            "app.py",
            # JavaScript files
            "index.js",
            "main.js",
            "app.js",
            # Java files
            "Main.java",
            # C/C++ files
            "main.cpp",
            "main.c",
            # Go files
            "main.go",
            # Other common entry points
            "index.php",
            "Program.cs",
        ]

        # Keys may carry a directory prefix (e.g. "src/main.py"); index them by
        # bare filename so such entries still match the priority list.
        by_basename: Dict[str, str] = {}
        for name, path in saved_files.items():
            by_basename.setdefault(os.path.basename(name), path)

        for candidate in main_file_priorities:
            # An exact key wins over a nested file with the same basename.
            if candidate in saved_files:
                return saved_files[candidate]
            if candidate in by_basename:
                return by_basename[candidate]

        def paths_with_suffix(suffix: str) -> List[str]:
            return [path for name, path in saved_files.items() if name.endswith(suffix)]

        # If we have HTML files, use the first one
        html_paths = paths_with_suffix(".html")
        if html_paths:
            return html_paths[0]

        # Prefer a Python file with a "__main__" section, else the first one
        py_paths = paths_with_suffix(".py")
        if py_paths:
            guarded = self._first_file_containing(
                py_paths,
                ("if __name__ == '__main__'", 'if __name__ == "__main__"'),
            )
            return guarded if guarded is not None else py_paths[0]

        # Prefer a Java file with a main method, else the first one
        java_paths = paths_with_suffix(".java")
        if java_paths:
            with_main = self._first_file_containing(
                java_paths, ("public static void main",)
            )
            return with_main if with_main is not None else java_paths[0]

        # For JavaScript applications
        js_paths = paths_with_suffix(".js")
        if js_paths:
            return js_paths[0]

        # If all else fails, return the first file (None when there are no files)
        return next(iter(saved_files.values()), None)

    def save_code_blocks(self, code_blocks: List[Dict], target_directory: str) -> Dict[str, str]:
        """Save code blocks to files in the target directory.

        Creates the target directory if it doesn't exist and saves each code block
        to a file with an appropriate name, handling filename conflicts. Blocks
        whose content is missing or whitespace-only are skipped.

        Filenames are sanitized before use: they are always written below
        ``target_directory`` (absolute paths and ``..`` components are
        neutralized), and intermediate directories are created for nested names.
        Conflicts are only detected among the blocks of this call; an existing
        file on disk with the same name is overwritten.

        Args:
            code_blocks: List of dictionaries containing code block information
                (the ``filename`` and ``content`` keys are read)
            target_directory: Directory path where files should be saved

        Returns:
            Dictionary mapping filenames to their full paths

        Raises:
            OSError: If a directory or file cannot be created or written.
        """
        os.makedirs(target_directory, exist_ok=True)
        saved_files: Dict[str, str] = {}

        for block in code_blocks:
            # `or` also covers keys that are present but None/empty.
            content = block.get("content") or ""

            # Skip empty blocks
            if not content.strip():
                continue

            base_filename = self._sanitize_filename(block.get("filename"))

            # Handle filename conflicts by inserting a counter before the extension
            filename = base_filename
            stem, extension = os.path.splitext(base_filename)
            counter = 1
            while filename in saved_files:
                filename = f"{stem}_{counter}{extension}"
                counter += 1

            file_path = os.path.join(target_directory, filename)

            # Nested names such as "src/app.py" need their parent directory.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)

            saved_files[filename] = file_path

        return saved_files

    def execute(self, llm: Optional[BaseLLM] = None, inputs: Optional[dict] = None, sys_msg: Optional[str]=None, return_prompt: bool = False, **kwargs) -> CodeExtractionOutput:
        """Execute the CodeExtraction action.

        Extracts code blocks from the provided text using the specified LLM,
        saves them to the target directory, and identifies the main file.

        Args:
            llm: The LLM to use for code extraction
            inputs: Dictionary containing:
                - code_string: The string with code blocks to extract
                - target_directory: Where to save the files
                - project_name: Optional project folder name
            sys_msg: Optional system message override for the LLM
            return_prompt: Whether to return the prompt along with the result
            **kwargs (Any): Additional keyword arguments

        Returns:
            CodeExtractionOutput with extracted file information. On success
            with ``return_prompt=True`` a ``(output, prompt)`` tuple is returned
            instead. Every failure path returns a bare CodeExtractionOutput
            with ``error`` set, regardless of ``return_prompt``.
        """
        if not llm:
            error_msg = "CodeExtraction action requires an LLM."
            return CodeExtractionOutput(extracted_files={}, error=error_msg)

        if not inputs:
            error_msg = "CodeExtraction action received invalid `inputs`: None or empty."
            return CodeExtractionOutput(extracted_files={}, error=error_msg)

        code_string = inputs.get("code_string", "")
        target_directory = inputs.get("target_directory", "")
        project_name = inputs.get("project_name", None)

        if not code_string:
            error_msg = "No code string provided."
            return CodeExtractionOutput(extracted_files={}, error=error_msg)

        if not target_directory:
            error_msg = "No target directory provided."
            return CodeExtractionOutput(extracted_files={}, error=error_msg)

        # Create project folder if name is provided
        if project_name:
            project_dir = os.path.join(target_directory, project_name)
        else:
            project_dir = target_directory

        # Any failure of the LLM call, parsing or file I/O is reported through
        # the `error` field instead of being raised.
        try:
            # Use LLM to extract code blocks and suggest filenames
            prompt = self.prompt.format(code_string=code_string)
            system_message = CODE_EXTRACTION["system_prompt"] if sys_msg is None else sys_msg

            llm_response: CodeBlockList = llm.generate(
                prompt=prompt,
                system_message=system_message,
                parser=CodeBlockList,
                parse_mode="json"
            )
            code_blocks = llm_response.get_structured_data().get("code_blocks", [])

            # Save code blocks to files
            saved_files = self.save_code_blocks(code_blocks, project_dir)

            # Identify main file
            main_file = self.identify_main_file(saved_files)

            result = CodeExtractionOutput(
                extracted_files=saved_files,
                main_file=main_file
            )

            if return_prompt:
                return result, prompt
            return result
        except Exception as e:
            error_msg = f"Error extracting code: {str(e)}"
            return CodeExtractionOutput(extracted_files={}, error=error_msg)