selfevolveagent / evoagentx /actions /code_extraction.py
iLOVE2D's picture
Upload 2846 files
5374a2d verified
import os
from typing import Optional, List, Dict
from pydantic import Field
from ..models.base_model import BaseLLM, LLMOutputParser
from .action import Action, ActionInput, ActionOutput
from ..prompts.code_extraction import CODE_EXTRACTION
class CodeExtractionInput(ActionInput):
"""
Input parameters for the CodeExtraction action.
"""
code_string: str = Field(description="The string containing code blocks to extract")
target_directory: str = Field(description="The directory path where extracted code files will be saved")
project_name: Optional[str] = Field(default=None, description="Optional name for the project folder")
class CodeExtractionOutput(ActionOutput):
"""
Output of the CodeExtraction action.
"""
extracted_files: Dict[str, str] = Field(description="Map of filename to file path of saved files")
main_file: Optional[str] = Field(default=None, description="Path to the main file if identified")
error: Optional[str] = Field(default=None, description="Error message if any operation failed")
class CodeBlockInfo(LLMOutputParser):
"""
Information about an extracted code block.
"""
language: str = Field(description="Programming language of the code block")
filename: str = Field(description="Suggested filename for the code block")
content: str = Field(description="The actual code content")
class CodeBlockList(LLMOutputParser):
"""
List of code blocks extracted from text.
"""
code_blocks: List[CodeBlockInfo] = Field(description="List of code blocks")
class CodeExtraction(Action):
"""
An action that extracts and organizes code blocks from text.
This action uses an LLM to analyze text containing code blocks, extract them,
suggest appropriate filenames, and save them to a specified directory. It can
also identify which file is likely the main entry point based on heuristics.
Attributes:
name: The name of the action.
description: A description of what the action does.
prompt: The prompt template used by the action.
inputs_format: The expected format of inputs to this action.
outputs_format: The format of the action's output.
"""
def __init__(self, **kwargs):
name = kwargs.pop("name") if "name" in kwargs else CODE_EXTRACTION["name"]
description = kwargs.pop("description") if "description" in kwargs else CODE_EXTRACTION["description"]
prompt = kwargs.pop("prompt") if "prompt" in kwargs else CODE_EXTRACTION["prompt"]
# inputs_format = kwargs.pop("inputs_format") if "inputs_format" in kwargs else CodeExtractionInput
# outputs_format = kwargs.pop("outputs_format") if "outputs_format" in kwargs else CodeExtractionOutput
inputs_format = kwargs.pop("inputs_format", None) or CodeExtractionInput
outputs_format = kwargs.pop("outputs_format", None) or CodeExtractionOutput
super().__init__(name=name, description=description, prompt=prompt, inputs_format=inputs_format, outputs_format=outputs_format, **kwargs)
def identify_main_file(self, saved_files: Dict[str, str]) -> Optional[str]:
"""Identify the main file from the saved files based on content and file type.
This method uses a combination of common filename conventions and content
analysis to determine which file is likely the main entry point of a project.
Args:
saved_files: Dictionary mapping filenames to their full paths
Returns:
Path to the main file if found, None otherwise
"""
# Priority lookup for common main files by language
main_file_priorities = [
# HTML files
"index.html",
# Python files
"main.py",
"app.py",
# JavaScript files
"index.js",
"main.js",
"app.js",
# Java files
"Main.java",
# C/C++ files
"main.cpp",
"main.c",
# Go files
"main.go",
# Other common entry points
"index.php",
"Program.cs"
]
# First check priority list
for main_file in main_file_priorities:
if main_file in saved_files:
return saved_files[main_file]
# If no priority file found, use heuristics based on file extensions
# If we have HTML files, use the first one
html_files = {k: v for k, v in saved_files.items() if k.endswith('.html')}
if html_files:
return next(iter(html_files.values()))
# Check for Python files with "__main__" section
py_files = {k: v for k, v in saved_files.items() if k.endswith('.py')}
if py_files:
for filename, path in py_files.items():
with open(path, 'r', encoding='utf-8') as f:
content = f.read()
if "if __name__ == '__main__'" in content or 'if __name__ == "__main__"' in content:
return path
# If no main found, return the first Python file
if py_files:
return next(iter(py_files.values()))
# If we have Java files, look for one with a main method
java_files = {k: v for k, v in saved_files.items() if k.endswith('.java')}
if java_files:
for filename, path in java_files.items():
with open(path, 'r', encoding='utf-8') as f:
content = f.read()
if "public static void main" in content:
return path
# If no main found, return the first Java file
if java_files:
return next(iter(java_files.values()))
# For JavaScript applications
js_files = {k: v for k, v in saved_files.items() if k.endswith('.js')}
if js_files:
return next(iter(js_files.values()))
# If all else fails, return the first file
if saved_files:
return next(iter(saved_files.values()))
# No files found
return None
def save_code_blocks(self, code_blocks: List[Dict], target_directory: str) -> Dict[str, str]:
"""Save code blocks to files in the target directory.
Creates the target directory if it doesn't exist and saves each code block
to a file with an appropriate name, handling filename conflicts.
Args:
code_blocks: List of dictionaries containing code block information
target_directory: Directory path where files should be saved
Returns:
Dictionary mapping filenames to their full paths
"""
os.makedirs(target_directory, exist_ok=True)
saved_files = {}
for block in code_blocks:
filename = block.get("filename", "unknown.txt")
content = block.get("content", "")
# Skip empty blocks
if not content.strip():
continue
# Handle filename conflicts
base_filename = filename
counter = 1
while filename in saved_files:
name_parts = base_filename.split('.')
if len(name_parts) > 1:
filename = f"{'.'.join(name_parts[:-1])}_{counter}.{name_parts[-1]}"
else:
filename = f"{base_filename}_{counter}"
counter += 1
# Save to file
file_path = os.path.join(target_directory, filename)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
# Add to map
saved_files[filename] = file_path
return saved_files
def execute(self, llm: Optional[BaseLLM] = None, inputs: Optional[dict] = None, sys_msg: Optional[str]=None, return_prompt: bool = False, **kwargs) -> CodeExtractionOutput:
"""Execute the CodeExtraction action.
Extracts code blocks from the provided text using the specified LLM,
saves them to the target directory, and identifies the main file.
Args:
llm: The LLM to use for code extraction
inputs: Dictionary containing:
- code_string: The string with code blocks to extract
- target_directory: Where to save the files
- project_name: Optional project folder name
sys_msg: Optional system message override for the LLM
return_prompt: Whether to return the prompt along with the result
**kwargs (Any): Additional keyword arguments
Returns:
CodeExtractionOutput with extracted file information
"""
if not llm:
error_msg = "CodeExtraction action requires an LLM."
return CodeExtractionOutput(extracted_files={}, error=error_msg)
if not inputs:
error_msg = "CodeExtraction action received invalid `inputs`: None or empty."
return CodeExtractionOutput(extracted_files={}, error=error_msg)
code_string = inputs.get("code_string", "")
target_directory = inputs.get("target_directory", "")
project_name = inputs.get("project_name", None)
if not code_string:
error_msg = "No code string provided."
return CodeExtractionOutput(extracted_files={}, error=error_msg)
if not target_directory:
error_msg = "No target directory provided."
return CodeExtractionOutput(extracted_files={}, error=error_msg)
# Create project folder if name is provided
if project_name:
project_dir = os.path.join(target_directory, project_name)
else:
project_dir = target_directory
try:
# Use LLM to extract code blocks and suggest filenames
prompt_params = {"code_string": code_string}
system_message = CODE_EXTRACTION["system_prompt"] if sys_msg is None else sys_msg
llm_response: CodeBlockList = llm.generate(
prompt=self.prompt.format(**prompt_params),
system_message=system_message,
parser=CodeBlockList,
parse_mode="json"
)
code_blocks = llm_response.get_structured_data().get("code_blocks", [])
# Save code blocks to files
saved_files = self.save_code_blocks(code_blocks, project_dir)
# Identify main file
main_file = self.identify_main_file(saved_files)
result = CodeExtractionOutput(
extracted_files=saved_files,
main_file=main_file
)
if return_prompt:
return result, self.prompt.format(**prompt_params)
return result
except Exception as e:
error_msg = f"Error extracting code: {str(e)}"
return CodeExtractionOutput(extracted_files={}, error=error_msg)