# Hugging Face Spaces page residue: Space status "Runtime error"; author charSLee013.
# Commit 1ea26af: feat: complete Hugging Face Spaces deployment with production-ready CognitiveKernel-Launchpad
"""
File prompt management for CognitiveKernel-Pro.

Clean, type-safe prompt building following Linus Torvalds' engineering principles:
- No magic strings or eval() calls
- Clear interfaces and data structures
- Fail fast with proper validation
- Zero technical debt
"""
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import List, Dict, Any, Optional, Union
class PromptType(Enum):
    """The three kinds of prompt that can be built for file operations."""

    PLAN = "plan"
    ACTION = "action"
    END = "end"
class ActionType(Enum):
    """Names of the file actions that are considered valid."""

    LOAD_FILE = "load_file"
    READ_TEXT = "read_text"
    READ_SCREENSHOT = "read_screenshot"
    SEARCH = "search"
    STOP = "stop"

    # Must be a classmethod: it takes ``cls`` and is meant to be called on the
    # enum itself (``ActionType.is_valid("stop")``), not on a member.
    @classmethod
    def is_valid(cls, action: str) -> bool:
        """Return whether ``action`` equals the value of one of the members.

        Args:
            action: Candidate action name, e.g. ``"read_text"``.

        Returns:
            ``True`` if ``action`` matches a member value exactly
            (case-sensitive), otherwise ``False``.
        """
        return any(action == member.value for member in cls)
# ``@dataclass`` is required here: the fields below rely on the generated
# ``__init__`` (used positionally by the factories) and on
# ``field(default_factory=dict)`` being turned into a per-instance default.
@dataclass
class FileActionResult:
    """Result of a file action: a success flag, a message and optional data."""

    success: bool
    message: str
    data: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def create_success(cls, message: str, data: Optional[Dict[str, Any]] = None) -> 'FileActionResult':
        """Create a successful result.

        Args:
            message: Human-readable description of the outcome.
            data: Optional payload; ``None`` (or any empty mapping) is
                replaced by a fresh empty dict.

        Returns:
            A result with ``success`` set to ``True``.
        """
        return cls(True, message, data or {})

    @classmethod
    def create_failure(cls, message: str) -> 'FileActionResult':
        """Create a failed result carrying ``message`` and an empty ``data`` dict."""
        return cls(False, message, {})

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a dict with keys ``success``, ``message`` and ``data``.

        The ``data`` value is the same dict object held by the instance, not a copy.
        """
        return {
            "success": self.success,
            "message": self.message,
            "data": self.data,
        }
# ``@dataclass`` is required here: it generates the keyword-accepting
# ``__init__`` and is what invokes ``__post_init__`` after construction.
@dataclass
class FilePromptConfig:
    """Limits that are substituted into the file prompts.

    Raises:
        ValueError: On construction, if ``max_file_read_tokens`` is not
            positive or ``max_file_screenshots`` is negative.
    """

    max_file_read_tokens: int = 4000
    max_file_screenshots: int = 5

    def __post_init__(self):
        """Validate the configured limits; zero screenshots is allowed."""
        if self.max_file_read_tokens <= 0:
            raise ValueError("max_file_read_tokens must be positive")
        if self.max_file_screenshots < 0:
            raise ValueError("max_file_screenshots cannot be negative")
# Template constants - clean separation of content from logic.
# System prompt for the planning step; FilePromptBuilder registers it under
# PromptType.PLAN. It contains none of the placeholders handled by
# FilePromptBuilder._replace_template_vars, so the builder's substitution
# leaves it unchanged.
PLAN_SYSTEM_TEMPLATE = """You are an expert task planner for file agent tasks.
## Available Information
- Target Task: The specific file task to accomplish
- Recent Steps: Latest actions taken by the file agent
- Previous Progress State: JSON representation of task progress
## Progress State Structure
- completed_list (List[str]): Record of completed critical steps
- todo_list (List[str]): Planned future actions (plan multiple steps ahead)
- experience (List[str]): Self-contained notes from past attempts
- information (List[str]): Important collected information for memory
## Guidelines
1. Update progress state based on latest observations
2. Create evaluable Python dictionary (no eval() calls in production)
3. Maintain clean, relevant progress state
4. Document insights in experience field for unproductive attempts
5. Record important page information in information field
6. Stop with N/A if repeated jailbreak/content filter issues
7. Scan the complete file when possible
Example progress state:
{
"completed_list": ["Scanned last page"],
"todo_list": ["Count Geoffrey Hinton mentions on penultimate page"],
"experience": ["Visual information needed - use read_screenshot"],
"information": ["Three Geoffrey Hinton mentions found on last page"]
}
"""
# System prompt for the action step; FilePromptBuilder registers it under
# PromptType.ACTION. The literal tokens MAX_FILE_READ_TOKENS and
# MAX_FILE_SCREENSHOT in guideline 9 are placeholders: the builder's
# _replace_template_vars swaps them for the configured limits via plain
# str.replace, so their spelling must stay in sync with that method.
ACTION_SYSTEM_TEMPLATE = """You are an intelligent file interaction assistant.
Generate Python code using predefined action functions.
## Available Actions
- load_file(file_name: str) -> str: Load file into memory (PDFs to Markdown)
- read_text(file_name: str, page_id_list: list) -> str: Text-only processing
- read_screenshot(file_name: str, page_id_list: list) -> str: Multimodal processing
- search(file_name: str, key_word_list: list) -> str: Keyword search
- stop(answer: str, summary: str) -> str: Conclude task
## Action Guidelines
1. Issue only valid, single actions per step
2. Avoid repetition
3. Always print action results
4. Stop when task completed or unrecoverable errors
5. Use defined functions only - no alternative libraries
6. Load files before reading (load_file first)
7. Use Python code if load_file fails (e.g., unzip archives)
8. Use search only for very long documents with exact keyword needs
9. Read fair amounts: <MAX_FILE_READ_TOKENS tokens, <MAX_FILE_SCREENSHOT images
## Strategy
1. Step-by-step approach for long documents
2. Reflect on previous steps and try alternatives for recurring errors
3. Review progress state and compare with current information
4. Follow See-Think-Act pattern: provide Thought, then Code
"""
# System prompt for the final-output step; FilePromptBuilder registers it
# under PromptType.END. It contains no substitution placeholders.
END_SYSTEM_TEMPLATE = """Generate well-formatted output for completed file agent tasks.
## Available Information
- Target Task: The specific task accomplished
- Recent Steps: Latest agent actions
- Progress State: JSON representation of task progress
- Final Step: Last action before execution concludes
- Stop Reason: Reason for stopping ("Normal Ending" if complete)
## Guidelines
1. Deliver well-formatted output per task instructions
2. Generate Python dictionary with 'output' and 'log' fields
3. For incomplete tasks: empty output string with detailed log explanations
4. Record partial information in logs for future reference
## Output Examples
Success: {"output": "Found 5 Geoffrey Hinton mentions", "log": "Task completed..."}
Failure: {"output": "", "log": "Incomplete due to max steps exceeded..."}
"""
class FilePromptBuilder:
    """Build system/user chat messages for the file agent's plan, action and end steps.

    Every public ``build_*_prompt`` method returns a two-element list: a
    ``system`` message (the template registered for that prompt type, with the
    configured limits substituted in) followed by a ``user`` message assembled
    from the task context. When screenshots are supplied, the user message
    content is a list of text/image parts instead of a plain string.
    """

    def __init__(self, config: "FilePromptConfig"):
        """Store ``config`` and register the system template for each prompt type."""
        self.config = config
        self._templates = {
            PromptType.PLAN: PLAN_SYSTEM_TEMPLATE,
            PromptType.ACTION: ACTION_SYSTEM_TEMPLATE,
            PromptType.END: END_SYSTEM_TEMPLATE,
        }

    def build_plan_prompt(
        self,
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        file_metadata: List[Dict[str, Any]],
        textual_content: str,
        visual_content: Optional[List[str]] = None,
        image_suffix: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """Build the planning prompt.

        Args:
            task: Description of the target task.
            recent_steps: Pre-rendered text of the latest agent steps.
            progress_state: Current progress state; rendered with ``str()``.
            file_metadata: Metadata about the files; rendered with ``str()``.
            textual_content: Text of the content currently in view.
            visual_content: Optional image payloads, one per screenshot.
            image_suffix: Optional image formats matching ``visual_content``.

        Returns:
            ``[system_message, user_message]``.
        """
        return self._build_step_prompt(
            PromptType.PLAN,
            task,
            recent_steps,
            progress_state,
            file_metadata,
            textual_content,
            visual_content,
            image_suffix,
        )

    def build_action_prompt(
        self,
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        file_metadata: List[Dict[str, Any]],
        textual_content: str,
        visual_content: Optional[List[str]] = None,
        image_suffix: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """Build the action prompt.

        Takes the same arguments as :meth:`build_plan_prompt` and returns
        ``[system_message, user_message]`` using the action template and the
        action output-format section.
        """
        return self._build_step_prompt(
            PromptType.ACTION,
            task,
            recent_steps,
            progress_state,
            file_metadata,
            textual_content,
            visual_content,
            image_suffix,
        )

    def build_end_prompt(
        self,
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        textual_content: str,
        current_step: str,
        stop_reason: str
    ) -> List[Dict[str, Any]]:
        """Build the final-output prompt (text only, no screenshots).

        Args:
            task: Description of the target task.
            recent_steps: Pre-rendered text of the latest agent steps.
            progress_state: Current progress state; rendered with ``str()``.
            textual_content: Text of the content currently in view.
            current_step: Pre-rendered text of the final step.
            stop_reason: Why execution stopped.

        Returns:
            ``[system_message, user_message]``.
        """
        user_content = self._build_end_user_content(
            task=task,
            recent_steps=recent_steps,
            progress_state=progress_state,
            textual_content=textual_content,
            current_step=current_step,
            stop_reason=stop_reason,
        )
        return self._create_message_pair(PromptType.END, user_content)

    def _build_step_prompt(
        self,
        prompt_type: "PromptType",
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        file_metadata: List[Dict[str, Any]],
        textual_content: str,
        visual_content: Optional[List[str]],
        image_suffix: Optional[List[str]]
    ) -> List[Dict[str, Any]]:
        """Shared implementation of the plan and action prompts, which differ only in type."""
        user_content = self._build_user_content(
            task=task,
            recent_steps=recent_steps,
            progress_state=progress_state,
            file_metadata=file_metadata,
            textual_content=textual_content,
            prompt_type=prompt_type,
        )
        return self._create_message_pair(
            prompt_type, user_content, visual_content, image_suffix
        )

    def _build_user_content(
        self,
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        file_metadata: List[Dict[str, Any]],
        textual_content: str,
        prompt_type: "PromptType"
    ) -> str:
        """Assemble the user message text for plan/action prompts.

        The task is stated twice (first and last section). An output-format
        section is appended for PLAN and ACTION; any other prompt type gets
        none.
        """
        sections = [
            f"## Target Task\n{task}\n",
            f"## Recent Steps\n{recent_steps}\n",
            f"## Progress State\n{progress_state}\n",
            f"## File Metadata\n{file_metadata}\n",
            f"## Current Content\n{textual_content}\n",
            f"## Target Task (Repeated)\n{task}\n",
        ]
        if prompt_type == PromptType.PLAN:
            sections.append(self._get_plan_output_format())
        elif prompt_type == PromptType.ACTION:
            sections.append(self._get_action_output_format())
        return "\n".join(sections)

    def _build_end_user_content(
        self,
        task: str,
        recent_steps: str,
        progress_state: Dict[str, Any],
        textual_content: str,
        current_step: str,
        stop_reason: str
    ) -> str:
        """Assemble the user message text for the end prompt."""
        sections = [
            f"## Target Task\n{task}\n",
            f"## Recent Steps\n{recent_steps}\n",
            f"## Progress State\n{progress_state}\n",
            f"## Current Content\n{textual_content}\n",
            f"## Final Step\n{current_step}\n",
            f"## Stop Reason\n{stop_reason}\n",
            f"## Target Task (Repeated)\n{task}\n",
            self._get_end_output_format(),
        ]
        return "\n".join(sections)

    def _create_message_pair(
        self,
        prompt_type: "PromptType",
        user_content: str,
        visual_content: Optional[List[str]] = None,
        image_suffix: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """Create the system/user message pair for ``prompt_type``.

        Raises:
            KeyError: If no template is registered for ``prompt_type``.
        """
        system_prompt = self._replace_template_vars(self._templates[prompt_type])
        user_payload = user_content
        # With screenshots the user content switches from a plain string to a
        # list of typed parts (one text part followed by the images).
        if visual_content:
            user_payload = self._add_visual_content(
                user_content, visual_content, image_suffix
            )
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_payload},
        ]

    def _replace_template_vars(self, template: str) -> str:
        """Substitute the configured limits for the template placeholders.

        Plain substring replacement of ``MAX_FILE_READ_TOKENS`` and
        ``MAX_FILE_SCREENSHOT``; templates without them are returned unchanged.
        """
        return template.replace(
            "MAX_FILE_READ_TOKENS", str(self.config.max_file_read_tokens)
        ).replace(
            "MAX_FILE_SCREENSHOT", str(self.config.max_file_screenshots)
        )

    def _add_visual_content(
        self,
        text_content: str,
        visual_content: List[str],
        image_suffix: Optional[List[str]] = None
    ) -> List[Dict[str, Any]]:
        """Combine the text and the screenshots into a multi-part message content.

        Args:
            text_content: User message text; a screenshot heading is appended.
            visual_content: Image payloads, presumably base64 strings since
                they are embedded in a ``;base64,`` data URL.
            image_suffix: Image format per screenshot (e.g. ``"png"``).
                Missing entries default to ``"png"``; surplus entries are
                ignored. The caller's list is never modified.

        Returns:
            One ``text`` part followed by one ``image_url`` part per screenshot.
        """
        # Work on a copy: padding the caller's list in place would leak
        # "png" entries into later calls that reuse the same list.
        suffixes = list(image_suffix) if image_suffix else []
        missing = len(visual_content) - len(suffixes)
        if missing > 0:
            suffixes.extend(["png"] * missing)

        content_parts: List[Dict[str, Any]] = [
            {"type": "text", "text": text_content + "\n\n## Screenshot of current pages"}
        ]
        content_parts.extend(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/{suffix};base64,{img_data}"},
            }
            for suffix, img_data in zip(suffixes, visual_content)
        )
        return content_parts

    def _get_plan_output_format(self) -> str:
        """Return the output-format section appended to plan prompts."""
        return (
            "## Output\n"
            "Please generate your response in this format:\n"
            "Thought: {Explain your planning reasoning in one line. Review previous steps, describe new observations, explain your rationale.}\n"
            'Code: {Output Python dict of updated progress state. Wrap with "```python ```" marks.}\n'
        )

    def _get_action_output_format(self) -> str:
        """Return the output-format section appended to action prompts."""
        return (
            "## Output\n"
            "Please generate your response in this format:\n"
            "Thought: {Explain your action reasoning in one line. Review previous steps, describe new observations, explain your rationale.}\n"
            'Code: {Output Python code for next action. Issue ONLY ONE action. Wrap with "```python ```" marks.}\n'
        )

    def _get_end_output_format(self) -> str:
        """Return the output-format section appended to end prompts."""
        return (
            "## Output\n"
            "Please generate your response in this format:\n"
            "Thought: {Explain your reasoning for the final output in one line.}\n"
            'Code: {Output Python dict with final result. Wrap with "```python ```" marks.}\n'
        )

    def _get_base_template(self, prompt_type: "PromptType") -> str:
        """Return the raw (unsubstituted) template for ``prompt_type``; intended for tests."""
        return self._templates[prompt_type]
# Backward compatibility interface - clean migration path
def create_prompt_builder(
    max_file_read_tokens: int = 4000,
    max_file_screenshots: int = 5
) -> FilePromptBuilder:
    """Return a FilePromptBuilder configured with the given limits.

    Args:
        max_file_read_tokens: Value for ``FilePromptConfig.max_file_read_tokens``.
        max_file_screenshots: Value for ``FilePromptConfig.max_file_screenshots``.

    Returns:
        A builder wrapping a freshly created ``FilePromptConfig``.
    """
    return FilePromptBuilder(
        FilePromptConfig(
            max_file_read_tokens=max_file_read_tokens,
            max_file_screenshots=max_file_screenshots,
        )
    )
# Legacy function wrappers for backward compatibility
def file_plan(**kwargs) -> List[Dict[str, Any]]:
    """Build the plan prompt from legacy keyword arguments.

    Required keys: ``task``, ``recent_steps_str``, ``state``,
    ``textual_content``. Optional keys: ``max_file_read_tokens`` (default
    4000), ``max_file_screenshots`` (default 5), ``visual_content``,
    ``image_suffix``, ``loaded_files``, ``file_meta_data``.

    Raises:
        KeyError: If a required key is missing.
    """
    token_limit = kwargs.get('max_file_read_tokens', 4000)
    screenshot_limit = kwargs.get('max_file_screenshots', 5)
    prompt_builder = create_prompt_builder(
        max_file_read_tokens=token_limit,
        max_file_screenshots=screenshot_limit,
    )
    return prompt_builder.build_plan_prompt(
        task=kwargs['task'],
        recent_steps=kwargs['recent_steps_str'],
        progress_state=kwargs['state'],
        file_metadata=_format_legacy_metadata(kwargs),
        textual_content=kwargs['textual_content'],
        visual_content=kwargs.get('visual_content'),
        image_suffix=kwargs.get('image_suffix'),
    )
def file_action(**kwargs) -> List[Dict[str, Any]]:
    """Build the action prompt from legacy keyword arguments.

    Required keys: ``task``, ``recent_steps_str``, ``state``,
    ``textual_content``. Optional keys: ``max_file_read_tokens`` (default
    4000), ``max_file_screenshots`` (default 5), ``visual_content``,
    ``image_suffix``, ``loaded_files``, ``file_meta_data``.

    Raises:
        KeyError: If a required key is missing.
    """
    token_limit = kwargs.get('max_file_read_tokens', 4000)
    screenshot_limit = kwargs.get('max_file_screenshots', 5)
    prompt_builder = create_prompt_builder(
        max_file_read_tokens=token_limit,
        max_file_screenshots=screenshot_limit,
    )
    return prompt_builder.build_action_prompt(
        task=kwargs['task'],
        recent_steps=kwargs['recent_steps_str'],
        progress_state=kwargs['state'],
        file_metadata=_format_legacy_metadata(kwargs),
        textual_content=kwargs['textual_content'],
        visual_content=kwargs.get('visual_content'),
        image_suffix=kwargs.get('image_suffix'),
    )
def file_end(**kwargs) -> List[Dict[str, Any]]:
    """Build the end prompt from legacy keyword arguments.

    Required keys: ``task``, ``recent_steps_str``, ``state``,
    ``textual_content``, ``current_step_str``, ``stop_reason``. The builder is
    always created with the default limits; limit keys in ``kwargs`` are not
    read.

    Raises:
        KeyError: If a required key is missing.
    """
    return create_prompt_builder().build_end_prompt(
        task=kwargs['task'],
        recent_steps=kwargs['recent_steps_str'],
        progress_state=kwargs['state'],
        textual_content=kwargs['textual_content'],
        current_step=kwargs['current_step_str'],
        stop_reason=kwargs['stop_reason'],
    )
def _format_legacy_metadata(kwargs: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Wrap the legacy file-metadata keys into the list form the builder expects.

    Returns a one-element list whose dict holds ``loaded_files`` (default
    ``[]``) and ``file_meta_data`` (default ``{}``) taken from ``kwargs``.
    """
    loaded_files = kwargs.get('loaded_files', [])
    file_meta_data = kwargs.get('file_meta_data', {})
    legacy_entry = {
        "loaded_files": loaded_files,
        "file_meta_data": file_meta_data,
    }
    return [legacy_entry]
# Legacy PROMPTS dict for backward compatibility: maps each legacy prompt name
# to the wrapper function of the same name, each of which takes keyword
# arguments and returns the [system, user] message list.
PROMPTS = {
    "file_plan": file_plan,
    "file_action": file_action,
    "file_end": file_end,
}
# End of module. The legacy wrappers and PROMPTS mapping above are retained for backward compatibility.