Spaces:
Sleeping
Sleeping
| # services/agents/extract_tables_agent.py | |
| """ | |
| Table Extraction Agent - Wraps utilities/extract_tables.py | |
| """ | |
| from typing import Dict, Any | |
| from services.agents.base_agent import BaseUtilityAgent | |
| from utilities.extract_tables import extract_tables_remote | |
class ExtractTablesAgent(BaseUtilityAgent):
    """
    Agent that pairs the ``extract_tables_remote`` utility with a
    table-extraction persona (name, role, goal and backstory).
    """

    def __init__(self) -> None:
        """Register this agent's persona and utility function with the base class."""
        persona = (
            "You are an expert in table detection and structured data extraction.\n"
            "You understand table layouts, merged cells, headers, and can identify when tables\n"
            "span multiple pages. You validate extraction results for structural integrity."
        )
        super().__init__(
            name="extract_tables",
            role="Table Extraction Specialist",
            goal="Extract structured tabular data from documents with perfect row and column alignment",
            backstory=persona,
            utility_function=extract_tables_remote,
        )

    def _prepare_task_description(self, input_data: Dict[str, Any]) -> str:
        """
        Build the validation prompt for one table-extraction run.

        Args:
            input_data: Mapping that may carry ``filename``, ``start_page``
                and ``end_page``. Missing keys fall back to ``"document"``,
                ``1`` and ``1`` respectively.

        Returns:
            A multi-line instruction string naming the file and page range,
            listing the quality criteria, and asking for a confidence score.
        """
        doc_name = input_data.get("filename", "document")
        # NOTE(review): both page bounds default to 1 independently, so an
        # input that supplies only start_page > 1 renders an inverted range
        # (e.g. "pages 5-1"). Confirm whether end_page should fall back to
        # start_page, and keep it in step with the utility's own defaults.
        first_page = input_data.get("start_page", 1)
        last_page = input_data.get("end_page", 1)

        # Adjacent literals are concatenated at compile time; each "\n"
        # marks a line break of the prompt text.
        return (
            f"Validate the table extraction from {doc_name} (pages {first_page}-{last_page}).\n"
            "Assess extraction quality based on:\n"
            "- Table detection: Were all tables identified?\n"
            "- Structure: Are rows and columns aligned correctly?\n"
            "- Headers: Are column/row headers preserved?\n"
            "- Data integrity: Is cell data accurate?\n"
            "Provide confidence score (0.0-1.0)."
        )