import re
from collections import Counter, defaultdict

from agency_swarm.tools import BaseTool
from pydantic import Field

# Compiled once at import time and shared by every call to run().
_DATE_PATTERN = re.compile(r'\b\d{4}-\d{2}-\d{2}\b')
_NUMBER_PATTERN = re.compile(r'\b\d+\b')


class DataValidationTool(BaseTool):
    """
    Validate raw report data and wrap it into a final report.

    Two checks are performed, in this order:

    1. Numerical consistency: every standalone run of digits must occur
       exactly as many times as ``expected_repetitions`` says, or exactly
       once when no expectation is given.
    2. Date presence: the data must contain at least one token shaped like
       ``YYYY-MM-DD``. Only the shape is checked, not whether the date
       exists on the calendar.
    """

    raw_data: str = Field(
        ...,
        description="The raw data to be validated and compiled into the final report."
    )
    expected_repetitions: dict = Field(
        default_factory=dict,
        description="A dictionary specifying numbers that are expected to repeat and their expected counts."
    )

    def run(self) -> str:
        """
        Validate ``raw_data`` and compile it into the final report.

        Returns:
            The final report (``raw_data`` under a "Final Report:" heading)
            when both checks pass; otherwise a message starting with
            "Data validation failed:" that describes the first failing check.

        Notes:
            * Digits that belong to a ``YYYY-MM-DD`` date are not counted as
              numbers, so several dates sharing a year, month or day do not
              trigger a false numerical conflict.
            * Keys of ``expected_repetitions`` may be strings or integers;
              they are compared by their string form.
            * An expected count of 0 is treated the same as "not specified",
              i.e. the number is expected exactly once.
            * Numbers listed in ``expected_repetitions`` that never occur in
              the data are not reported.
            * Decimal values are split at the dot ("3.14" is counted as "3"
              and "14") because numbers are matched as plain digit runs.
        """
        # Regex matches are strings, so normalise the keys to strings as well;
        # otherwise integer keys such as {42: 2} would never be found.
        expected_by_number = {
            str(key): value for key, value in self.expected_repetitions.items()
        }

        # Blank out dates before counting so their year/month/day parts are
        # not mistaken for repeated standalone numbers.
        text_without_dates = _DATE_PATTERN.sub(" ", self.raw_data)
        number_counts = Counter(_NUMBER_PATTERN.findall(text_without_dates))

        conflicting_numbers = []
        for number, count in number_counts.items():
            expected_count = expected_by_number.get(number, 0)
            if expected_count == 0:
                # If no expected count is provided, assume the number should appear once
                expected_count = 1
            if count != expected_count:
                conflicting_numbers.append(
                    f"{number} (found {count}, expected {expected_count})"
                )

        if conflicting_numbers:
            return f"Data validation failed: Conflicting numerical data found for numbers: {', '.join(conflicting_numbers)}."

        # Check for valid date formats (e.g., YYYY-MM-DD)
        if not _DATE_PATTERN.search(self.raw_data):
            return "Data validation failed: No valid date formats found."

        # Compile the validated data into a final report
        return f"Final Report:\n\n{self.raw_data}"