Spaces:
Runtime error
Runtime error
Yago Bolivar
feat: implement SpreadsheetTool for parsing and querying Excel files with detailed summaries
108e7a1
| #!/usr/bin/env python3 | |
| # Testing the spreadsheet tool with a downloaded Excel file | |
| import os | |
| import sys | |
| # Add the parent directory to sys.path to find the src module | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from src.spreadsheet_tool import SpreadsheetTool | |
| def main(): | |
| # Initialize the spreadsheet tool | |
| spreadsheet_tool = SpreadsheetTool() | |
| # Path to the downloaded Excel file | |
| # Need to navigate up one level and then to downloaded_files | |
| project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
| excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx") | |
| print(f"Testing SpreadsheetTool with file: {excel_file_path}") | |
| print(f"File exists: {os.path.exists(excel_file_path)}") | |
| # Parse the spreadsheet | |
| print("\n--- PARSING SPREADSHEET ---") | |
| parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path) | |
| if parsed_data.get("error"): | |
| print(f"Error: {parsed_data['error']}") | |
| return | |
| # Display basic information about the spreadsheet | |
| print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:") | |
| print(f"Sheet names: {parsed_data['sheet_names']}") | |
| # Display a summary of each sheet | |
| print("\n--- SHEET SUMMARIES ---") | |
| for sheet_name, info in parsed_data["summary"].items(): | |
| print(f"\nSheet: {sheet_name}") | |
| print(f" Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns") | |
| print(f" Column names: {info['columns']}") | |
| print(f" Numeric columns: {info['numeric_columns']}") | |
| print(f" Text columns: {info['text_columns']}") | |
| print(f" Contains null values: {info['has_nulls']}") | |
| # Display a sample of the first 3 rows | |
| print(f"\n Sample data (first 3 rows):") | |
| for i, row in enumerate(info['first_few_rows']): | |
| print(f" Row {i+1}: {row}") | |
| # Test the query_data method for numeric operations | |
| print("\n--- TESTING QUERY OPERATIONS ---") | |
| for query in ["sum", "average", "count"]: | |
| print(f"\nTesting '{query}' operation:") | |
| query_result = spreadsheet_tool.query_data(parsed_data, query) | |
| if query_result.get("error"): | |
| print(f" Error: {query_result['error']}") | |
| else: | |
| # Remove data_structure from output to keep it cleaner | |
| if "data_structure" in query_result: | |
| del query_result["data_structure"] | |
| print(f" Result: {query_result}") | |
| # Test extracting specific data | |
| print("\n--- TESTING DATA EXTRACTION ---") | |
| # We'll extract data from the first sheet | |
| first_sheet = parsed_data["sheet_names"][0] | |
| all_columns = parsed_data["summary"][first_sheet]["columns"] | |
| # Extract first two columns from the first sheet | |
| if len(all_columns) >= 2: | |
| extract_columns = all_columns[:2] | |
| print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':") | |
| extract_result = spreadsheet_tool.extract_specific_data( | |
| parsed_data, | |
| sheet_name=first_sheet, | |
| column_names=extract_columns | |
| ) | |
| if extract_result.get("error"): | |
| print(f" Error: {extract_result['error']}") | |
| else: | |
| print(f" Extracted data shape: {extract_result['shape']}") | |
| print(f" First few rows:") | |
| for i, row in enumerate(extract_result['data'][:3]): | |
| print(f" Row {i+1}: {row}") | |
| if __name__ == "__main__": | |
| main() | |