Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Excel File Parser for SDR Status Tracker | |
| Reads Excel files (.xlsx) and converts them to the same row format | |
| that the Google Sheets API returns, enabling local file fallback | |
| when Google Sheet access is not available. | |
| Usage: | |
| # As a module | |
| from excel_parser import read_excel_file | |
| rows = read_excel_file("data/Case status_ February 2026.xlsx") | |
| # CLI for testing | |
| python excel_parser.py data/Case_status_February_2026.xlsx | |
| """ | |
| import os | |
| import sys | |
| from typing import List, Optional | |
| from openpyxl import load_workbook | |
| # Default tab name to look for | |
| DEFAULT_TAB_NAME = "DAILY - for SDR to add data🌟" | |
| def read_excel_file( | |
| file_path: str, | |
| tab_name: str = DEFAULT_TAB_NAME, | |
| max_col: str = "AI" | |
| ) -> List[List]: | |
| """ | |
| Read an Excel file and return rows as list of lists. | |
| Matches the format returned by Google Sheets API: | |
| - Returns list of rows, where each row is a list of cell values | |
| - Empty cells are represented as empty strings "" | |
| - Rows are trimmed to remove trailing empty cells (like Sheets API) | |
| Args: | |
| file_path: Path to the .xlsx file | |
| tab_name: Name of the worksheet tab to read | |
| max_col: Maximum column to read (e.g., "AI" = column 35) | |
| Returns: | |
| List of rows, each row is a list of cell values (strings) | |
| Raises: | |
| FileNotFoundError: If the Excel file doesn't exist | |
| ValueError: If the specified tab is not found | |
| """ | |
| if not os.path.exists(file_path): | |
| raise FileNotFoundError(f"Excel file not found: {file_path}") | |
| # Load workbook (data_only=True to get calculated values, not formulas) | |
| wb = load_workbook(file_path, data_only=True) | |
| # Find the tab | |
| if tab_name not in wb.sheetnames: | |
| # Try partial match | |
| matching_tabs = [name for name in wb.sheetnames if tab_name.lower() in name.lower()] | |
| if matching_tabs: | |
| tab_name = matching_tabs[0] | |
| print(f"Using matching tab: {tab_name}") | |
| else: | |
| available_tabs = ", ".join(wb.sheetnames) | |
| raise ValueError(f"Tab '{tab_name}' not found. Available tabs: {available_tabs}") | |
| ws = wb[tab_name] | |
| # Convert column letter to number (e.g., "AI" -> 35) | |
| max_col_num = column_letter_to_number(max_col) | |
| # Read all rows | |
| rows = [] | |
| for row in ws.iter_rows(min_row=1, max_col=max_col_num): | |
| row_values = [] | |
| for cell in row: | |
| # Convert cell value to string (matching Google Sheets behavior) | |
| if cell.value is None: | |
| row_values.append("") | |
| elif isinstance(cell.value, (int, float)): | |
| # Handle percentages (stored as decimals in Excel) | |
| if cell.number_format and '%' in cell.number_format: | |
| # Convert decimal to percentage integer (0.5 -> 50) | |
| row_values.append(str(int(cell.value * 100))) | |
| else: | |
| # Keep numbers as-is | |
| if cell.value == int(cell.value): | |
| row_values.append(str(int(cell.value))) | |
| else: | |
| row_values.append(str(cell.value)) | |
| else: | |
| row_values.append(str(cell.value)) | |
| # Trim trailing empty cells (like Sheets API does) | |
| while row_values and row_values[-1] == "": | |
| row_values.pop() | |
| rows.append(row_values) | |
| # Remove trailing empty rows | |
| while rows and not any(rows[-1]): | |
| rows.pop() | |
| wb.close() | |
| return rows | |
| def column_letter_to_number(col_letter: str) -> int: | |
| """Convert column letter(s) to 1-based number. E.g., 'A'->1, 'Z'->26, 'AI'->35""" | |
| result = 0 | |
| for char in col_letter.upper(): | |
| result = result * 26 + (ord(char) - ord('A') + 1) | |
| return result | |
| def get_excel_file_path(month_id: str, data_dir: str = "data") -> Optional[str]: | |
| """ | |
| Get the Excel file path for a given month. | |
| Tries several naming patterns: | |
| - data/Case status_ February 2026.xlsx | |
| - data/Case_status_February_2026.xlsx | |
| - data/2026-02.xlsx | |
| Args: | |
| month_id: Month ID in format "2026-02" | |
| data_dir: Directory where Excel files are stored | |
| Returns: | |
| Path to Excel file if found, None otherwise | |
| """ | |
| # Parse month_id to get month name and year | |
| try: | |
| year, month_num = month_id.split("-") | |
| month_names = { | |
| "01": "January", "02": "February", "03": "March", | |
| "04": "April", "05": "May", "06": "June", | |
| "07": "July", "08": "August", "09": "September", | |
| "10": "October", "11": "November", "12": "December" | |
| } | |
| month_name = month_names.get(month_num, "") | |
| except ValueError: | |
| month_name = "" | |
| year = "" | |
| # List of patterns to try | |
| patterns = [ | |
| f"Case status_ {month_name} {year}.xlsx", | |
| f"Case_status_{month_name}_{year}.xlsx", | |
| f"{month_id}.xlsx", | |
| f"Case status {month_name} {year}.xlsx", | |
| ] | |
| for pattern in patterns: | |
| path = os.path.join(data_dir, pattern) | |
| if os.path.exists(path): | |
| return path | |
| return None | |
| def list_available_excel_files(data_dir: str = "data") -> List[str]: | |
| """List all .xlsx files in the data directory.""" | |
| if not os.path.exists(data_dir): | |
| return [] | |
| return [f for f in os.listdir(data_dir) if f.endswith('.xlsx')] | |
| def main(): | |
| """CLI for testing Excel parsing.""" | |
| if len(sys.argv) < 2: | |
| print("Usage: python excel_parser.py <excel_file> [tab_name]") | |
| print("\nExample: python excel_parser.py data/Case_status_February_2026.xlsx") | |
| # List available files | |
| files = list_available_excel_files() | |
| if files: | |
| print(f"\nAvailable Excel files in data/:") | |
| for f in files: | |
| print(f" - {f}") | |
| return 1 | |
| file_path = sys.argv[1] | |
| tab_name = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_TAB_NAME | |
| try: | |
| print(f"Reading: {file_path}") | |
| print(f"Tab: {tab_name}") | |
| print("-" * 50) | |
| rows = read_excel_file(file_path, tab_name) | |
| print(f"Total rows: {len(rows)}") | |
| print(f"Header row 1: {rows[0][:10] if rows else 'empty'}...") | |
| print(f"Header row 4: {rows[3][:10] if len(rows) > 3 else 'empty'}...") | |
| # Show a few data rows | |
| print("\nFirst data row (row 5):") | |
| if len(rows) > 4: | |
| print(f" {rows[4][:15]}...") | |
| # Count rows with data in column C (activity) | |
| activity_rows = sum(1 for r in rows[4:] if len(r) > 2 and r[2]) | |
| print(f"\nRows with activity data: {activity_rows}") | |
| return 0 | |
| except Exception as e: | |
| print(f"Error: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return 1 | |
| if __name__ == "__main__": | |
| exit(main()) | |