Spaces:
Runtime error
Runtime error
File size: 6,861 Bytes
d5e4667 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | #!/usr/bin/env python3
"""
Excel File Parser for SDR Status Tracker
Reads Excel files (.xlsx) and converts them to the same row format
that the Google Sheets API returns, enabling local file fallback
when Google Sheet access is not available.
Usage:
# As a module
from excel_parser import read_excel_file
rows = read_excel_file("data/Case status_ February 2026.xlsx")
# CLI for testing
python excel_parser.py data/Case_status_February_2026.xlsx
"""
import os
import sys
from typing import List, Optional
from openpyxl import load_workbook
# Default tab name to look for
DEFAULT_TAB_NAME = "DAILY - for SDR to add data🌟"
def read_excel_file(
file_path: str,
tab_name: str = DEFAULT_TAB_NAME,
max_col: str = "AI"
) -> List[List]:
"""
Read an Excel file and return rows as list of lists.
Matches the format returned by Google Sheets API:
- Returns list of rows, where each row is a list of cell values
- Empty cells are represented as empty strings ""
- Rows are trimmed to remove trailing empty cells (like Sheets API)
Args:
file_path: Path to the .xlsx file
tab_name: Name of the worksheet tab to read
max_col: Maximum column to read (e.g., "AI" = column 35)
Returns:
List of rows, each row is a list of cell values (strings)
Raises:
FileNotFoundError: If the Excel file doesn't exist
ValueError: If the specified tab is not found
"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"Excel file not found: {file_path}")
# Load workbook (data_only=True to get calculated values, not formulas)
wb = load_workbook(file_path, data_only=True)
# Find the tab
if tab_name not in wb.sheetnames:
# Try partial match
matching_tabs = [name for name in wb.sheetnames if tab_name.lower() in name.lower()]
if matching_tabs:
tab_name = matching_tabs[0]
print(f"Using matching tab: {tab_name}")
else:
available_tabs = ", ".join(wb.sheetnames)
raise ValueError(f"Tab '{tab_name}' not found. Available tabs: {available_tabs}")
ws = wb[tab_name]
# Convert column letter to number (e.g., "AI" -> 35)
max_col_num = column_letter_to_number(max_col)
# Read all rows
rows = []
for row in ws.iter_rows(min_row=1, max_col=max_col_num):
row_values = []
for cell in row:
# Convert cell value to string (matching Google Sheets behavior)
if cell.value is None:
row_values.append("")
elif isinstance(cell.value, (int, float)):
# Handle percentages (stored as decimals in Excel)
if cell.number_format and '%' in cell.number_format:
# Convert decimal to percentage integer (0.5 -> 50)
row_values.append(str(int(cell.value * 100)))
else:
# Keep numbers as-is
if cell.value == int(cell.value):
row_values.append(str(int(cell.value)))
else:
row_values.append(str(cell.value))
else:
row_values.append(str(cell.value))
# Trim trailing empty cells (like Sheets API does)
while row_values and row_values[-1] == "":
row_values.pop()
rows.append(row_values)
# Remove trailing empty rows
while rows and not any(rows[-1]):
rows.pop()
wb.close()
return rows
def column_letter_to_number(col_letter: str) -> int:
"""Convert column letter(s) to 1-based number. E.g., 'A'->1, 'Z'->26, 'AI'->35"""
result = 0
for char in col_letter.upper():
result = result * 26 + (ord(char) - ord('A') + 1)
return result
def get_excel_file_path(month_id: str, data_dir: str = "data") -> Optional[str]:
"""
Get the Excel file path for a given month.
Tries several naming patterns:
- data/Case status_ February 2026.xlsx
- data/Case_status_February_2026.xlsx
- data/2026-02.xlsx
Args:
month_id: Month ID in format "2026-02"
data_dir: Directory where Excel files are stored
Returns:
Path to Excel file if found, None otherwise
"""
# Parse month_id to get month name and year
try:
year, month_num = month_id.split("-")
month_names = {
"01": "January", "02": "February", "03": "March",
"04": "April", "05": "May", "06": "June",
"07": "July", "08": "August", "09": "September",
"10": "October", "11": "November", "12": "December"
}
month_name = month_names.get(month_num, "")
except ValueError:
month_name = ""
year = ""
# List of patterns to try
patterns = [
f"Case status_ {month_name} {year}.xlsx",
f"Case_status_{month_name}_{year}.xlsx",
f"{month_id}.xlsx",
f"Case status {month_name} {year}.xlsx",
]
for pattern in patterns:
path = os.path.join(data_dir, pattern)
if os.path.exists(path):
return path
return None
def list_available_excel_files(data_dir: str = "data") -> List[str]:
"""List all .xlsx files in the data directory."""
if not os.path.exists(data_dir):
return []
return [f for f in os.listdir(data_dir) if f.endswith('.xlsx')]
def main():
"""CLI for testing Excel parsing."""
if len(sys.argv) < 2:
print("Usage: python excel_parser.py <excel_file> [tab_name]")
print("\nExample: python excel_parser.py data/Case_status_February_2026.xlsx")
# List available files
files = list_available_excel_files()
if files:
print(f"\nAvailable Excel files in data/:")
for f in files:
print(f" - {f}")
return 1
file_path = sys.argv[1]
tab_name = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_TAB_NAME
try:
print(f"Reading: {file_path}")
print(f"Tab: {tab_name}")
print("-" * 50)
rows = read_excel_file(file_path, tab_name)
print(f"Total rows: {len(rows)}")
print(f"Header row 1: {rows[0][:10] if rows else 'empty'}...")
print(f"Header row 4: {rows[3][:10] if len(rows) > 3 else 'empty'}...")
# Show a few data rows
print("\nFirst data row (row 5):")
if len(rows) > 4:
print(f" {rows[4][:15]}...")
# Count rows with data in column C (activity)
activity_rows = sum(1 for r in rows[4:] if len(r) > 2 and r[2])
print(f"\nRows with activity data: {activity_rows}")
return 0
except Exception as e:
print(f"Error: {e}")
import traceback
traceback.print_exc()
return 1
if __name__ == "__main__":
exit(main())
|