Spaces:
Running
Running
| import gradio as gr | |
| import io | |
| import re | |
| from datetime import datetime | |
| from openpyxl import Workbook | |
| from openpyxl.styles import Font, Alignment, PatternFill | |
| # ---------------------------- | |
| # Helper functions | |
| # ---------------------------- | |
| def extract_periods_from_header(content: str): | |
| lines = content.split('\n') | |
| for i, line in enumerate(lines): | |
| if 'YTD-Actual' in line or 'YTD-Budget' in line: | |
| search_lines = [line] | |
| if i + 1 < len(lines): | |
| search_lines.append(lines[i + 1]) | |
| if i + 2 < len(lines): | |
| search_lines.append(lines[i + 2]) | |
| for search_line in search_lines: | |
| periods = re.findall(r'([A-Z][a-z]{2}-\d{2})', search_line) | |
| if periods: | |
| unique_periods = [] | |
| for p in periods: | |
| if p not in unique_periods: | |
| unique_periods.append(p) | |
| if unique_periods: | |
| return unique_periods | |
| return ['Sep-25', 'Sep-24'] | |
| def parse_txt_file(content: str): | |
| lines = content.split('\n') | |
| data = [] | |
| periods = extract_periods_from_header(content) | |
| current_period = periods[0] if len(periods) > 0 else 'Current' | |
| prior_period = periods[1] if len(periods) > 1 else 'Prior' | |
| data_started = False | |
| for line in lines: | |
| if not line.strip() or any(skip in line for skip in [ | |
| 'PCL Primary Ledger', 'Profit & Loss', 'Current Period', | |
| 'Currency:', 'No specific', 'Page:', 'Date:']): | |
| continue | |
| # Detect start of data | |
| if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \ | |
| or '======' in line or any(p in line for p in periods): | |
| data_started = True | |
| continue | |
| if not data_started: | |
| continue | |
| # Clean the line | |
| line_clean = line.strip() | |
| if not line_clean: | |
| continue | |
| # Match account description (allow mixed cases, numbers, symbols) | |
| # Updated regex — handles all rows including missing numeric columns | |
| account_match = re.match(r'^([A-Za-z0-9\s/&().,-]+?)(?:\s{2,}|$)', line_clean) | |
| if not account_match: | |
| continue | |
| account_name = account_match.group(1).strip() | |
| # Extract numeric values including negatives and decimals | |
| all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', line_clean[len(account_name):]) | |
| row = {'Account Description': account_name} | |
| column_mapping = [ | |
| f'YTD Actual {current_period}', | |
| f'% {current_period} (YTD)', | |
| f'YTD Budget {current_period}', | |
| f'% {current_period} (Budget)', | |
| f'YTD Actual {prior_period}', | |
| f'% {prior_period}', | |
| '% Inc/Dec vs Budget', | |
| '% Inc/Dec vs SPLY', | |
| f'QTD Actual {current_period}', | |
| f'QTD Budget {current_period}', | |
| f'QTD Actual {prior_period}' | |
| ] | |
| for idx, col_name in enumerate(column_mapping): | |
| if idx < len(all_values): | |
| row[col_name] = all_values[idx] | |
| # Only add rows that have either a description or at least one value | |
| if account_name or any(all_values): | |
| data.append(row) | |
| return data, periods | |
| # ---------------------------- | |
| # Main conversion function | |
| # ---------------------------- | |
| def convert_txt_to_excel(file_path): | |
| try: | |
| with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: | |
| content = f.read() | |
| data, periods = parse_txt_file(content) | |
| if not data: | |
| return None | |
| current_period = periods[0] | |
| prior_period = periods[1] if len(periods) > 1 else 'Prior' | |
| wb = Workbook() | |
| ws = wb.active | |
| ws.title = f"P&L {current_period}" | |
| headers = [ | |
| 'Account Description', | |
| f'YTD Actual {current_period}', | |
| f'% {current_period} (YTD)', | |
| f'YTD Budget {current_period}', | |
| f'% {current_period} (Budget)', | |
| f'YTD Actual {prior_period}', | |
| f'% {prior_period}', | |
| '% Inc/Dec vs Budget', | |
| '% Inc/Dec vs SPLY', | |
| f'QTD Actual {current_period}', | |
| f'QTD Budget {current_period}', | |
| f'QTD Actual {prior_period}' | |
| ] | |
| ws.append(headers) | |
| # Header styling | |
| header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid') | |
| header_font = Font(bold=True, color='FFFFFF', size=11) | |
| for cell in ws[1]: | |
| cell.fill = header_fill | |
| cell.font = header_font | |
| cell.alignment = Alignment(horizontal='center', vertical='center') | |
| # Add data rows | |
| for row_data in data: | |
| row_values = [] | |
| for header in headers: | |
| value = row_data.get(header, '') | |
| if value and value != 'n/m': | |
| try: | |
| value = float(value.replace(',', '')) | |
| except ValueError: | |
| pass | |
| row_values.append(value) | |
| ws.append(row_values) | |
| # Format columns | |
| ws.column_dimensions['A'].width = 55 | |
| for col in 'BCDEFGHIJKL': | |
| ws.column_dimensions[col].width = 18 | |
| for row_idx in range(2, ws.max_row + 1): | |
| for col_idx in range(2, 13): | |
| cell = ws.cell(row=row_idx, column=col_idx) | |
| if isinstance(cell.value, (int, float)): | |
| cell.number_format = '#,##0.00' | |
| cell.alignment = Alignment(horizontal='right') | |
| elif cell.value == 'n/m': | |
| cell.alignment = Alignment(horizontal='center') | |
| # Save to /tmp for Gradio | |
| filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx" | |
| temp_path = f"/tmp/{filename}" | |
| wb.save(temp_path) | |
| return temp_path | |
| except Exception as e: | |
| return None | |
| # ---------------------------- | |
| # Gradio interface | |
| # ---------------------------- | |
| iface = gr.Interface( | |
| fn=convert_txt_to_excel, | |
| inputs=gr.File(label="Upload P&L TXT File", type="filepath"), | |
| outputs=gr.File(label="Download Excel File"), | |
| title="TXT to Excel Converter", | |
| description="Upload your P&L TXT file and download it as a formatted Excel file." | |
| ) | |
| iface.launch() | |