import gradio as gr import io import re from datetime import datetime from openpyxl import Workbook from openpyxl.styles import Font, Alignment, PatternFill # ---------------------------- # Helper functions # ---------------------------- def extract_periods_from_header(content: str): lines = content.split('\n') for i, line in enumerate(lines): if 'YTD-Actual' in line or 'YTD-Budget' in line: search_lines = [line] if i + 1 < len(lines): search_lines.append(lines[i + 1]) if i + 2 < len(lines): search_lines.append(lines[i + 2]) for search_line in search_lines: periods = re.findall(r'([A-Z][a-z]{2}-\d{2})', search_line) if periods: unique_periods = [] for p in periods: if p not in unique_periods: unique_periods.append(p) if unique_periods: return unique_periods return ['Sep-25', 'Sep-24'] def parse_txt_file(content: str): lines = content.split('\n') data = [] periods = extract_periods_from_header(content) current_period = periods[0] if len(periods) > 0 else 'Current' prior_period = periods[1] if len(periods) > 1 else 'Prior' data_started = False for line in lines: if not line.strip() or any(skip in line for skip in [ 'PCL Primary Ledger', 'Profit & Loss', 'Current Period', 'Currency:', 'No specific', 'Page:', 'Date:']): continue # Detect start of data if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \ or '======' in line or any(p in line for p in periods): data_started = True continue if not data_started: continue # Clean the line line_clean = line.strip() if not line_clean: continue # Match account description (allow mixed cases, numbers, symbols) # Updated regex — handles all rows including missing numeric columns account_match = re.match(r'^([A-Za-z0-9\s/&().,-]+?)(?:\s{2,}|$)', line_clean) if not account_match: continue account_name = account_match.group(1).strip() # Extract numeric values including negatives and decimals all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', line_clean[len(account_name):]) row = {'Account Description': account_name} column_mapping = [ f'YTD Actual {current_period}', f'% {current_period} (YTD)', f'YTD Budget {current_period}', f'% {current_period} (Budget)', f'YTD Actual {prior_period}', f'% {prior_period}', '% Inc/Dec vs Budget', '% Inc/Dec vs SPLY', f'QTD Actual {current_period}', f'QTD Budget {current_period}', f'QTD Actual {prior_period}' ] for idx, col_name in enumerate(column_mapping): if idx < len(all_values): row[col_name] = all_values[idx] # Only add rows that have either a description or at least one value if account_name or any(all_values): data.append(row) return data, periods # ---------------------------- # Main conversion function # ---------------------------- def convert_txt_to_excel(file_path): try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() data, periods = parse_txt_file(content) if not data: return None current_period = periods[0] prior_period = periods[1] if len(periods) > 1 else 'Prior' wb = Workbook() ws = wb.active ws.title = f"P&L {current_period}" headers = [ 'Account Description', f'YTD Actual {current_period}', f'% {current_period} (YTD)', f'YTD Budget {current_period}', f'% {current_period} (Budget)', f'YTD Actual {prior_period}', f'% {prior_period}', '% Inc/Dec vs Budget', '% Inc/Dec vs SPLY', f'QTD Actual {current_period}', f'QTD Budget {current_period}', f'QTD Actual {prior_period}' ] ws.append(headers) # Header styling header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid') header_font = Font(bold=True, color='FFFFFF', size=11) for cell in ws[1]: cell.fill = header_fill cell.font = header_font cell.alignment = Alignment(horizontal='center', vertical='center') # Add data rows for row_data in data: row_values = [] for header in headers: value = row_data.get(header, '') if value and value != 'n/m': try: value = float(value.replace(',', '')) except ValueError: pass row_values.append(value) ws.append(row_values) # Format columns ws.column_dimensions['A'].width = 55 for col in 'BCDEFGHIJKL': ws.column_dimensions[col].width = 18 for row_idx in range(2, ws.max_row + 1): for col_idx in range(2, 13): cell = ws.cell(row=row_idx, column=col_idx) if isinstance(cell.value, (int, float)): cell.number_format = '#,##0.00' cell.alignment = Alignment(horizontal='right') elif cell.value == 'n/m': cell.alignment = Alignment(horizontal='center') # Save to /tmp for Gradio filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx" temp_path = f"/tmp/{filename}" wb.save(temp_path) return temp_path except Exception as e: return None # ---------------------------- # Gradio interface # ---------------------------- iface = gr.Interface( fn=convert_txt_to_excel, inputs=gr.File(label="Upload P&L TXT File", type="filepath"), outputs=gr.File(label="Download Excel File"), title="TXT to Excel Converter", description="Upload your P&L TXT file and download it as a formatted Excel file." ) iface.launch()