Txtfiletoexcel / app.py
akazmi's picture
Update app.py
1cf11ab verified
import gradio as gr
import io
import re
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill
# ----------------------------
# Helper functions
# ----------------------------
def extract_periods_from_header(content: str):
lines = content.split('\n')
for i, line in enumerate(lines):
if 'YTD-Actual' in line or 'YTD-Budget' in line:
search_lines = [line]
if i + 1 < len(lines):
search_lines.append(lines[i + 1])
if i + 2 < len(lines):
search_lines.append(lines[i + 2])
for search_line in search_lines:
periods = re.findall(r'([A-Z][a-z]{2}-\d{2})', search_line)
if periods:
unique_periods = []
for p in periods:
if p not in unique_periods:
unique_periods.append(p)
if unique_periods:
return unique_periods
return ['Sep-25', 'Sep-24']
def parse_txt_file(content: str):
lines = content.split('\n')
data = []
periods = extract_periods_from_header(content)
current_period = periods[0] if len(periods) > 0 else 'Current'
prior_period = periods[1] if len(periods) > 1 else 'Prior'
data_started = False
for line in lines:
if not line.strip() or any(skip in line for skip in [
'PCL Primary Ledger', 'Profit & Loss', 'Current Period',
'Currency:', 'No specific', 'Page:', 'Date:']):
continue
# Detect start of data
if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \
or '======' in line or any(p in line for p in periods):
data_started = True
continue
if not data_started:
continue
# Clean the line
line_clean = line.strip()
if not line_clean:
continue
# Match account description (allow mixed cases, numbers, symbols)
# Updated regex — handles all rows including missing numeric columns
account_match = re.match(r'^([A-Za-z0-9\s/&().,-]+?)(?:\s{2,}|$)', line_clean)
if not account_match:
continue
account_name = account_match.group(1).strip()
# Extract numeric values including negatives and decimals
all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', line_clean[len(account_name):])
row = {'Account Description': account_name}
column_mapping = [
f'YTD Actual {current_period}',
f'% {current_period} (YTD)',
f'YTD Budget {current_period}',
f'% {current_period} (Budget)',
f'YTD Actual {prior_period}',
f'% {prior_period}',
'% Inc/Dec vs Budget',
'% Inc/Dec vs SPLY',
f'QTD Actual {current_period}',
f'QTD Budget {current_period}',
f'QTD Actual {prior_period}'
]
for idx, col_name in enumerate(column_mapping):
if idx < len(all_values):
row[col_name] = all_values[idx]
# Only add rows that have either a description or at least one value
if account_name or any(all_values):
data.append(row)
return data, periods
# ----------------------------
# Main conversion function
# ----------------------------
def convert_txt_to_excel(file_path):
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
data, periods = parse_txt_file(content)
if not data:
return None
current_period = periods[0]
prior_period = periods[1] if len(periods) > 1 else 'Prior'
wb = Workbook()
ws = wb.active
ws.title = f"P&L {current_period}"
headers = [
'Account Description',
f'YTD Actual {current_period}',
f'% {current_period} (YTD)',
f'YTD Budget {current_period}',
f'% {current_period} (Budget)',
f'YTD Actual {prior_period}',
f'% {prior_period}',
'% Inc/Dec vs Budget',
'% Inc/Dec vs SPLY',
f'QTD Actual {current_period}',
f'QTD Budget {current_period}',
f'QTD Actual {prior_period}'
]
ws.append(headers)
# Header styling
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
header_font = Font(bold=True, color='FFFFFF', size=11)
for cell in ws[1]:
cell.fill = header_fill
cell.font = header_font
cell.alignment = Alignment(horizontal='center', vertical='center')
# Add data rows
for row_data in data:
row_values = []
for header in headers:
value = row_data.get(header, '')
if value and value != 'n/m':
try:
value = float(value.replace(',', ''))
except ValueError:
pass
row_values.append(value)
ws.append(row_values)
# Format columns
ws.column_dimensions['A'].width = 55
for col in 'BCDEFGHIJKL':
ws.column_dimensions[col].width = 18
for row_idx in range(2, ws.max_row + 1):
for col_idx in range(2, 13):
cell = ws.cell(row=row_idx, column=col_idx)
if isinstance(cell.value, (int, float)):
cell.number_format = '#,##0.00'
cell.alignment = Alignment(horizontal='right')
elif cell.value == 'n/m':
cell.alignment = Alignment(horizontal='center')
# Save to /tmp for Gradio
filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
temp_path = f"/tmp/{filename}"
wb.save(temp_path)
return temp_path
except Exception as e:
return None
# ----------------------------
# Gradio interface
# ----------------------------
iface = gr.Interface(
fn=convert_txt_to_excel,
inputs=gr.File(label="Upload P&L TXT File", type="filepath"),
outputs=gr.File(label="Download Excel File"),
title="TXT to Excel Converter",
description="Upload your P&L TXT file and download it as a formatted Excel file."
)
iface.launch()