Spaces:

akazmi
/

Txtfiletoexcel

Sleeping

App Files Community

akazmi committed on Nov 5, 2025

Commit

1cf11ab

verified ·

1 Parent(s): 3e87b5e

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -35

app.py CHANGED Viewed

@@ -29,6 +29,7 @@ def extract_periods_from_header(content: str):
                         return unique_periods
     return ['Sep-25', 'Sep-24']
 def parse_txt_file(content: str):
     lines = content.split('\n')
     data = []
@@ -38,32 +39,36 @@ def parse_txt_file(content: str):
     data_started = False
     for line in lines:
-        if not line.strip() or 'PCL Primary Ledger' in line or 'Profit & Loss' in line \
-           or 'Current Period' in line or 'Currency:' in line or 'No specific' in line \
-           or 'Page:' in line or 'Date:' in line:
             continue
         if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \
            or '======' in line or any(p in line for p in periods):
             data_started = True
             continue
         if not data_started:
             continue
-        line_check = line.replace('-', '').replace('=', '').strip()
-        if not line_check:
             continue
-        # ---- Account name and values ----
-        account_match = re.match(r'^([A-Z\s/(),.\-&]+?)\s{2,}(-?\d)', line)
-        if account_match:
-            account_name = account_match.group(1).strip()
-            numbers_line = line[len(account_match.group(1)):].strip()
-            all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', numbers_line)
-        else:
-            # NEW FIX ➜ include rows with no numeric values
-            account_name = line.strip()
-            all_values = []
-        # Always build a row, even if no numbers found
         row = {'Account Description': account_name}
         column_mapping = [
             f'YTD Actual {current_period}',
@@ -81,28 +86,28 @@ def parse_txt_file(content: str):
         for idx, col_name in enumerate(column_mapping):
             if idx < len(all_values):
                 row[col_name] = all_values[idx]
-            else:
-                row[col_name] = ''  # blank if missing numeric
-        data.append(row)
     return data, periods
 # ----------------------------
 # Main conversion function
 # ----------------------------
 def convert_txt_to_excel(file_path):
     try:
-        # Read the uploaded file using the file path
-        with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
         data, periods = parse_txt_file(content)
         if not data:
             return None
-        current_period = periods[0] if len(periods) > 0 else 'Current'
         prior_period = periods[1] if len(periods) > 1 else 'Prior'
         wb = Workbook()
@@ -140,18 +145,15 @@ def convert_txt_to_excel(file_path):
                 value = row_data.get(header, '')
                 if value and value != 'n/m':
                     try:
-                        value_clean = value.replace(',', '')
-                        value = float(value_clean)
                     except ValueError:
                         pass
-                elif value == 'n/m':
-                    value = 'n/m'
                 row_values.append(value)
             ws.append(row_values)
-        # Column widths and number formatting
-        ws.column_dimensions['A'].width = 50
-        for col in ['B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L']:
             ws.column_dimensions[col].width = 18
         for row_idx in range(2, ws.max_row + 1):
@@ -163,16 +165,16 @@ def convert_txt_to_excel(file_path):
                 elif cell.value == 'n/m':
                     cell.alignment = Alignment(horizontal='center')
-        # Save to a temporary file for Gradio download
         filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
-        temp_file_path = f"/tmp/{filename}"
-        wb.save(temp_file_path)
-        return temp_file_path
     except Exception as e:
         return None
 # ----------------------------
 # Gradio interface
 # ----------------------------

                         return unique_periods
     return ['Sep-25', 'Sep-24']
 def parse_txt_file(content: str):
     lines = content.split('\n')
     data = []
     data_started = False
     for line in lines:
+        if not line.strip() or any(skip in line for skip in [
+            'PCL Primary Ledger', 'Profit & Loss', 'Current Period',
+            'Currency:', 'No specific', 'Page:', 'Date:']):
             continue
+        # Detect start of data
         if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \
            or '======' in line or any(p in line for p in periods):
             data_started = True
             continue
         if not data_started:
             continue
+        # Clean the line
+        line_clean = line.strip()
+        if not line_clean:
+            continue
+        # Match account description (allow mixed cases, numbers, symbols)
+        # Updated regex — handles all rows including missing numeric columns
+        account_match = re.match(r'^([A-Za-z0-9\s/&().,-]+?)(?:\s{2,}|$)', line_clean)
+        if not account_match:
             continue
+        account_name = account_match.group(1).strip()
+        # Extract numeric values including negatives and decimals
+        all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', line_clean[len(account_name):])
         row = {'Account Description': account_name}
         column_mapping = [
             f'YTD Actual {current_period}',
         for idx, col_name in enumerate(column_mapping):
             if idx < len(all_values):
                 row[col_name] = all_values[idx]
+        # Only add rows that have either a description or at least one value
+        if account_name or any(all_values):
+            data.append(row)
     return data, periods
 # ----------------------------
 # Main conversion function
 # ----------------------------
 def convert_txt_to_excel(file_path):
     try:
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
             content = f.read()
         data, periods = parse_txt_file(content)
         if not data:
             return None
+        current_period = periods[0]
         prior_period = periods[1] if len(periods) > 1 else 'Prior'
         wb = Workbook()
                 value = row_data.get(header, '')
                 if value and value != 'n/m':
                     try:
+                        value = float(value.replace(',', ''))
                     except ValueError:
                         pass
                 row_values.append(value)
             ws.append(row_values)
+        # Format columns
+        ws.column_dimensions['A'].width = 55
+        for col in 'BCDEFGHIJKL':
             ws.column_dimensions[col].width = 18
         for row_idx in range(2, ws.max_row + 1):
                 elif cell.value == 'n/m':
                     cell.alignment = Alignment(horizontal='center')
+        # Save to /tmp for Gradio
         filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+        temp_path = f"/tmp/{filename}"
+        wb.save(temp_path)
+        return temp_path
     except Exception as e:
         return None
 # ----------------------------
 # Gradio interface
 # ----------------------------