akazmi committed on
Commit
1cf11ab
·
verified ·
1 Parent(s): 3e87b5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -35
app.py CHANGED
@@ -29,6 +29,7 @@ def extract_periods_from_header(content: str):
29
  return unique_periods
30
  return ['Sep-25', 'Sep-24']
31
 
 
32
  def parse_txt_file(content: str):
33
  lines = content.split('\n')
34
  data = []
@@ -38,32 +39,36 @@ def parse_txt_file(content: str):
38
  data_started = False
39
 
40
  for line in lines:
41
- if not line.strip() or 'PCL Primary Ledger' in line or 'Profit & Loss' in line \
42
- or 'Current Period' in line or 'Currency:' in line or 'No specific' in line \
43
- or 'Page:' in line or 'Date:' in line:
44
  continue
 
 
45
  if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \
46
  or '======' in line or any(p in line for p in periods):
47
  data_started = True
48
  continue
 
49
  if not data_started:
50
  continue
51
- line_check = line.replace('-', '').replace('=', '').strip()
52
- if not line_check:
 
 
 
 
 
 
 
 
53
  continue
54
 
55
- # ---- Account name and values ----
56
- account_match = re.match(r'^([A-Z\s/(),.\-&]+?)\s{2,}(-?\d)', line)
57
- if account_match:
58
- account_name = account_match.group(1).strip()
59
- numbers_line = line[len(account_match.group(1)):].strip()
60
- all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', numbers_line)
61
- else:
62
- # NEW FIX ➜ include rows with no numeric values
63
- account_name = line.strip()
64
- all_values = []
65
-
66
- # Always build a row, even if no numbers found
67
  row = {'Account Description': account_name}
68
  column_mapping = [
69
  f'YTD Actual {current_period}',
@@ -81,28 +86,28 @@ def parse_txt_file(content: str):
81
  for idx, col_name in enumerate(column_mapping):
82
  if idx < len(all_values):
83
  row[col_name] = all_values[idx]
84
- else:
85
- row[col_name] = '' # blank if missing numeric
86
 
87
- data.append(row)
 
 
88
 
89
  return data, periods
90
 
 
91
  # ----------------------------
92
  # Main conversion function
93
  # ----------------------------
94
 
95
  def convert_txt_to_excel(file_path):
96
  try:
97
- # Read the uploaded file using the file path
98
- with open(file_path, 'r', encoding='utf-8') as f:
99
  content = f.read()
100
 
101
  data, periods = parse_txt_file(content)
102
  if not data:
103
  return None
104
 
105
- current_period = periods[0] if len(periods) > 0 else 'Current'
106
  prior_period = periods[1] if len(periods) > 1 else 'Prior'
107
 
108
  wb = Workbook()
@@ -140,18 +145,15 @@ def convert_txt_to_excel(file_path):
140
  value = row_data.get(header, '')
141
  if value and value != 'n/m':
142
  try:
143
- value_clean = value.replace(',', '')
144
- value = float(value_clean)
145
  except ValueError:
146
  pass
147
- elif value == 'n/m':
148
- value = 'n/m'
149
  row_values.append(value)
150
  ws.append(row_values)
151
 
152
- # Column widths and number formatting
153
- ws.column_dimensions['A'].width = 50
154
- for col in ['B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L']:
155
  ws.column_dimensions[col].width = 18
156
 
157
  for row_idx in range(2, ws.max_row + 1):
@@ -163,16 +165,16 @@ def convert_txt_to_excel(file_path):
163
  elif cell.value == 'n/m':
164
  cell.alignment = Alignment(horizontal='center')
165
 
166
- # Save to a temporary file for Gradio download
167
  filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
168
- temp_file_path = f"/tmp/{filename}"
169
- wb.save(temp_file_path)
170
-
171
- return temp_file_path
172
 
173
  except Exception as e:
174
  return None
175
 
 
176
  # ----------------------------
177
  # Gradio interface
178
  # ----------------------------
 
29
  return unique_periods
30
  return ['Sep-25', 'Sep-24']
31
 
32
+
33
  def parse_txt_file(content: str):
34
  lines = content.split('\n')
35
  data = []
 
39
  data_started = False
40
 
41
  for line in lines:
42
+ if not line.strip() or any(skip in line for skip in [
43
+ 'PCL Primary Ledger', 'Profit & Loss', 'Current Period',
44
+ 'Currency:', 'No specific', 'Page:', 'Date:']):
45
  continue
46
+
47
+ # Detect start of data
48
  if 'YTD-Actual' in line or 'YTD-Budget' in line or '------' in line \
49
  or '======' in line or any(p in line for p in periods):
50
  data_started = True
51
  continue
52
+
53
  if not data_started:
54
  continue
55
+
56
+ # Clean the line
57
+ line_clean = line.strip()
58
+ if not line_clean:
59
+ continue
60
+
61
+ # Match account description (allow mixed cases, numbers, symbols)
62
+ # Updated regex — handles all rows including missing numeric columns
63
+ account_match = re.match(r'^([A-Za-z0-9\s/&().,-]+?)(?:\s{2,}|$)', line_clean)
64
+ if not account_match:
65
  continue
66
 
67
+ account_name = account_match.group(1).strip()
68
+
69
+ # Extract numeric values including negatives and decimals
70
+ all_values = re.findall(r'(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?|n/m)', line_clean[len(account_name):])
71
+
 
 
 
 
 
 
 
72
  row = {'Account Description': account_name}
73
  column_mapping = [
74
  f'YTD Actual {current_period}',
 
86
  for idx, col_name in enumerate(column_mapping):
87
  if idx < len(all_values):
88
  row[col_name] = all_values[idx]
 
 
89
 
90
+ # Only add rows that have either a description or at least one value
91
+ if account_name or any(all_values):
92
+ data.append(row)
93
 
94
  return data, periods
95
 
96
+
97
  # ----------------------------
98
  # Main conversion function
99
  # ----------------------------
100
 
101
  def convert_txt_to_excel(file_path):
102
  try:
103
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
 
104
  content = f.read()
105
 
106
  data, periods = parse_txt_file(content)
107
  if not data:
108
  return None
109
 
110
+ current_period = periods[0]
111
  prior_period = periods[1] if len(periods) > 1 else 'Prior'
112
 
113
  wb = Workbook()
 
145
  value = row_data.get(header, '')
146
  if value and value != 'n/m':
147
  try:
148
+ value = float(value.replace(',', ''))
 
149
  except ValueError:
150
  pass
 
 
151
  row_values.append(value)
152
  ws.append(row_values)
153
 
154
+ # Format columns
155
+ ws.column_dimensions['A'].width = 55
156
+ for col in 'BCDEFGHIJKL':
157
  ws.column_dimensions[col].width = 18
158
 
159
  for row_idx in range(2, ws.max_row + 1):
 
165
  elif cell.value == 'n/m':
166
  cell.alignment = Alignment(horizontal='center')
167
 
168
+ # Save to /tmp for Gradio
169
  filename = f"PL_{current_period}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
170
+ temp_path = f"/tmp/{filename}"
171
+ wb.save(temp_path)
172
+ return temp_path
 
173
 
174
  except Exception as e:
175
  return None
176
 
177
+
178
  # ----------------------------
179
  # Gradio interface
180
  # ----------------------------