Sahil Garg committed
Commit 5fd0efa · 1 Parent(s): 9542994

/pnl_from_notes working perfectly

Files changed (4)
  1. app/api.py +13 -6
  2. pnlbs/csv_json_pnl.py +33 -16
  3. pnlbs/pnl_note.py +82 -19
  4. pnlbs/sircodepnl.py +39 -10
app/api.py CHANGED
@@ -369,23 +369,30 @@ async def pnl_from_notes(file: UploadFile = File(...)):
     if os.getenv("OPENROUTER_API_KEY"):
         env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
     env["INPUT_FILE"] = "clean_financial_data_pnl.json"
-    cwd = "C:/SAHIL/NOTES"
+    cwd = "C:/SAHIL/production_notes"
 
     # Run sircodepnl.py
     run_subprocess("pnlbs/sircodepnl.py", [input_excel_path], env, cwd)
-    logger.info(f"Files in csv_notes_pnl/: {os.listdir('csv_notes_pnl') if os.path.exists('csv_notes_pnl') else 'csv_notes_pnl does not exist'}")
+    csv_notes_pnl_path = os.path.join(cwd, 'csv_notes_pnl')
+    logger.info(f"Files in {csv_notes_pnl_path}/: {os.listdir(csv_notes_pnl_path) if os.path.exists(csv_notes_pnl_path) else f'{csv_notes_pnl_path} does not exist'}")
 
     # Run csv_json_pnl.py
     run_subprocess("pnlbs/csv_json_pnl.py", [], env, cwd)
-    logger.info(f"clean_financial_data_pnl.json exists: {os.path.exists('clean_financial_data_pnl.json')}")
+    json_path = os.path.join(cwd, 'clean_financial_data_pnl.json')
+    logger.info(f"clean_financial_data_pnl.json exists: {os.path.exists(json_path)}")
 
     # Run pnl_note.py
     result = run_subprocess("pnlbs/pnl_note.py", [], env, cwd)
     output_file = extract_output_file(result.stdout)
-    if not output_file or not os.path.exists(output_file):
+    # If output_file is not absolute, resolve relative to cwd
+    if output_file and not os.path.isabs(output_file):
+        output_file_path = os.path.join(cwd, output_file)
+    else:
+        output_file_path = output_file
+    if not output_file or not os.path.exists(output_file_path):
         debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
         logger.error(f"Could not determine output file from pnl_note.py output.{debug_msg}")
         raise HTTPException(status_code=500, detail=f"Could not determine output file from pnl_note.py output.{debug_msg}")
 
-    logger.info(f"Pipeline completed. Output file: {output_file}")
-    return {"message": "Profit and Loss statement generated successfully.", "file": output_file}
+    logger.info(f"Pipeline completed. Output file: {output_file_path}")
+    return {"message": "Profit and Loss statement generated successfully.", "file": output_file_path}
pnlbs/csv_json_pnl.py CHANGED
@@ -41,7 +41,8 @@ class FinancialData(BaseModel):
 
 class FinancialCSVMapper:
     def __init__(self, csv_folder_path: str = settings.csv_folder_path):
-        self.csv_folder_path = csv_folder_path
+        # Always use absolute path for folder
+        self.csv_folder_path = os.path.abspath(csv_folder_path)
 
     def clean_value(self, value: Any) -> Optional[Union[float, int, str]]:
         """
@@ -284,16 +285,18 @@ class FinancialCSVMapper:
         }
 
     def process_all_csvs(self) -> Dict[str, Any]:
-        """
-        Process all CSV files and create meaningful financial JSON.
-        Returns the structured financial data.
-        """
+        logger.info(f"Current working directory: {os.getcwd()}")
+        logger.info(f"Looking for CSVs in: {self.csv_folder_path}")
+        # Ensure CSV folder exists
         if not os.path.exists(self.csv_folder_path):
-            logger.error(f"Folder {self.csv_folder_path} not found")
-            return {"error": f"Folder {self.csv_folder_path} not found"}
-
+            try:
+                os.makedirs(self.csv_folder_path, exist_ok=True)
+                logger.info(f"Created missing CSV folder: {self.csv_folder_path}")
+            except Exception as e:
+                logger.error(f"Failed to create CSV folder '{self.csv_folder_path}': {e}")
+                return {"error": f"Failed to create CSV folder '{self.csv_folder_path}': {e}"}
         csv_files = [f for f in os.listdir(self.csv_folder_path) if f.endswith('.csv')]
-
+        logger.info(f"CSV files found: {csv_files}")
         if not csv_files:
            logger.error(f"No CSV files found in {self.csv_folder_path}")
            return {"error": f"No CSV files found in {self.csv_folder_path}"}
@@ -345,16 +348,30 @@ class FinancialCSVMapper:
         Process all CSVs and save meaningful financial JSON.
         Returns the output file path.
         """
+        # Always use absolute path for output JSON
+        output_path = os.path.abspath(output_path)
         financial_data = self.process_all_csvs()
-
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(financial_data, f, indent=2, ensure_ascii=False, default=str)
-
-        logger.info(f"Clean financial JSON created: {output_path}")
+        try:
+            with open(output_path, 'w', encoding='utf-8') as f:
+                json.dump(financial_data, f, indent=2, ensure_ascii=False, default=str)
+            logger.info(f"Clean financial JSON created: {output_path}")
+        except Exception as e:
+            logger.error(f"Failed to write JSON file '{output_path}': {e}")
+            raise
         return output_path
 
 # Usage
 if __name__ == "__main__":
-    mapper = FinancialCSVMapper(settings.csv_folder_path)
-    output_file = mapper.save_to_json(settings.output_json)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
+    csv_folder = settings.csv_folder_path
+    output_json = settings.output_json
+    if len(sys.argv) > 1:
+        csv_folder = sys.argv[1]
+        logger.info(f"CSV folder path from argument: {os.path.abspath(csv_folder)}")
+    if len(sys.argv) > 2:
+        output_json = sys.argv[2]
+        logger.info(f"Output JSON path from argument: {os.path.abspath(output_json)}")
+    mapper = FinancialCSVMapper(csv_folder)
+    output_file = mapper.save_to_json(output_json)
     logger.info(f"Clean financial JSON created: {output_file}")
pnlbs/pnl_note.py CHANGED
@@ -65,19 +65,68 @@ class PnLGenerator:
             logger.warning(f"{item_key} not found in data")
             return 0.0, 0.0
         item_data = self.financial_data[item_key]
-        total_2024 = 0.0
-        total_2023 = 0.0
-        if isinstance(item_data, dict):
-            for category, values in item_data.items():
-                if isinstance(values, list) and len(values) >= 2:
-                    total_2024 += float(values[0] or 0)
-                    total_2023 += float(values[1] or 0)
-                elif isinstance(values, (int, float)):
-                    total_2024 += float(values)
-        elif isinstance(item_data, list) and len(item_data) >= 2:
-            total_2024 = float(item_data[0] or 0)
-            total_2023 = float(item_data[1] or 0)
-        return total_2024, total_2023
+        def recursive_sum(data):
+            sum_2024, sum_2023 = 0.0, 0.0
+            if isinstance(data, dict):
+                for k, v in data.items():
+                    # Skip metadata
+                    if k == "_metadata":
+                        continue
+                    s24, s23 = recursive_sum(v)
+                    sum_2024 += s24
+                    sum_2023 += s23
+            elif isinstance(data, list):
+                # If list contains only numbers, try to use first two as 2024/2023
+                nums = [x for x in data if isinstance(x, (int, float))]
+                if len(nums) >= 2:
+                    sum_2024 += float(nums[0] or 0)
+                    sum_2023 += float(nums[1] or 0)
+                elif len(nums) == 1:
+                    sum_2024 += float(nums[0] or 0)
+                # Otherwise, skip non-numeric entries
+            elif isinstance(data, (int, float)):
+                sum_2024 += float(data)
+            elif isinstance(data, str):
+                # Try to parse as float
+                try:
+                    val = float(data)
+                    sum_2024 += val
+                except Exception:
+                    pass
+            return sum_2024, sum_2023
+
+        # Special handling for date-based dicts
+        def sum_dates(data):
+            sum_2024, sum_2023 = 0.0, 0.0
+            if isinstance(data, dict):
+                for k, v in data.items():
+                    if k == "_metadata":
+                        continue
+                    if isinstance(v, dict):
+                        # If keys look like dates, sum by year
+                        for date_key, val in v.items():
+                            if "2024" in date_key:
+                                try:
+                                    sum_2024 += float(val)
+                                except Exception:
+                                    pass
+                            elif "2023" in date_key:
+                                try:
+                                    sum_2023 += float(val)
+                                except Exception:
+                                    pass
+                    else:
+                        s24, s23 = recursive_sum(v)
+                        sum_2024 += s24
+                        sum_2023 += s23
+            return sum_2024, sum_2023
+
+        # Try date-based sum first, fallback to recursive
+        s24, s23 = sum_dates(item_data)
+        if s24 == 0.0 and s23 == 0.0:
+            s24, s23 = recursive_sum(item_data)
+        logger.info(f"Extracted for {item_key}: 2024={s24}, 2023={s23}")
+        return s24, s23
 
     def get_revenue_data(self) -> Tuple[float, float]:
         """Extract revenue from operations data."""
@@ -285,6 +334,7 @@ class PnLGenerator:
         try:
             wb.save(output_file)
             logger.info(f"P&L Statement generated successfully: {output_file}")
+            print(f"Output file: {os.path.abspath(output_file)}")  # For API subprocess parsing
             self.print_financial_summary(
                 total_revenue_2024, total_revenue_2023,
                 total_expenses_2024, total_expenses_2023,
@@ -298,6 +348,7 @@ class PnLGenerator:
         try:
             wb.save(fallback_file)
             logger.info(f"P&L Statement saved to: {fallback_file}")
+            print(f"Output file: {os.path.abspath(fallback_file)}")  # For API subprocess parsing
             return True
         except Exception as e:
             logger.error(f"Failed to save: {str(e)}")
@@ -330,22 +381,34 @@ def main() -> None:
     """Main function to run the P&L generator."""
     logger.info("P&L STATEMENT GENERATOR FROM JSON")
     logger.info("=" * 50)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
     json_file: Optional[str] = None
     for file in settings.json_files:
         if os.path.exists(file):
             json_file = file
+            logger.info(f"Found input JSON file: {json_file}")
             break
     if not json_file:
-        json_file = input("Enter the path to your JSON file: ").strip()
+        if len(sys.argv) > 1:
+            json_file = sys.argv[1]
+            logger.info(f"Input JSON file from argument: {json_file}")
+        else:
+            json_file = input("Enter the path to your JSON file: ").strip()
     generator = PnLGenerator(json_file)
     if generator.load_financial_data():
         output_path = settings.output_file
+        if len(sys.argv) > 2:
+            output_path = sys.argv[2]
+            logger.info(f"Output Excel path from argument: {output_path}")
         logger.info(f"Output file: {output_path}")
-        if generator.generate_pnl_statement(output_path):
-            logger.info("P&L STATEMENT GENERATION COMPLETED SUCCESSFULLY!")
-            logger.info(f"Output file: {output_path}")
-        else:
-            logger.error("Failed to generate P&L statement")
+        try:
+            if generator.generate_pnl_statement(output_path):
+                logger.info(f"P&L Statement generated successfully: {os.path.abspath(output_path)}")
+            else:
+                logger.error("Failed to generate P&L statement.")
+        except Exception as e:
+            logger.error(f"Error writing Excel file: {e}")
     else:
         logger.error("Failed to load financial data")
 
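
The extraction helper now tries a date-keyed pass (sum_dates) and only falls back to the blind recursive_sum when that pass finds nothing. A self-contained sketch of the date-keyed idea on toy data; the nested shape is illustrative, not the repo's actual JSON schema:

def sum_by_year(node, year: str) -> float:
    """Recursively total every numeric value stored under a key mentioning the given year."""
    total = 0.0
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "_metadata":
                continue  # skip bookkeeping entries, as the committed code does
            if year in key and isinstance(value, (int, float)):
                total += float(value)
            else:
                total += sum_by_year(value, year)
    elif isinstance(node, list):
        total += sum(sum_by_year(item, year) for item in node)
    return total

note = {"salaries": {"31-03-2024": 120.0, "31-03-2023": 110.0}, "_metadata": {"unit": "lakhs"}}
print(sum_by_year(note, "2024"), sum_by_year(note, "2023"))  # 120.0 110.0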
pnlbs/sircodepnl.py CHANGED
@@ -18,16 +18,25 @@ class Settings(BaseSettings):
 
 settings = Settings()
 
+def get_xls(excel_file_path: str) -> pd.ExcelFile:
+    try:
+        xls = pd.ExcelFile(excel_file_path)
+        logger.info(f"Loaded Excel file: {excel_file_path}")
+        logger.info(f"Available sheets: {xls.sheet_names}")
+        return xls
+    except Exception as e:
+        logger.error(f"Failed to load Excel file '{excel_file_path}': {e}")
+        raise
+
 class NoteCSVInfo(BaseModel):
     name: str
     rows: int
 
-def clean_note(sheet_name: str, skiprows: int = settings.skiprows) -> pd.DataFrame:
+def clean_note(xls, sheet_name: str, skiprows: int = settings.skiprows) -> pd.DataFrame:
     """
     Parse and clean a sheet from the Excel file.
     Drops empty rows and columns, resets index.
     """
-    xls = pd.ExcelFile(settings.excel_file_path)
     df = xls.parse(sheet_name, skiprows=skiprows)
     df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
     return df
@@ -36,20 +45,40 @@ def export_note_to_csv(df: pd.DataFrame, filename: str, output_folder: str) -> NoteCSVInfo:
     """
     Export DataFrame to CSV and return info.
     """
-    # Ensure output folder exists
-    os.makedirs(output_folder, exist_ok=True)
-    output_path = os.path.join(output_folder, filename)
+    # Always use absolute path for output folder
+    abs_output_folder = os.path.abspath(output_folder)
+    try:
+        os.makedirs(abs_output_folder, exist_ok=True)
+        logger.info(f"Output folder ensured: {abs_output_folder}")
+    except Exception as e:
+        logger.error(f"Failed to create output folder '{abs_output_folder}': {e}")
+        raise
+    output_path = os.path.join(abs_output_folder, filename)
     df.to_csv(output_path, index=False)
+    logger.info(f"CSV file written to: {output_path}")
     return NoteCSVInfo(name=filename, rows=df.shape[0])
 
 def main() -> None:
     """
     Main function to extract P&L notes from Excel and export as CSV.
     """
-    logger.info("Loading Excel file: %s", settings.excel_file_path)
-    note_16_23_df = clean_note(settings.note_16_23_sheet, settings.skiprows)
-
-    os.makedirs(settings.output_folder, exist_ok=True)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
+    excel_file_path = settings.excel_file_path
+    if len(sys.argv) > 1:
+        excel_file_path = sys.argv[1]
+        logger.info(f"Excel file path from argument: {excel_file_path}")
+    xls = get_xls(excel_file_path)
+    if settings.note_16_23_sheet not in xls.sheet_names:
+        logger.error(f"Sheet '{settings.note_16_23_sheet}' not found in Excel file. Available sheets: {xls.sheet_names}")
+        return
+    note_16_23_df = clean_note(xls, settings.note_16_23_sheet, settings.skiprows)
+    logger.info(f"Loaded DataFrame shape: {note_16_23_df.shape}")
+    logger.info(f"First few rows:\n{note_16_23_df.head()}\n")
     info_16_23 = export_note_to_csv(note_16_23_df, "Note_16_to_23_Full.csv", settings.output_folder)
-
     logger.info(f"Extracted rows: Note 16-23 = {info_16_23.rows} rows")
+    abs_output_folder = os.path.abspath(settings.output_folder)
+    logger.info(f"CSV output path: {os.path.join(abs_output_folder, 'Note_16_to_23_Full.csv')}")
+
+if __name__ == "__main__":
+    main()
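
The structural change in this file is opening the workbook once through pd.ExcelFile, validating sheet names up front, and only then parsing. A minimal sketch of that pattern, assuming pandas with an Excel engine such as openpyxl is installed; load_sheet is an illustrative name, not part of this repo:

import pandas as pd

def load_sheet(path: str, sheet: str, skiprows: int = 0) -> pd.DataFrame:
    """Open the workbook once, fail fast on a missing sheet, then clean the frame."""
    xls = pd.ExcelFile(path)  # single file handle, reusable for every sheet
    if sheet not in xls.sheet_names:
        raise ValueError(f"Sheet '{sheet}' not found; available: {xls.sheet_names}")
    df = xls.parse(sheet, skiprows=skiprows)
    # Drop fully empty rows and columns, mirroring clean_note() above.
    return df.dropna(how="all").dropna(axis=1, how="all").reset_index(drop=True)

# e.g. load_sheet("notes.xlsx", "Note 16-23", skiprows=2)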