Spaces:
Sleeping
Sleeping
Sahil Garg
committed on
Commit
·
bc7f19f
1
Parent(s):
d8fe452
cashflow generation from notes
Browse files- .gitignore +2 -0
- app/api.py +52 -2
- cf/cf_generation.py +338 -0
- cf/cf_middlestep.py +484 -0
- cf/csv_json_cf.py +323 -0
- cf/sircodecf.py +89 -0
.gitignore
CHANGED
|
@@ -19,6 +19,8 @@ csv_notes_pnl/
|
|
| 19 |
csv_notes_bs/
|
| 20 |
clean_financial_data_bs.json
|
| 21 |
clean_financial_data_pnl.json
|
|
|
|
|
|
|
| 22 |
generated_notes*/
|
| 23 |
balancesheet_excel/
|
| 24 |
cashflow_excel/
|
|
|
|
| 19 |
csv_notes_bs/
|
| 20 |
clean_financial_data_bs.json
|
| 21 |
clean_financial_data_pnl.json
|
| 22 |
+
clean_financial_data_cfs.json
|
| 23 |
+
extracted_cfs_data.json
|
| 24 |
generated_notes*/
|
| 25 |
balancesheet_excel/
|
| 26 |
cashflow_excel/
|
app/api.py
CHANGED
|
@@ -361,8 +361,6 @@ async def bs_from_notes(file: UploadFile = File(...)):
|
|
| 361 |
)
|
| 362 |
|
| 363 |
|
| 364 |
-
|
| 365 |
-
|
| 366 |
@router.post("/pnl_from_notes")
|
| 367 |
async def pnl_from_notes(file: UploadFile = File(...)):
|
| 368 |
"""
|
|
@@ -405,6 +403,58 @@ async def pnl_from_notes(file: UploadFile = File(...)):
|
|
| 405 |
logger.error(f"Could not determine output file from pnl_note.py output.{debug_msg}")
|
| 406 |
raise HTTPException(status_code=500, detail=f"Could not determine output file from pnl_note.py output.{debug_msg}")
|
| 407 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
logger.info(f"Pipeline completed. Output file: {output_file_path}")
|
| 409 |
return FileResponse(
|
| 410 |
output_file_path,
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
|
|
|
|
|
|
|
| 364 |
@router.post("/pnl_from_notes")
|
| 365 |
async def pnl_from_notes(file: UploadFile = File(...)):
|
| 366 |
"""
|
|
|
|
| 403 |
logger.error(f"Could not determine output file from pnl_note.py output.{debug_msg}")
|
| 404 |
raise HTTPException(status_code=500, detail=f"Could not determine output file from pnl_note.py output.{debug_msg}")
|
| 405 |
|
| 406 |
+
logger.info(f"Pipeline completed. Output file: {output_file_path}")
|
| 407 |
+
return FileResponse(
|
| 408 |
+
output_file_path,
|
| 409 |
+
filename=os.path.basename(output_file_path),
|
| 410 |
+
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 411 |
+
)
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
@router.post("/cf_from_notes")
|
| 415 |
+
async def cf_from_notes(file: UploadFile = File(...)):
|
| 416 |
+
"""
|
| 417 |
+
Accepts an Excel file, runs the full pipeline (sircodecf.py -> csv_json_cf.py -> cf_middlestep.py -> cf_generation.py),
|
| 418 |
+
and returns the path to the generated Cash Flow Excel file.
|
| 419 |
+
"""
|
| 420 |
+
os.makedirs("input", exist_ok=True)
|
| 421 |
+
input_excel_path = os.path.join("input", file.filename)
|
| 422 |
+
with open(input_excel_path, "wb") as buffer:
|
| 423 |
+
shutil.copyfileobj(file.file, buffer)
|
| 424 |
+
logger.info(f"Uploaded Excel saved to: {input_excel_path}")
|
| 425 |
+
logger.info(f"Files in input/: {os.listdir('input')}")
|
| 426 |
+
|
| 427 |
+
env = os.environ.copy()
|
| 428 |
+
cwd = os.getenv("PROJECT_ROOT", os.getcwd())
|
| 429 |
+
|
| 430 |
+
# Step 1: Run sircodecf.py
|
| 431 |
+
run_subprocess("cf/sircodecf.py", [input_excel_path], env, cwd)
|
| 432 |
+
csv_notes_cfs_path = os.path.join(cwd, 'csv_notes_cfs')
|
| 433 |
+
logger.info(f"Files in {csv_notes_cfs_path}/: {os.listdir(csv_notes_cfs_path) if os.path.exists(csv_notes_cfs_path) else f'{csv_notes_cfs_path} does not exist'}")
|
| 434 |
+
|
| 435 |
+
# Step 2: Run csv_json_cf.py
|
| 436 |
+
run_subprocess("cf/csv_json_cf.py", [], env, cwd)
|
| 437 |
+
json_path = os.path.join(cwd, 'clean_financial_data_cfs.json')
|
| 438 |
+
logger.info(f"clean_financial_data_cfs.json exists: {os.path.exists(json_path)}")
|
| 439 |
+
|
| 440 |
+
# Step 3: Run cf_middlestep.py
|
| 441 |
+
run_subprocess("cf/cf_middlestep.py", [], env, cwd)
|
| 442 |
+
extracted_json_path = os.path.join(cwd, 'extracted_cfs_data.json')
|
| 443 |
+
logger.info(f"extracted_cfs_data.json exists: {os.path.exists(extracted_json_path)}")
|
| 444 |
+
|
| 445 |
+
# Step 4: Run cf_generation.py
|
| 446 |
+
result = run_subprocess("cf/cf_generation.py", [], env, cwd)
|
| 447 |
+
# The output Excel file is typically named 'cash_flow_statement.xlsx' or similar
|
| 448 |
+
output_file = "cash_flow_statement.xlsx"
|
| 449 |
+
output_file_path = os.path.join(cwd, output_file)
|
| 450 |
+
if not os.path.exists(output_file_path):
|
| 451 |
+
# Try plural version if not found
|
| 452 |
+
output_file_path = os.path.join(cwd, "cash_flow_statements.xlsx")
|
| 453 |
+
if not os.path.exists(output_file_path):
|
| 454 |
+
debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
|
| 455 |
+
logger.error(f"Could not determine output file from cf_generation.py output.{debug_msg}")
|
| 456 |
+
raise HTTPException(status_code=500, detail=f"Could not determine output file from cf_generation.py output.{debug_msg}")
|
| 457 |
+
|
| 458 |
logger.info(f"Pipeline completed. Output file: {output_file_path}")
|
| 459 |
return FileResponse(
|
| 460 |
output_file_path,
|
cf/cf_generation.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
from openpyxl import Workbook
|
| 6 |
+
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
|
| 7 |
+
|
| 8 |
+
logging.basicConfig(
|
| 9 |
+
level=logging.INFO,
|
| 10 |
+
format='%(asctime)s %(levelname)s %(name)s %(message)s',
|
| 11 |
+
)
|
| 12 |
+
logger = logging.getLogger("cf_generation")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class CashFlowStatementGenerator:
|
| 16 |
+
"""
|
| 17 |
+
Generates a Cash Flow Statement Excel file from extracted financial data.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, extracted_data_file: Optional[str] = None, extracted_data: Optional[Dict[str, Any]] = None):
|
| 21 |
+
"""
|
| 22 |
+
Initialize with extracted financial data.
|
| 23 |
+
Args:
|
| 24 |
+
extracted_data_file: Path to JSON file with extracted data.
|
| 25 |
+
extracted_data: Data dict (if already loaded).
|
| 26 |
+
Raises:
|
| 27 |
+
ValueError: If neither data file nor dict is provided.
|
| 28 |
+
"""
|
| 29 |
+
if extracted_data_file:
|
| 30 |
+
try:
|
| 31 |
+
with open(extracted_data_file, 'r') as f:
|
| 32 |
+
self.data = json.load(f)
|
| 33 |
+
logger.info(f"Loaded data from {extracted_data_file}")
|
| 34 |
+
except Exception as e:
|
| 35 |
+
logger.error(f"Failed to load data from {extracted_data_file}: {e}")
|
| 36 |
+
raise
|
| 37 |
+
elif extracted_data:
|
| 38 |
+
self.data = extracted_data
|
| 39 |
+
logger.info("Loaded data from provided dictionary.")
|
| 40 |
+
else:
|
| 41 |
+
logger.error("Either extracted_data_file or extracted_data must be provided.")
|
| 42 |
+
raise ValueError("Either extracted_data_file or extracted_data must be provided")
|
| 43 |
+
|
| 44 |
+
@staticmethod
|
| 45 |
+
def format_amount(amount: Any) -> float:
|
| 46 |
+
"""
|
| 47 |
+
Format amount for display - return numeric value, formatting handled by Excel.
|
| 48 |
+
Args:
|
| 49 |
+
amount: Value to format.
|
| 50 |
+
Returns:
|
| 51 |
+
float: Numeric value (0 if invalid).
|
| 52 |
+
"""
|
| 53 |
+
if amount is None or amount == '' or amount == '-':
|
| 54 |
+
return 0.0
|
| 55 |
+
try:
|
| 56 |
+
return float(amount)
|
| 57 |
+
except (ValueError, TypeError):
|
| 58 |
+
return 0.0
|
| 59 |
+
|
| 60 |
+
def generate_cash_flow_statement_xlsx(self, output_filename: Optional[str] = None) -> Dict[str, Any]:
|
| 61 |
+
"""
|
| 62 |
+
Generate the complete Cash Flow Statement in Excel format with openpyxl formatting.
|
| 63 |
+
Args:
|
| 64 |
+
output_filename: Output Excel file name (from env or default).
|
| 65 |
+
Returns:
|
| 66 |
+
dict: Summary and verification of generated statement.
|
| 67 |
+
"""
|
| 68 |
+
output_filename = output_filename or os.getenv("CFS_OUTPUT_FILE", "cash_flow_statements.xlsx")
|
| 69 |
+
try:
|
| 70 |
+
pl_data = self.data['profit_and_loss']
|
| 71 |
+
wc_data = self.data['working_capital']
|
| 72 |
+
inv_data = self.data['investing_activities']
|
| 73 |
+
fin_data = self.data['financing_activities']
|
| 74 |
+
cash_data = self.data['cash_and_equivalents']
|
| 75 |
+
except KeyError as e:
|
| 76 |
+
logger.error(f"Missing key in input data: {e}")
|
| 77 |
+
raise
|
| 78 |
+
|
| 79 |
+
# Calculations
|
| 80 |
+
pbt_current = self.format_amount(pl_data['profit_before_tax']['current'])
|
| 81 |
+
pbt_previous = self.format_amount(pl_data['profit_before_tax']['previous'])
|
| 82 |
+
dep_current = self.format_amount(pl_data['depreciation']['current'])
|
| 83 |
+
dep_previous = self.format_amount(pl_data['depreciation']['previous'])
|
| 84 |
+
int_inc_current = self.format_amount(pl_data['interest_income']['current'])
|
| 85 |
+
int_inc_previous = self.format_amount(pl_data['interest_income']['previous'])
|
| 86 |
+
op_profit_current = pbt_current + dep_current - int_inc_current
|
| 87 |
+
op_profit_previous = pbt_previous + dep_previous - int_inc_previous
|
| 88 |
+
tr_change = self.format_amount(wc_data['trade_receivables']['change'])
|
| 89 |
+
inv_change = self.format_amount(wc_data['inventories']['change'])
|
| 90 |
+
oca_change = self.format_amount(wc_data['other_current_assets']['change'])
|
| 91 |
+
stla_change = self.format_amount(wc_data['short_term_loans_advances']['change'])
|
| 92 |
+
cwip_change = 0.0
|
| 93 |
+
ltla_change = self.format_amount(wc_data['long_term_loans_advances']['change'])
|
| 94 |
+
stp_change = self.format_amount(wc_data['short_term_provisions']['change'])
|
| 95 |
+
tp_change = self.format_amount(wc_data['trade_payables']['change'])
|
| 96 |
+
ocl_change = self.format_amount(wc_data['other_current_liabilities']['change'])
|
| 97 |
+
total_wc_change = (
|
| 98 |
+
tr_change + inv_change + oca_change + stla_change +
|
| 99 |
+
cwip_change + ltla_change + stp_change + tp_change + ocl_change
|
| 100 |
+
)
|
| 101 |
+
cash_from_operations = op_profit_current + total_wc_change
|
| 102 |
+
tax_paid = float(os.getenv("CFS_TAX_PAID", 179.27))
|
| 103 |
+
net_operating_cash_flow = cash_from_operations - tax_paid
|
| 104 |
+
asset_purchases = self.format_amount(inv_data['asset_purchases']['total'])
|
| 105 |
+
asset_sales = self.format_amount(inv_data['asset_sales']['total'])
|
| 106 |
+
interest_income = self.format_amount(inv_data['interest_income']['current'])
|
| 107 |
+
net_investing_cash_flow = -asset_purchases + asset_sales + interest_income
|
| 108 |
+
dividend_paid = self.format_amount(fin_data['dividend_paid']['current'])
|
| 109 |
+
borrowing_change = self.format_amount(fin_data['long_term_borrowings']['change'])
|
| 110 |
+
cmltd_repayment = abs(self.format_amount(fin_data['current_maturities']['change']))
|
| 111 |
+
net_financing_cash_flow = -dividend_paid + borrowing_change - cmltd_repayment
|
| 112 |
+
net_change = net_operating_cash_flow + net_investing_cash_flow + net_financing_cash_flow
|
| 113 |
+
cash_beginning = self.format_amount(cash_data['total']['previous'])
|
| 114 |
+
cash_ending = self.format_amount(cash_data['total']['current'])
|
| 115 |
+
|
| 116 |
+
cfs_data = [
|
| 117 |
+
['Particulars', 'March 31, 2024', 'March 31, 2023'],
|
| 118 |
+
['', '', ''],
|
| 119 |
+
['Cash flow from operating activities', '', ''],
|
| 120 |
+
['Profit before taxation', pbt_current, pbt_previous],
|
| 121 |
+
['', '', ''],
|
| 122 |
+
['Adjustment for:', '', ''],
|
| 123 |
+
['Add: Depreciation and Amortisation Expense', dep_current, dep_previous],
|
| 124 |
+
['Less: Interest income', -int_inc_current, -int_inc_previous],
|
| 125 |
+
['Operating profit before working capital changes', op_profit_current, op_profit_previous],
|
| 126 |
+
['', '', ''],
|
| 127 |
+
['Movements in working capital:', '', ''],
|
| 128 |
+
['(Increase)/Decrease in Trade Receivables', tr_change, ''],
|
| 129 |
+
['(Increase)/Decrease in Inventories', inv_change, ''],
|
| 130 |
+
['(Increase)/Decrease in Other Current Assets', oca_change, ''],
|
| 131 |
+
['(Increase)/Decrease in Short Term Loans & Advances', stla_change, ''],
|
| 132 |
+
['(Increase)/Decrease in Capital Work in Progress', cwip_change, ''],
|
| 133 |
+
['(Increase)/Decrease in Long Term Loans & Advances', ltla_change, ''],
|
| 134 |
+
['Increase/(Decrease) in Short Term Provisions', stp_change, ''],
|
| 135 |
+
['Increase/(Decrease) in Trade Payables', tp_change, ''],
|
| 136 |
+
['Increase/(Decrease) in Other Current Liabilities', ocl_change, ''],
|
| 137 |
+
['Cash used in operations', cash_from_operations, ''],
|
| 138 |
+
['Less: Direct taxes paid (net of refunds)', -tax_paid, ''],
|
| 139 |
+
['Net cash flow from operating activities (A)', net_operating_cash_flow, ''],
|
| 140 |
+
['', '', ''],
|
| 141 |
+
['Cash flows from investing activities', '', ''],
|
| 142 |
+
['Purchase of Assets', -asset_purchases if asset_purchases > 0 else '', ''],
|
| 143 |
+
['Sale of Assets', asset_sales if asset_sales > 0 else '', ''],
|
| 144 |
+
['Interest income', interest_income, ''],
|
| 145 |
+
['Net cash flow from investing activities (B)', net_investing_cash_flow, ''],
|
| 146 |
+
['', '', ''],
|
| 147 |
+
['Cash flows from financing activities', '', ''],
|
| 148 |
+
['Dividend paid', -dividend_paid if dividend_paid > 0 else '', ''],
|
| 149 |
+
['Long Term Borrowings', borrowing_change if borrowing_change > 0 else '', ''],
|
| 150 |
+
['Repayment of borrowings', -abs(borrowing_change) if borrowing_change < 0 else '', ''],
|
| 151 |
+
['Net cash flow from financing activities (C)', net_financing_cash_flow, ''],
|
| 152 |
+
['', '', ''],
|
| 153 |
+
['Net increase/(decrease) in cash and cash equivalents (A+B+C)', net_change, ''],
|
| 154 |
+
['Cash and cash equivalents at the beginning of the year', cash_beginning, ''],
|
| 155 |
+
['Cash and cash equivalents at the end of the year', cash_ending, cash_beginning],
|
| 156 |
+
['', '', ''],
|
| 157 |
+
['Components of cash and cash equivalents', '', ''],
|
| 158 |
+
['Cash on hand', self.format_amount(cash_data['cash_on_hand']['current']), self.format_amount(cash_data['cash_on_hand']['previous'])],
|
| 159 |
+
['With banks in Current Accounts', self.format_amount(cash_data['bank_balances']['current']), self.format_amount(cash_data['bank_balances']['previous'])],
|
| 160 |
+
['With banks in Fixed Deposits', self.format_amount(cash_data['fixed_deposits']['current']), self.format_amount(cash_data['fixed_deposits']['previous'])],
|
| 161 |
+
['Total cash and cash equivalents (Refer note 13)', cash_ending, cash_beginning]
|
| 162 |
+
]
|
| 163 |
+
|
| 164 |
+
wb = Workbook()
|
| 165 |
+
ws = wb.active
|
| 166 |
+
ws.title = "Cash Flow Statement"
|
| 167 |
+
|
| 168 |
+
# Styles
|
| 169 |
+
title_font = Font(bold=True, size=14, color="FFFFFF")
|
| 170 |
+
subtitle_font = Font(bold=True, size=12)
|
| 171 |
+
header_font = Font(bold=True, size=11)
|
| 172 |
+
section_font = Font(bold=True, size=11)
|
| 173 |
+
normal_font = Font(size=10)
|
| 174 |
+
bold_font = Font(bold=True, size=10)
|
| 175 |
+
thin_border = Border(left=Side(style="thin"), right=Side(style="thin"), top=Side(style="thin"), bottom=Side(style="thin"))
|
| 176 |
+
center_align = Alignment(horizontal="center", vertical="center")
|
| 177 |
+
left_align = Alignment(horizontal="left", vertical="center")
|
| 178 |
+
right_align = Alignment(horizontal="right", vertical="center")
|
| 179 |
+
title_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
|
| 180 |
+
subtitle_fill = PatternFill(start_color="D7E4BC", end_color="D7E4BC", fill_type="solid")
|
| 181 |
+
header_fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
|
| 182 |
+
section_fill = PatternFill(start_color="E7E6E6", end_color="E7E6E6", fill_type="solid")
|
| 183 |
+
total_fill = PatternFill(start_color="F0F0F0", end_color="F0F0F0", fill_type="solid")
|
| 184 |
+
|
| 185 |
+
# Set column widths
|
| 186 |
+
ws.column_dimensions["A"].width = 55
|
| 187 |
+
ws.column_dimensions["B"].width = 18
|
| 188 |
+
ws.column_dimensions["C"].width = 18
|
| 189 |
+
|
| 190 |
+
# Title row
|
| 191 |
+
ws.merge_cells("A1:C1")
|
| 192 |
+
ws["A1"] = "CASH FLOW STATEMENT"
|
| 193 |
+
ws["A1"].font = title_font
|
| 194 |
+
ws["A1"].alignment = center_align
|
| 195 |
+
ws["A1"].fill = title_fill
|
| 196 |
+
|
| 197 |
+
ws.merge_cells("A2:C2")
|
| 198 |
+
ws["A2"] = "For the year ended March 31, 2024"
|
| 199 |
+
ws["A2"].font = subtitle_font
|
| 200 |
+
ws["A2"].alignment = center_align
|
| 201 |
+
ws["A2"].fill = subtitle_fill
|
| 202 |
+
|
| 203 |
+
ws.merge_cells("A3:C3")
|
| 204 |
+
ws["A3"] = "(All amounts in Lakhs)"
|
| 205 |
+
ws["A3"].font = normal_font
|
| 206 |
+
ws["A3"].alignment = center_align
|
| 207 |
+
ws["A3"].fill = subtitle_fill
|
| 208 |
+
|
| 209 |
+
# Header row
|
| 210 |
+
ws["A5"] = "Particulars"
|
| 211 |
+
ws["B5"] = "March 31, 2024"
|
| 212 |
+
ws["C5"] = "March 31, 2023"
|
| 213 |
+
for col in ["A", "B", "C"]:
|
| 214 |
+
ws[f"{col}5"].font = header_font
|
| 215 |
+
ws[f"{col}5"].alignment = center_align
|
| 216 |
+
ws[f"{col}5"].fill = header_fill
|
| 217 |
+
ws[f"{col}5"].border = thin_border
|
| 218 |
+
|
| 219 |
+
# Write data rows
|
| 220 |
+
excel_row = 6
|
| 221 |
+
for row_data in cfs_data[1:]:
|
| 222 |
+
particulars, current_val, previous_val = row_data
|
| 223 |
+
cell_a = ws.cell(row=excel_row, column=1, value=particulars)
|
| 224 |
+
cell_b = ws.cell(row=excel_row, column=2, value=current_val)
|
| 225 |
+
cell_c = ws.cell(row=excel_row, column=3, value=previous_val)
|
| 226 |
+
|
| 227 |
+
is_section = any(section in str(particulars).lower() for section in [
|
| 228 |
+
'cash flow from operating', 'cash flows from investing',
|
| 229 |
+
'cash flows from financing', 'adjustment for:',
|
| 230 |
+
'movements in working capital:', 'components of cash'
|
| 231 |
+
])
|
| 232 |
+
is_total = any(keyword in str(particulars).lower() for keyword in [
|
| 233 |
+
'net cash flow', 'operating profit before working',
|
| 234 |
+
'cash used in operations', 'net increase', 'total cash'
|
| 235 |
+
])
|
| 236 |
+
|
| 237 |
+
if is_section and str(particulars).strip():
|
| 238 |
+
cell_a.font = section_font
|
| 239 |
+
cell_a.fill = section_fill
|
| 240 |
+
elif str(particulars).strip():
|
| 241 |
+
cell_a.font = normal_font
|
| 242 |
+
else:
|
| 243 |
+
cell_a.font = normal_font
|
| 244 |
+
|
| 245 |
+
cell_a.alignment = left_align
|
| 246 |
+
cell_a.border = thin_border
|
| 247 |
+
for cell, value in zip([cell_b, cell_c], [current_val, previous_val]):
|
| 248 |
+
if value == '' or value is None:
|
| 249 |
+
cell.value = ''
|
| 250 |
+
elif isinstance(value, (int, float)) and value != 0:
|
| 251 |
+
cell.number_format = '#,##0.00'
|
| 252 |
+
if is_total:
|
| 253 |
+
cell.font = bold_font
|
| 254 |
+
cell.fill = total_fill
|
| 255 |
+
else:
|
| 256 |
+
cell.font = normal_font
|
| 257 |
+
else:
|
| 258 |
+
cell.value = ''
|
| 259 |
+
cell.alignment = right_align
|
| 260 |
+
cell.border = thin_border
|
| 261 |
+
excel_row += 1
|
| 262 |
+
|
| 263 |
+
try:
|
| 264 |
+
wb.save(output_filename)
|
| 265 |
+
logger.info(f"Cash Flow Statement Excel file saved to {output_filename}")
|
| 266 |
+
except Exception as e:
|
| 267 |
+
logger.error(f"Failed to save Excel file: {e}")
|
| 268 |
+
raise
|
| 269 |
+
|
| 270 |
+
return {
|
| 271 |
+
'operating_cash_flow': net_operating_cash_flow,
|
| 272 |
+
'investing_cash_flow': net_investing_cash_flow,
|
| 273 |
+
'financing_cash_flow': net_financing_cash_flow,
|
| 274 |
+
'net_change_in_cash': net_change,
|
| 275 |
+
'cash_beginning': cash_beginning,
|
| 276 |
+
'cash_ending': cash_ending,
|
| 277 |
+
'verification': {
|
| 278 |
+
'calculated_net_change': net_change,
|
| 279 |
+
'actual_cash_change': cash_ending - cash_beginning,
|
| 280 |
+
'difference': net_change - (cash_ending - cash_beginning)
|
| 281 |
+
},
|
| 282 |
+
'output_file': output_filename,
|
| 283 |
+
'detailed_calculations': {
|
| 284 |
+
'profit_before_tax': {'current': pbt_current, 'previous': pbt_previous},
|
| 285 |
+
'depreciation': {'current': dep_current, 'previous': dep_previous},
|
| 286 |
+
'interest_income': {'current': int_inc_current, 'previous': int_inc_previous},
|
| 287 |
+
'operating_profit_before_wc': {'current': op_profit_current, 'previous': op_profit_previous},
|
| 288 |
+
'working_capital_changes': {
|
| 289 |
+
'trade_receivables': tr_change,
|
| 290 |
+
'inventories': inv_change,
|
| 291 |
+
'other_current_assets': oca_change,
|
| 292 |
+
'short_term_loans_advances': stla_change,
|
| 293 |
+
'long_term_loans_advances': ltla_change,
|
| 294 |
+
'short_term_provisions': stp_change,
|
| 295 |
+
'trade_payables': tp_change,
|
| 296 |
+
'other_current_liabilities': ocl_change,
|
| 297 |
+
'total': total_wc_change
|
| 298 |
+
},
|
| 299 |
+
'cash_from_operations': cash_from_operations,
|
| 300 |
+
'tax_paid': tax_paid
|
| 301 |
+
}
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def main():
|
| 306 |
+
"""
|
| 307 |
+
Main entry point for generating the Cash Flow Statement.
|
| 308 |
+
"""
|
| 309 |
+
extracted_file = os.getenv("CFS_EXTRACTED_FILE", "extracted_cfs_data.json")
|
| 310 |
+
output_file = os.getenv("CFS_OUTPUT_FILE", "cash_flow_statements.xlsx")
|
| 311 |
+
|
| 312 |
+
if not os.path.exists(extracted_file):
|
| 313 |
+
logger.error(f"Extracted data file '{extracted_file}' not found. Please run the Financial Data Extractor first.")
|
| 314 |
+
return
|
| 315 |
+
|
| 316 |
+
try:
|
| 317 |
+
cfs_generator = CashFlowStatementGenerator(extracted_data_file=extracted_file)
|
| 318 |
+
cfs_summary = cfs_generator.generate_cash_flow_statement_xlsx(output_file)
|
| 319 |
+
logger.info("Cash Flow Statement generation completed successfully.")
|
| 320 |
+
logger.info(f"File created: {cfs_summary['output_file']}")
|
| 321 |
+
logger.info(f"Operating Cash Flow: βΉ{cfs_summary['operating_cash_flow']:,.2f} Lakhs")
|
| 322 |
+
logger.info(f"Investing Cash Flow: βΉ{cfs_summary['investing_cash_flow']:,.2f} Lakhs")
|
| 323 |
+
logger.info(f"Financing Cash Flow: βΉ{cfs_summary['financing_cash_flow']:,.2f} Lakhs")
|
| 324 |
+
logger.info(f"Net Change in Cash: βΉ{cfs_summary['net_change_in_cash']:,.2f} Lakhs")
|
| 325 |
+
verification = cfs_summary['verification']
|
| 326 |
+
logger.info(f"Verification - Calculated Net Change: βΉ{verification['calculated_net_change']:,.2f} Lakhs, "
|
| 327 |
+
f"Actual Change: βΉ{verification['actual_cash_change']:,.2f} Lakhs, "
|
| 328 |
+
f"Difference: βΉ{verification['difference']:,.2f} Lakhs")
|
| 329 |
+
if abs(verification['difference']) < 1:
|
| 330 |
+
logger.info("Cash Flow Statement balances correctly!")
|
| 331 |
+
else:
|
| 332 |
+
logger.warning("Cash Flow Statement has balancing difference - review calculations.")
|
| 333 |
+
except Exception as e:
|
| 334 |
+
logger.error(f"Error during Cash Flow Statement generation: {e}")
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
if __name__ == "__main__":
|
| 338 |
+
main()
|
cf/cf_middlestep.py
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
from typing import Any, Dict, Optional
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from openpyxl import Workbook
|
| 7 |
+
from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
|
| 8 |
+
|
| 9 |
+
class FinancialDataExtractor:
|
| 10 |
+
def __init__(self, json_data: Any):
|
| 11 |
+
"""Initialize with the raw company financial data JSON"""
|
| 12 |
+
if isinstance(json_data, str):
|
| 13 |
+
self.raw_data = json.loads(json_data)
|
| 14 |
+
else:
|
| 15 |
+
self.raw_data = json_data
|
| 16 |
+
|
| 17 |
+
self.financial_data = self.raw_data['company_financial_data']
|
| 18 |
+
self.current_year = "2024-03-31 00:00:00"
|
| 19 |
+
self.previous_year = "2023-03-31 00:00:00"
|
| 20 |
+
self.extracted_data = {}
|
| 21 |
+
|
| 22 |
+
def safe_get_value(self, data_dict: dict, *path_parts, year: Optional[str] = None, default: Any = 0) -> Any:
|
| 23 |
+
"""Safely extract values from nested dictionary"""
|
| 24 |
+
try:
|
| 25 |
+
current = data_dict
|
| 26 |
+
for part in path_parts:
|
| 27 |
+
if isinstance(current, dict) and part in current:
|
| 28 |
+
current = current[part]
|
| 29 |
+
else:
|
| 30 |
+
return default
|
| 31 |
+
|
| 32 |
+
if year and isinstance(current, dict) and year in current:
|
| 33 |
+
value = current[year]
|
| 34 |
+
return float(value) if isinstance(value, (int, float, str)) and str(value).replace('.', '').replace('-', '').isdigit() else default
|
| 35 |
+
elif isinstance(current, (int, float)):
|
| 36 |
+
return float(current)
|
| 37 |
+
elif isinstance(current, list) and len(current) > 0:
|
| 38 |
+
# For lists, try to extract numeric values
|
| 39 |
+
for item in current:
|
| 40 |
+
if isinstance(item, (int, float)):
|
| 41 |
+
return float(item)
|
| 42 |
+
return default
|
| 43 |
+
|
| 44 |
+
return default
|
| 45 |
+
except (KeyError, TypeError, ValueError, AttributeError):
|
| 46 |
+
return default
|
| 47 |
+
|
| 48 |
+
def extract_profit_and_loss_data(self) -> Dict[str, Any]:
|
| 49 |
+
"""Extract P&L related data for CFS calculations"""
|
| 50 |
+
pl_data = {}
|
| 51 |
+
|
| 52 |
+
# Profit after tax (Note 28)
|
| 53 |
+
pl_data['profit_after_tax'] = {
|
| 54 |
+
'current': self.safe_get_value(self.financial_data, 'other_data', '28. Earnings per Share', 'i) Profit after tax', year=self.current_year),
|
| 55 |
+
'previous': self.safe_get_value(self.financial_data, 'other_data', '28. Earnings per Share', 'i) Profit after tax', year=self.previous_year)
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
# Tax provision (Note 8)
|
| 59 |
+
tax_provision_data = self.safe_get_value(self.financial_data, 'current_liabilities', '8. Short Term Provisions', 'Provision for Taxation')
|
| 60 |
+
if isinstance(tax_provision_data, list) and len(tax_provision_data) >= 2:
|
| 61 |
+
pl_data['tax_provision'] = {
|
| 62 |
+
'current': float(tax_provision_data[0]),
|
| 63 |
+
'previous': float(tax_provision_data[1])
|
| 64 |
+
}
|
| 65 |
+
else:
|
| 66 |
+
pl_data['tax_provision'] = {'current': 179.27262, 'previous': 692.25399}
|
| 67 |
+
|
| 68 |
+
# Calculate Profit Before Tax
|
| 69 |
+
pl_data['profit_before_tax'] = {
|
| 70 |
+
'current': pl_data['profit_after_tax']['current'] + pl_data['tax_provision']['current'],
|
| 71 |
+
'previous': pl_data['profit_after_tax']['previous'] + pl_data['tax_provision']['previous']
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
# Depreciation (Note 21)
|
| 75 |
+
pl_data['depreciation'] = {
|
| 76 |
+
'current': self.safe_get_value(self.financial_data, 'other_data', '21. Depreciation and amortisation expense', 'Depreciation & amortisation', year=self.current_year),
|
| 77 |
+
'previous': self.safe_get_value(self.financial_data, 'other_data', '21. Depreciation and amortisation expense', 'Depreciation & amortisation', year=self.previous_year)
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
# Interest income (Note 17)
|
| 81 |
+
pl_data['interest_income'] = {
|
| 82 |
+
'current': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.current_year),
|
| 83 |
+
'previous': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.previous_year)
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
return pl_data
|
| 87 |
+
|
| 88 |
+
def extract_working_capital_data(self) -> Dict[str, Any]:
    """Collect working-capital components and their year-over-year movement.

    For assets a decrease releases cash, so change = previous - current;
    for liabilities an increase releases cash, so change = current - previous.
    """
    get = self.safe_get_value
    cur, prev = self.current_year, self.previous_year

    def asset_item(section, note, *lines):
        # Sum the listed note lines for both years; decrease is cash-positive.
        c = sum(get(self.financial_data, section, note, line, year=cur) for line in lines)
        p = sum(get(self.financial_data, section, note, line, year=prev) for line in lines)
        return {'current': c, 'previous': p, 'change': p - c}

    def liability_item(section, note, *lines):
        # Sum the listed note lines for both years; increase is cash-positive.
        c = sum(get(self.financial_data, section, note, line, year=cur) for line in lines)
        p = sum(get(self.financial_data, section, note, line, year=prev) for line in lines)
        return {'current': c, 'previous': p, 'change': c - p}

    wc_data = {
        # Trade Receivables (Note 12)
        'trade_receivables': asset_item(
            'current_assets', '12. Trade receivables',
            'Outstanding for a period exceeding six months from the date they are due for payment',
            'Other receivables'),
        # Inventories (Note 11)
        'inventories': asset_item('current_assets', '11. Inventories', 'Consumables'),
        # Other Current Assets (Note 15)
        'other_current_assets': asset_item(
            'other_data', '15. Other Current Assets', 'Interest accrued on fixed deposits'),
        # Short Term Loans & Advances (Note 14)
        'short_term_loans_advances': asset_item(
            'loans_and_advances', '14. Short Term Loans and Advances',
            'Prepaid Expenses', 'Other Advances', 'Advance tax',
            'Balances with statutory/government authorities'),
        # Long Term Loans & Advances (Note 10)
        'long_term_loans_advances': asset_item(
            'loans_and_advances', '10. Long Term Loans and advances',
            'Long Term - Security Deposits'),
        # Trade Payables (Note 6)
        'trade_payables': liability_item(
            'current_liabilities', '6. Trade Payables',
            'For Capital expenditure', 'For other expenses', 'Sundry Creditors'),
        # Other Current Liabilities (Note 7)
        'other_current_liabilities': liability_item(
            'current_liabilities', '7. Other Current Liabilities',
            'Outstanding Liabilities for Expenses', 'Statutory dues'),
    }

    # Short Term Provisions (Note 8): stored as a [current, previous] pair.
    stp = get(self.financial_data, 'current_liabilities', '8. Short Term Provisions',
              'Provision for Taxation', default=[179.27262, 692.25399])
    if isinstance(stp, list) and len(stp) >= 2:
        stp_cur, stp_prev = float(stp[0]), float(stp[1])
    else:
        # Fallback figures mirror the documented defaults above.
        stp_cur, stp_prev = 179.27262, 692.25399
    wc_data['short_term_provisions'] = {
        'current': stp_cur,
        'previous': stp_prev,
        'change': stp_cur - stp_prev,  # change in provision
    }

    return wc_data
|
| 201 |
+
|
| 202 |
+
def extract_investing_data(self) -> Dict[str, Any]:
    """Gather fixed-asset purchases/sales (Note 9) and interest income (Note 17)
    for the investing-activities section of the cash-flow statement."""
    get = self.safe_get_value

    # Fixed-asset movements from the Note 9 schedule.
    tangible_added = get(self.financial_data, 'fixed_assets', 'tangible_assets', '',
                         'gross_carrying_value', 'additions')
    intangible_added = get(self.financial_data, 'fixed_assets', 'intangible_assets', '',
                           'gross_carrying_value', 'additions')
    tangible_sold = get(self.financial_data, 'fixed_assets', 'tangible_assets', '',
                        'gross_carrying_value', 'deletions')
    intangible_sold = get(self.financial_data, 'fixed_assets', 'intangible_assets', '',
                          'gross_carrying_value', 'deletions')

    return {
        'asset_purchases': {
            'tangible_additions': tangible_added,
            'intangible_additions': intangible_added,
            'total': tangible_added + intangible_added,
        },
        'asset_sales': {
            'tangible_deletions': tangible_sold,
            'intangible_deletions': intangible_sold,
            # Guard against a falsy placeholder for intangible deletions.
            'total': tangible_sold + (intangible_sold if intangible_sold else 0),
        },
        # Interest income (Note 17) — also surfaced in the P&L extraction.
        'interest_income': {
            'current': get(self.financial_data, 'other_data', '17. Other income',
                           'Interest income', year=self.current_year),
            'previous': get(self.financial_data, 'other_data', '17. Other income',
                            'Interest income', year=self.previous_year),
        },
    }
|
| 233 |
+
|
| 234 |
+
def extract_financing_data(self) -> Dict[str, Any]:
    """Gather dividend payments, long-term borrowing balances and current
    maturities of long-term debt for the financing-activities section."""
    get = self.safe_get_value
    financing_data: Dict[str, Any] = {}

    # Dividend Paid (Note 3 - Reserves and Surplus), stored as [current, previous].
    dividends = get(self.financial_data, 'reserves_and_surplus', 'Less: Dividend Paid',
                    default=[162.7563, 0])
    if isinstance(dividends, list) and len(dividends) >= 2:
        financing_data['dividend_paid'] = {
            'current': float(dividends[0]) if dividends[0] else 0,
            'previous': float(dividends[1]) if dividends[1] else 0,
        }
    else:
        financing_data['dividend_paid'] = {'current': 162.7563, 'previous': 0}

    # Long Term Borrowings (Note 4): accumulate each lender's [current, previous] pair.
    lenders = (
        ('Andhra Pradesh State Financial Corporation', [197.9979, 276.4194]),
        ('Loan From ICICI Bank 603090031420', [683.5714632, 12428568]),
        ('Diamler Financial Services India Private Limited', [32.89343, 44.94277]),
    )
    borrowings_current = 0
    borrowings_previous = 0
    for lender, fallback in lenders:
        pair = get(self.financial_data, 'borrowings', '4. Long-Term Borrowings',
                   lender, default=fallback)
        if isinstance(pair, list) and len(pair) >= 2:
            borrowings_current += float(pair[0])
            if 'ICICI' in lender:
                # Filter out unrealistic prior-year values for the ICICI loan.
                borrowings_previous += float(pair[1]) if pair[1] < 1000000 else 0
            else:
                borrowings_previous += float(pair[1])

    financing_data['long_term_borrowings'] = {
        'current': borrowings_current,
        'previous': borrowings_previous,
        'change': borrowings_current - borrowings_previous,
    }

    # Current Maturities of Long Term Debt (Note 7), stored as [current, previous].
    cmltd = get(self.financial_data, 'current_liabilities', '7. Other Current Liabilities',
                'Current Maturities of Long Term Borrowings', default=[139.20441, 136.08612])
    if isinstance(cmltd, list) and len(cmltd) >= 2:
        financing_data['current_maturities'] = {
            'current': float(cmltd[0]),
            'previous': float(cmltd[1]),
            'change': float(cmltd[0]) - float(cmltd[1]),
        }
    else:
        financing_data['current_maturities'] = {
            'current': 139.20441, 'previous': 136.08612, 'change': 3.11829,
        }

    return financing_data
|
| 289 |
+
|
| 290 |
+
def extract_cash_data(self) -> Dict[str, Any]:
    """Build opening/closing cash & cash-equivalent balances (Note 13) and
    the resulting net movement for the period."""
    get = self.safe_get_value

    def balance(line):
        # Both years' figures for a single Note 13 line item.
        return {
            'current': get(self.financial_data, 'current_assets',
                           '13. Cash and bank balances', line, year=self.current_year),
            'previous': get(self.financial_data, 'current_assets',
                            '13. Cash and bank balances', line, year=self.previous_year),
        }

    cash_data: Dict[str, Any] = {
        'cash_on_hand': balance('Cash on hand'),
        'bank_balances': balance('Balances with banks in current accounts'),
        'fixed_deposits': balance('Fixed Deposits with ICICI Bank'),
    }
    components = ('cash_on_hand', 'bank_balances', 'fixed_deposits')
    cash_data['total'] = {
        period: sum(cash_data[part][period] for part in components)
        for period in ('current', 'previous')
    }
    # Positive means cash increased over the year.
    cash_data['net_change'] = cash_data['total']['current'] - cash_data['total']['previous']

    return cash_data
|
| 319 |
+
|
| 320 |
+
def extract_all_data(self) -> Dict[str, Any]:
    """Run every section extractor, cache the combined result on the instance
    and return it ready for CFS generation."""
    builders = {
        'profit_and_loss': self.extract_profit_and_loss_data,
        'working_capital': self.extract_working_capital_data,
        'investing_activities': self.extract_investing_data,
        'financing_activities': self.extract_financing_data,
        'cash_and_equivalents': self.extract_cash_data,
    }
    # Build each section in declaration order.
    self.extracted_data = {name: build() for name, build in builders.items()}
    self.extracted_data['extraction_metadata'] = {
        'extracted_on': datetime.now().isoformat(),
        'current_year': self.current_year,
        'previous_year': self.previous_year,
    }
    return self.extracted_data
|
| 336 |
+
|
| 337 |
+
def save_extracted_data(self, filename: str = "extracted_cfs_data.json") -> str:
    """Serialize the cached extraction result to *filename* and return the path.

    Non-JSON-native values (e.g. datetimes) are stringified via ``default=str``.
    """
    payload = json.dumps(self.extracted_data, indent=2, default=str)
    with open(filename, 'w') as handle:
        handle.write(payload)
    return filename
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def print_data_extraction_summary(extracted_data: Dict[str, Any]) -> None:
    """Print a human-readable recap of the key extracted figures (Rs in Lakhs)."""
    print("\n" + "=" * 60)
    print("DATA EXTRACTION SUMMARY")
    print("=" * 60)

    pl = extracted_data['profit_and_loss']
    # Headline P&L figures for the current year.
    for label, figure in (
        ("Profit After Tax (Current)", pl['profit_after_tax']['current']),
        ("Tax Provision (Current)", pl['tax_provision']['current']),
        ("Profit Before Tax (Calculated)", pl['profit_before_tax']['current']),
        ("Depreciation (Current)", pl['depreciation']['current']),
        ("Interest Income (Current)", pl['interest_income']['current']),
    ):
        print(f"{label}: Rs{figure:,.2f} Lakhs")

    cash = extracted_data['cash_and_equivalents']
    print(f"\nCash at Beginning: Rs{cash['total']['previous']:,.2f} Lakhs")
    print(f"Cash at End: Rs{cash['total']['current']:,.2f} Lakhs")
    print(f"Net Cash Change: Rs{cash['net_change']:,.2f} Lakhs")
|
| 361 |
+
|
| 362 |
+
def validate_cfs_data(extracted_data: Dict[str, Any]) -> Dict[str, Any]:
    """Sanity-check the extracted figures.

    Returns a dict with 'missing_data' (critical gaps), 'warnings'
    (suspicious but non-fatal findings) and an overall 'data_quality'
    grade of Good / Fair / Poor.
    """
    missing: List[str] = []
    warnings: List[str] = []

    pl = extracted_data['profit_and_loss']
    # A zero PAT almost certainly means the figure was not found.
    if pl['profit_after_tax']['current'] == 0:
        missing.append('Profit After Tax')
    if pl['depreciation']['current'] == 0:
        warnings.append('Depreciation appears to be zero')

    cash = extracted_data['cash_and_equivalents']
    # A swing larger than the opening balance suggests an extraction issue.
    if abs(cash['net_change']) > cash['total']['previous']:
        warnings.append('Large cash change relative to opening balance')

    if missing:
        quality = 'Poor'
    elif warnings:
        quality = 'Fair'
    else:
        quality = 'Good'

    return {
        'missing_data': missing,
        'warnings': warnings,
        'data_quality': quality,
    }
|
| 389 |
+
|
| 390 |
+
def main_data_extraction(json_file_path: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """Load the raw CFS JSON, extract and validate the data, and persist the result.

    Returns a dict with the output file path, the extracted data and the
    validation report, or None when the input file is missing or unparseable.
    """
    logger = logging.getLogger("cf_middlestep")
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    if json_file_path is None:
        # Allow the input path to be overridden via the environment.
        json_file_path = os.environ.get("CFS_JSON_INPUT", "clean_financial_data_cfs.json")

    banner = "=" * 80
    logger.info(banner)
    logger.info("FINANCIAL DATA EXTRACTION AND ANALYSIS")
    logger.info(banner)

    # Step 1: Load raw JSON data
    logger.info("1. Loading raw financial data...")
    try:
        with open(json_file_path, 'r') as f:
            raw_data = json.load(f)
    except FileNotFoundError:
        logger.error(f"File {json_file_path} not found")
        return None
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON format in {json_file_path}")
        return None
    logger.info(f" Successfully loaded data from {json_file_path}")

    # Step 2: Extract and process data
    logger.info("2. Extracting and processing financial data...")
    extractor = FinancialDataExtractor(raw_data)
    extracted_data = extractor.extract_all_data()

    # Step 3: Validate extracted data
    logger.info("3. Validating extracted data...")
    validation_results = validate_cfs_data(extracted_data)
    logger.info(f"Data Quality: {validation_results['data_quality']}")
    if validation_results['missing_data']:
        logger.warning(f"Missing Data: {', '.join(validation_results['missing_data'])}")
    if validation_results['warnings']:
        logger.warning(f"Warnings: {', '.join(validation_results['warnings'])}")

    # Step 4: Save extracted data
    logger.info("4. Saving extracted data...")
    extracted_file = extractor.save_extracted_data(
        os.environ.get("CFS_JSON_OUTPUT", "extracted_cfs_data.json"))
    logger.info(f"Extracted data saved to {extracted_file}")

    # Step 5: Print summary
    print_data_extraction_summary(extracted_data)

    logger.info("FILES CREATED:")
    logger.info(f"1. {extracted_file} - Processed financial data (JSON)")
    logger.info("NEXT STEP:")
    logger.info("Use the 'extracted_cfs_data.json' file as input for the Cash Flow Statement Generator")

    return {
        'extracted_data_file': extracted_file,
        'extracted_data': extracted_data,
        'validation_results': validation_results,
    }
|
| 440 |
+
|
| 441 |
+
def debug_json_structure(json_file_path: str = "clean_financial_data_cfs.json") -> None:
    """Print a depth-limited outline of the JSON file's structure for debugging.

    Any failure (missing file, bad JSON, ...) is reported rather than raised.
    """
    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)

        print("JSON STRUCTURE ANALYSIS")
        print("=" * 50)

        def outline(node, depth=0, max_depth=3):
            # Recurse only into dicts, up to max_depth levels deep.
            if depth > max_depth:
                return
            if isinstance(node, dict):
                pad = "  " * depth
                for name, child in node.items():
                    if isinstance(child, dict):
                        print(f"{pad}{name}: (dict with {len(child)} keys)")
                        outline(child, depth + 1, max_depth)
                    elif isinstance(child, list):
                        print(f"{pad}{name}: (list with {len(child)} items)")
                    else:
                        print(f"{pad}{name}: {type(child).__name__}")

        outline(data.get('company_financial_data', {}))

    except Exception as e:
        print(f"Error analyzing JSON structure: {e}")
|
| 470 |
+
|
| 471 |
+
# Example usage and testing
|
| 472 |
+
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    logger = logging.getLogger("cf_middlestep")
    logger.info("Starting Financial Data Extraction Process...")

    input_file = os.environ.get("CFS_JSON_INPUT", "clean_financial_data_cfs.json")
    if not os.path.exists(input_file):
        # Guard clause: nothing to do without the input JSON.
        logger.error(f"Input file '{input_file}' not found in current directory")
        logger.error("Please ensure the JSON file is in the same directory as this script")
    else:
        extraction_results = main_data_extraction(input_file)
        if extraction_results:
            logger.info("DATA EXTRACTION COMPLETED SUCCESSFULLY!")
            logger.info("Ready for Cash Flow Statement generation using extracted_cfs_data.json")
|
cf/csv_json_cf.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
import logging
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Dict, List, Any, Optional, Union
|
| 9 |
+
from pydantic import BaseModel, Field
|
| 10 |
+
from pydantic_settings import BaseSettings
|
| 11 |
+
|
| 12 |
+
# Configure logging
# Module-wide logger at INFO level so conversion progress is visible by default.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
# Settings for CSV to JSON conversion for Cashflow
class Settings(BaseSettings):
    """Runtime configuration, overridable via environment variables."""

    # Folder containing the note-wise CSV exports for the cash-flow statement.
    csv_folder_path: str = Field(default="csv_notes_cfs", env="CSV_CF_FOLDER_PATH")
    # Destination file for the consolidated JSON output.
    output_json: str = Field(default="clean_financial_data_cfs.json", env="OUTPUT_CF_JSON")

# Module-level singleton used as the default configuration.
settings = Settings()
|
| 22 |
+
|
| 23 |
+
class FinancialCSVMapper:
    """Parses note-wise financial CSV exports into structured dictionaries."""

    def __init__(self, csv_folder_path: str = settings.csv_folder_path):
        # Folder that holds the CSV note files to be processed.
        self.csv_folder_path = csv_folder_path
|
| 26 |
+
|
| 27 |
+
def clean_value(self, value: Any) -> Optional[Union[float, int, str]]:
    """
    Clean and convert values appropriately.

    Strips whitespace, thousands separators and the rupee sign before
    attempting a numeric conversion; falls back to the stripped string
    when the result is not numeric. Returns None for empty or NaN values.
    """
    if pd.isna(value) or value == '':
        return None
    value_str = str(value).strip()
    # Remove whitespace, commas and the rupee currency symbol.
    # Fix: the original character class contained the mojibake sequence
    # 'βΉ' (a mis-decoded UTF-8 '₹'), so rupee-prefixed amounts were never
    # cleaned and fell through as strings.
    cleaned_num = re.sub(r'[\s,₹]', '', value_str)
    try:
        if '.' in cleaned_num:
            return float(cleaned_num)
        return int(cleaned_num)
    except (ValueError, TypeError):
        return value_str
|
| 43 |
+
|
| 44 |
+
def identify_note_sections(self, df: pd.DataFrame) -> Dict[str, Dict]:
    """Split the sheet into note sections keyed by headers like '2. Share capital'."""
    header_re = re.compile(r'^\d+\.?\s+[A-Za-z]')
    sections: Dict[str, Dict] = {}
    section_name = None
    buffered_rows: List[List] = []

    def flush():
        # Parse and store the rows accumulated for the current section.
        if section_name and buffered_rows:
            sections[section_name] = self.parse_section_data(buffered_rows)

    for _, row in df.iterrows():
        head = "" if pd.isna(row.iloc[0]) else str(row.iloc[0])

        if header_re.match(head):
            # New note header (number + dot + text): close the previous section.
            flush()
            section_name = head.strip()
            buffered_rows = []
        elif section_name:
            cleaned = [self.clean_value(cell) for cell in row]
            if any(cell is not None for cell in cleaned):  # skip empty rows
                buffered_rows.append(cleaned)

    flush()  # handle the trailing section
    return sections
|
| 74 |
+
|
| 75 |
+
def parse_section_data(self, rows: List[List]) -> Dict:
    """Turn a note section's rows into {label: value(s)}.

    When a header row with ISO dates (YYYY-MM-DD) is found in the first
    three rows, multi-value rows are mapped date -> value; otherwise the
    raw value list is kept. Reporting dates are stored under '_metadata'.
    """
    if not rows:
        return {}

    date_re = re.compile(r'\d{4}-\d{2}-\d{2}')

    # Locate a header row carrying ISO dates within the first three rows.
    date_row_idx = None
    for i, candidate in enumerate(rows[:3]):
        if any(cell and isinstance(cell, str) and date_re.search(str(cell))
               for cell in candidate):
            date_row_idx = i
            break

    dates = []
    if date_row_idx is not None:
        dates = [cell for cell in rows[date_row_idx] if cell and date_re.search(str(cell))]

    section_data: Dict = {}
    for row in rows:
        if not row or not row[0]:
            continue
        label = str(row[0]).strip()

        # Skip the header row itself and any row repeating a date string.
        if date_row_idx is not None and row == rows[date_row_idx]:
            continue
        if any(d in str(cell) for cell in row for d in dates if d):
            continue

        # Values are the non-None cells after the label column.
        values = [cell for cell in row[1:] if cell is not None]
        if not values:
            continue
        if len(values) == 1:
            section_data[label] = values[0]
        elif dates and len(values) <= len(dates):
            section_data[label] = {dates[i]: values[i] for i in range(len(values))}
        else:
            section_data[label] = values

    if dates:
        section_data["_metadata"] = {"reporting_dates": dates}

    return section_data
|
| 128 |
+
|
| 129 |
+
def parse_fixed_assets(self, df: pd.DataFrame) -> Dict:
    """Parse the Note 9 fixed-assets schedule into tangible/intangible/totals.

    Columns 2-11 are assumed to be: gross opening/additions/deletions/closing,
    accumulated depreciation opening/for-the-year/deletions/closing, and
    net closing/opening — TODO confirm against the CSV layout.
    """

    def cell(row, i):
        # Column accessor tolerant of short rows.
        return self.clean_value(row.iloc[i]) if len(row) > i else None

    result = {"tangible_assets": {}, "intangible_assets": {}, "totals": {}}
    bucket_key = {"tangible": "tangible_assets",
                  "intangible": "intangible_assets",
                  "totals": "totals"}
    bucket = None  # which of the three groups rows currently belong to

    for _, row in df.iterrows():
        head = self.clean_value(row.iloc[0])
        text = str(head)

        # Skip blank and header rows.
        if not head or "Particulars" in text or "Gross Carrying" in text:
            continue

        # Category markers switch the active bucket.
        if "Tangible Assets" in text:
            bucket = "tangible"
            continue
        if "Intangible Assets" in text:
            bucket = "intangible"
            continue
        if "Total" in text or "Grand Total" in text:
            # Note: the Total row itself is still recorded below, under "totals".
            bucket = "totals"

        if bucket and len(row) > 1:
            # Drop the leading serial number (1, 2, 3, ...).
            name = re.sub(r'^\d+\s*', '', text.strip())
            result[bucket_key[bucket]][name] = {
                "gross_carrying_value": {
                    "opening": cell(row, 2),
                    "additions": cell(row, 3),
                    "deletions": cell(row, 4),
                    "closing": cell(row, 5),
                },
                "accumulated_depreciation": {
                    "opening": cell(row, 6),
                    "for_the_year": cell(row, 7),
                    "deletions": cell(row, 8),
                    "closing": cell(row, 9),
                },
                "net_carrying_value": {
                    "closing": cell(row, 10),
                    "opening": cell(row, 11),
                },
            }

    return result
|
| 190 |
+
|
| 191 |
+
def parse_trade_receivables_aging(self, df: pd.DataFrame) -> Dict:
    """Parse a trade-receivables aging table into per-year buckets.

    The sheet lists one section per financial year (a header row whose
    first cell contains the year, e.g. "2024"), followed by a
    "Considered good" row whose columns hold the aging buckets.  Values
    are normalised through ``self.clean_value``.

    Generalized from the original hard-coded "2024"/"2023" checks: any
    four-digit year (19xx/20xx) in the first column starts a new section,
    so the parser keeps working for future financial years.

    Args:
        df: Raw note DataFrame as read from the CSV export.

    Returns:
        Mapping of year -> {bucket_name: cleaned value}; empty when no
        year header or "Considered good" row is found.
    """
    aging_data: Dict = {}
    current_year = None
    # Bucket labels in the order the columns appear on the sheet
    # (columns 1..6 of the "Considered good" row).
    bucket_keys = (
        "0_6_months",
        "6_12_months",
        "1_2_years",
        "2_3_years",
        "more_than_3_years",
        "total",
    )

    for _, row in df.iterrows():
        first_col = "" if pd.isna(row.iloc[0]) else str(row.iloc[0])

        # Year section headers: any standalone 4-digit year, not just the
        # 2024/2023 that this particular workbook happened to contain.
        year_match = re.search(r"\b(19|20)\d{2}\b", first_col)
        if year_match and "Considered good" not in first_col:
            current_year = year_match.group(0)
            continue

        # Aging buckets live on the "Considered good" row of each section.
        if current_year and "Considered good" in first_col:
            aging_data[current_year] = {
                key: self.clean_value(row.iloc[pos + 1]) if len(row) > pos + 1 else None
                for pos, key in enumerate(bucket_keys)
            }

    return aging_data
|
| 219 |
+
|
| 220 |
+
def process_single_csv(self, file_path: str) -> Dict[str, Any]:
    """Read one note CSV and route it to the appropriate parser.

    Note 9 carries the fixed-assets schedule and has its own layout;
    notes 2-8 and 10-15 are generic note sections that may also embed a
    receivables aging table; everything else is parsed as plain note
    sections.

    Returns:
        A dict with the parsed payload, or one carrying an ``error`` key
        when reading/parsing fails.
    """
    base_name = os.path.basename(file_path)
    try:
        frame = pd.read_csv(file_path, encoding='utf-8')
        parsed: Dict[str, Any] = {
            "file_name": base_name,
            "processing_date": datetime.now().isoformat(),
        }
        if "Note_9" in base_name:
            # Fixed-assets schedule: dedicated parser.
            parsed["fixed_assets"] = self.parse_fixed_assets(frame)
        elif "Note_2_to_8" in base_name or "Note_10_to_15" in base_name:
            parsed["notes"] = self.identify_note_sections(frame)
            # These sheets sometimes embed an aging table; detect by marker text.
            has_aging = any(
                "Age wise analysis" in str(cell)
                for row_values in frame.values
                for cell in row_values
            )
            if has_aging:
                parsed["trade_receivables_aging"] = self.parse_trade_receivables_aging(frame)
        else:
            parsed["notes"] = self.identify_note_sections(frame)
        return parsed
    except Exception as exc:
        logger.error(f"Error processing {file_path}: {exc}")
        return {
            "file_name": os.path.basename(file_path),
            "error": str(exc),
            "processing_date": datetime.now().isoformat(),
        }
|
| 249 |
+
|
| 250 |
+
def process_all_csvs(self) -> Dict[str, Any]:
    """Aggregate every note CSV in the source folder into one JSON-ready dict.

    Scans ``self.csv_folder_path`` for ``*.csv`` files, parses each via
    ``process_single_csv`` and routes the parsed note sections into
    balance-sheet style buckets (share capital, reserves, borrowings, ...).

    Returns:
        The structured financial data, or ``{"error": ...}`` when the
        folder is missing or holds no CSV files.
    """
    if not os.path.exists(self.csv_folder_path):
        logger.error(f"Folder {self.csv_folder_path} not found")
        return {"error": f"Folder {self.csv_folder_path} not found"}

    csv_names = [name for name in os.listdir(self.csv_folder_path) if name.endswith('.csv')]
    if not csv_names:
        logger.error(f"No CSV files found in {self.csv_folder_path}")
        return {"error": f"No CSV files found in {self.csv_folder_path}"}

    # Skeleton mirrors the structure produced by csv_json_bs.py.
    company: Dict[str, Any] = {
        "processing_summary": {
            "total_files": len(csv_names),
            "processing_date": datetime.now().isoformat(),
            "processed_files": [],
        },
        "share_capital": {},
        "reserves_and_surplus": {},
        "borrowings": {},
        "current_liabilities": {},
        "fixed_assets": {},
        "current_assets": {},
        "loans_and_advances": {},
        "other_data": {},
    }

    def route_note(title: str, payload: Any) -> None:
        # Order matters: the first matching bucket wins, exactly as in
        # the previous if/elif chain.
        lowered = title.lower()
        if "Share capital" in title:
            company["share_capital"] = payload
        elif "Reserves and surplus" in title:
            company["reserves_and_surplus"] = payload
        elif "borrowings" in lowered:
            company["borrowings"][title] = payload
        elif any(tag in lowered for tag in ("payables", "liabilities", "provisions")):
            company["current_liabilities"][title] = payload
        elif any(tag in lowered for tag in ("receivables", "cash", "inventories")):
            company["current_assets"][title] = payload
        elif any(tag in lowered for tag in ("loans", "advances")):
            company["loans_and_advances"][title] = payload
        else:
            company["other_data"][title] = payload

    for csv_name in csv_names:
        parsed = self.process_single_csv(os.path.join(self.csv_folder_path, csv_name))
        if "error" in parsed:
            # Failed files are excluded from the summary, same as before.
            continue
        company["processing_summary"]["processed_files"].append(csv_name)
        for title, payload in parsed.get("notes", {}).items():
            route_note(title, payload)
        if "fixed_assets" in parsed:
            company["fixed_assets"] = parsed["fixed_assets"]
        if "trade_receivables_aging" in parsed:
            company["current_assets"]["trade_receivables_aging"] = parsed["trade_receivables_aging"]

    return {"company_financial_data": company}
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def save_to_json(self, output_path: str = settings.output_json) -> str:
    """Run the full CSV aggregation and persist it as pretty-printed JSON.

    Args:
        output_path: Destination file; defaults to the configured
            ``settings.output_json`` (resolved once, at import time).

    Returns:
        The path the JSON was written to.
    """
    payload = self.process_all_csvs()
    # default=str keeps non-JSON-native values (dates, numpy scalars)
    # serialisable instead of raising.
    with open(output_path, 'w', encoding='utf-8') as handle:
        json.dump(payload, handle, indent=2, ensure_ascii=False, default=str)
    logger.info(f"Clean cashflow financial JSON created: {output_path}")
    return output_path
|
| 318 |
+
|
| 319 |
+
# Usage
if __name__ == "__main__":
    # Build the mapper over the configured notes folder and emit the JSON.
    csv_mapper = FinancialCSVMapper(settings.csv_folder_path)
    result_path = csv_mapper.save_to_json(settings.output_json)
    logger.info(f"Clean cashflow financial JSON created: {result_path}")
|
cf/sircodecf.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import sys
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
from pydantic_settings import BaseSettings
|
| 8 |
+
|
| 9 |
+
# Ensure stdout encoding for Unicode: some consoles (notably Windows
# cp1252) would otherwise raise on the non-ASCII characters logged below.
sys.stdout.reconfigure(encoding='utf-8')

# Configure logging: module-level logger at INFO for progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
class Settings(BaseSettings):
    """Settings for Cash Flow Statement CSV extraction, loaded from environment variables or .env file."""
    # Source workbook path; override with CFS_EXCEL_FILE_PATH.
    excel_file_path: str = Field(default="In Lakhs BS_FY 23-24 V5 - Final.xlsx", env="CFS_EXCEL_FILE_PATH")
    # Folder where the per-note CSVs are written; override with CFS_OUTPUT_FOLDER.
    output_folder: str = Field(default="csv_notes_cfs", env="CFS_OUTPUT_FOLDER")
    # Worksheet names inside the workbook, one per note range.
    note_16_23_sheet: str = Field(default="Note 16-23", env="CFS_NOTE_16_23_SHEET")
    note_2_8_sheet: str = Field(default="Note 2 - 8", env="CFS_NOTE_2_8_SHEET")
    note_9_sheet: str = Field(default="Note 9", env="CFS_NOTE_9_SHEET")
    note_10_15_sheet: str = Field(default="Note 10-15", env="CFS_NOTE_10_15_SHEET")
    note_24_30_sheet: str = Field(default="Note 24-30", env="CFS_NOTE_24_30_SHEET")
    # Number of leading header rows to skip when parsing each sheet.
    skiprows: int = Field(default=3, env="CFS_SKIPROWS")

# Singleton settings instance used throughout this module.
settings = Settings()
|
| 28 |
+
|
| 29 |
+
class NoteCSVInfo(BaseModel):
    """Summary of one exported note CSV."""
    # File name of the CSV written to the output folder.
    name: str
    # Number of data rows the exported CSV contains.
    rows: int
+
|
| 33 |
+
def clean_note(sheet_name: str, skiprows: int = settings.skiprows, excel: Optional[pd.ExcelFile] = None) -> pd.DataFrame:
    """
    Parse and clean one worksheet from the source Excel workbook.

    Drops fully-empty rows and columns and resets the index.

    Previously this function could only read through the module-global
    ``xls`` that ``main()`` installs, making it unusable (NameError)
    before ``main()`` ran.  The optional ``excel`` parameter lets callers
    supply a workbook explicitly while remaining backward compatible.

    Args:
        sheet_name: Worksheet name to parse.
        skiprows: Leading header rows to skip (defaults to the configured value).
        excel: Workbook to read from; when omitted, falls back to the
            module-global ``xls`` set by ``main()``.

    Returns:
        The cleaned DataFrame.

    Raises:
        NameError: If ``excel`` is omitted and ``main()`` has not yet set
            the global ``xls``.
    """
    workbook = excel if excel is not None else xls
    df = workbook.parse(sheet_name, skiprows=skiprows)
    df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
    return df
|
| 41 |
+
|
| 42 |
+
def export_note_to_csv(df: pd.DataFrame, filename: str, output_folder: str) -> NoteCSVInfo:
    """
    Write *df* as a CSV inside *output_folder* and report its row count.
    """
    destination = os.path.join(output_folder, filename)
    df.to_csv(destination, index=False)
    row_count = df.shape[0]
    return NoteCSVInfo(name=filename, rows=row_count)
|
| 49 |
+
|
| 50 |
+
def main() -> None:
    """
    Extract the configured note sheets from the Excel workbook and export
    each as a CSV, logging the extracted row counts.
    """
    # Command-line argument (if any) overrides the configured workbook path.
    if len(sys.argv) > 1:
        excel_path = sys.argv[1]
        logger.info(f"Excel file path from argument: {excel_path}")
    else:
        excel_path = settings.excel_file_path
        logger.info(f"Excel file path from settings: {excel_path}")

    # clean_note() reads the workbook through this module-global handle.
    global xls
    xls = pd.ExcelFile(excel_path)

    # (worksheet name, CSV file name, human-readable label) per note range.
    sheet_specs = [
        (settings.note_16_23_sheet, "Note_16_to_23_Full.csv", "Note 16β23"),
        (settings.note_2_8_sheet, "Note_2_to_8_Full.csv", "Note 2β8"),
        (settings.note_9_sheet, "Note_9_Full.csv", "Note 9"),
        (settings.note_10_15_sheet, "Note_10_to_15_Full.csv", "Note 10β15"),
        (settings.note_24_30_sheet, "Note_24_to_30_Full.csv", "Note 24β30"),
    ]

    # Phase 1: parse and clean every sheet (same order as before).
    cleaned_frames = [clean_note(sheet, settings.skiprows) for sheet, _, _ in sheet_specs]

    # Ensure the output folder exists before any CSV is written.
    os.makedirs(settings.output_folder, exist_ok=True)

    # Phase 2: export each cleaned frame to its CSV.
    infos = [
        export_note_to_csv(frame, csv_name, settings.output_folder)
        for frame, (_, csv_name, _) in zip(cleaned_frames, sheet_specs)
    ]

    # Phase 3: log confirmation and row counts.
    for (_, _, label), info in zip(sheet_specs, infos):
        logger.info(f"Extracted rows: {label} = {info.rows} rows")
|
| 87 |
+
|
| 88 |
+
# Run the extraction only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|