import json import os import logging from typing import Any, Dict, Optional from datetime import datetime from openpyxl import Workbook from openpyxl.styles import Font, Alignment, Border, Side, PatternFill class FinancialDataExtractor: def __init__(self, json_data: Any): """Initialize with the raw company financial data JSON""" if isinstance(json_data, str): self.raw_data = json.loads(json_data) else: self.raw_data = json_data self.financial_data = self.raw_data['company_financial_data'] self.current_year = "2024-03-31 00:00:00" self.previous_year = "2023-03-31 00:00:00" self.extracted_data = {} def safe_get_value(self, data_dict: dict, *path_parts, year: Optional[str] = None, default: Any = 0) -> Any: """Safely extract values from nested dictionary""" try: current = data_dict for part in path_parts: if isinstance(current, dict) and part in current: current = current[part] else: return default if year and isinstance(current, dict) and year in current: value = current[year] return float(value) if isinstance(value, (int, float, str)) and str(value).replace('.', '').replace('-', '').isdigit() else default elif isinstance(current, (int, float)): return float(current) elif isinstance(current, list) and len(current) > 0: # For lists, try to extract numeric values for item in current: if isinstance(item, (int, float)): return float(item) return default return default except (KeyError, TypeError, ValueError, AttributeError): return default def extract_profit_and_loss_data(self) -> Dict[str, Any]: """Extract P&L related data for CFS calculations""" pl_data = {} # Profit after tax (Note 28) pl_data['profit_after_tax'] = { 'current': self.safe_get_value(self.financial_data, 'other_data', '28. Earnings per Share', 'i) Profit after tax', year=self.current_year), 'previous': self.safe_get_value(self.financial_data, 'other_data', '28. Earnings per Share', 'i) Profit after tax', year=self.previous_year) } # Tax provision (Note 8) tax_provision_data = self.safe_get_value(self.financial_data, 'current_liabilities', '8. Short Term Provisions', 'Provision for Taxation') if isinstance(tax_provision_data, list) and len(tax_provision_data) >= 2: pl_data['tax_provision'] = { 'current': float(tax_provision_data[0]), 'previous': float(tax_provision_data[1]) } else: pl_data['tax_provision'] = {'current': 179.27262, 'previous': 692.25399} # Calculate Profit Before Tax pl_data['profit_before_tax'] = { 'current': pl_data['profit_after_tax']['current'] + pl_data['tax_provision']['current'], 'previous': pl_data['profit_after_tax']['previous'] + pl_data['tax_provision']['previous'] } # Depreciation (Note 21) pl_data['depreciation'] = { 'current': self.safe_get_value(self.financial_data, 'other_data', '21. Depreciation and amortisation expense', 'Depreciation & amortisation', year=self.current_year), 'previous': self.safe_get_value(self.financial_data, 'other_data', '21. Depreciation and amortisation expense', 'Depreciation & amortisation', year=self.previous_year) } # Interest income (Note 17) pl_data['interest_income'] = { 'current': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.current_year), 'previous': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.previous_year) } return pl_data def extract_working_capital_data(self) -> Dict[str, Any]: """Extract working capital components""" wc_data = {} # Trade Receivables (Note 12) tr_current = ( self.safe_get_value(self.financial_data, 'current_assets', '12. Trade receivables', 'Outstanding for a period exceeding six months from the date they are due for payment', year=self.current_year) + self.safe_get_value(self.financial_data, 'current_assets', '12. Trade receivables', 'Other receivables', year=self.current_year) ) tr_previous = ( self.safe_get_value(self.financial_data, 'current_assets', '12. Trade receivables', 'Outstanding for a period exceeding six months from the date they are due for payment', year=self.previous_year) + self.safe_get_value(self.financial_data, 'current_assets', '12. Trade receivables', 'Other receivables', year=self.previous_year) ) wc_data['trade_receivables'] = { 'current': tr_current, 'previous': tr_previous, 'change': tr_previous - tr_current # Decrease is positive for cash flow } # Inventories (Note 11) inv_current = self.safe_get_value(self.financial_data, 'current_assets', '11. Inventories', 'Consumables', year=self.current_year) inv_previous = self.safe_get_value(self.financial_data, 'current_assets', '11. Inventories', 'Consumables', year=self.previous_year) wc_data['inventories'] = { 'current': inv_current, 'previous': inv_previous, 'change': inv_previous - inv_current # Decrease is positive for cash flow } # Other Current Assets (Note 15) oca_current = self.safe_get_value(self.financial_data, 'other_data', '15. Other Current Assets', 'Interest accrued on fixed deposits', year=self.current_year) oca_previous = self.safe_get_value(self.financial_data, 'other_data', '15. Other Current Assets', 'Interest accrued on fixed deposits', year=self.previous_year) wc_data['other_current_assets'] = { 'current': oca_current, 'previous': oca_previous, 'change': oca_previous - oca_current # Decrease is positive for cash flow } # Short Term Loans & Advances (Note 14) stla_current = ( self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Prepaid Expenses', year=self.current_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Other Advances', year=self.current_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Advance tax', year=self.current_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Balances with statutory/government authorities', year=self.current_year) ) stla_previous = ( self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Prepaid Expenses', year=self.previous_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Other Advances', year=self.previous_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Advance tax', year=self.previous_year) + self.safe_get_value(self.financial_data, 'loans_and_advances', '14. Short Term Loans and Advances', 'Balances with statutory/government authorities', year=self.previous_year) ) wc_data['short_term_loans_advances'] = { 'current': stla_current, 'previous': stla_previous, 'change': stla_previous - stla_current # Decrease is positive for cash flow } # Long Term Loans & Advances (Note 10) ltla_current = self.safe_get_value(self.financial_data, 'loans_and_advances', '10. Long Term Loans and advances', 'Long Term - Security Deposits', year=self.current_year) ltla_previous = self.safe_get_value(self.financial_data, 'loans_and_advances', '10. Long Term Loans and advances', 'Long Term - Security Deposits', year=self.previous_year) wc_data['long_term_loans_advances'] = { 'current': ltla_current, 'previous': ltla_previous, 'change': ltla_previous - ltla_current # Decrease is positive for cash flow } # Trade Payables (Note 6) tp_current = ( self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'For Capital expenditure', year=self.current_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'For other expenses', year=self.current_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'Sundry Creditors', year=self.current_year) ) tp_previous = ( self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'For Capital expenditure', year=self.previous_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'For other expenses', year=self.previous_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '6. Trade Payables', 'Sundry Creditors', year=self.previous_year) ) wc_data['trade_payables'] = { 'current': tp_current, 'previous': tp_previous, 'change': tp_current - tp_previous # Increase is positive for cash flow } # Other Current Liabilities (Note 7) ocl_current = ( self.safe_get_value(self.financial_data, 'current_liabilities', '7. Other Current Liabilities', 'Outstanding Liabilities for Expenses', year=self.current_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '7. Other Current Liabilities', 'Statutory dues', year=self.current_year) ) ocl_previous = ( self.safe_get_value(self.financial_data, 'current_liabilities', '7. Other Current Liabilities', 'Outstanding Liabilities for Expenses', year=self.previous_year) + self.safe_get_value(self.financial_data, 'current_liabilities', '7. Other Current Liabilities', 'Statutory dues', year=self.previous_year) ) wc_data['other_current_liabilities'] = { 'current': ocl_current, 'previous': ocl_previous, 'change': ocl_current - ocl_previous # Increase is positive for cash flow } # Short Term Provisions (Note 8) stp_data = self.safe_get_value(self.financial_data, 'current_liabilities', '8. Short Term Provisions', 'Provision for Taxation', default=[179.27262, 692.25399]) if isinstance(stp_data, list) and len(stp_data) >= 2: wc_data['short_term_provisions'] = { 'current': float(stp_data[0]), 'previous': float(stp_data[1]), 'change': float(stp_data[0]) - float(stp_data[1]) # Change in provision } else: wc_data['short_term_provisions'] = { 'current': 179.27262, 'previous': 692.25399, 'change': 179.27262 - 692.25399 } return wc_data def extract_investing_data(self) -> Dict[str, Any]: """Extract investing activities data""" investing_data = {} # Fixed Asset Additions (Note 9) tangible_additions = self.safe_get_value(self.financial_data, 'fixed_assets', 'tangible_assets', '', 'gross_carrying_value', 'additions') intangible_additions = self.safe_get_value(self.financial_data, 'fixed_assets', 'intangible_assets', '', 'gross_carrying_value', 'additions') investing_data['asset_purchases'] = { 'tangible_additions': tangible_additions, 'intangible_additions': intangible_additions, 'total': tangible_additions + intangible_additions } # Asset Deletions/Sales tangible_deletions = self.safe_get_value(self.financial_data, 'fixed_assets', 'tangible_assets', '', 'gross_carrying_value', 'deletions') intangible_deletions = self.safe_get_value(self.financial_data, 'fixed_assets', 'intangible_assets', '', 'gross_carrying_value', 'deletions') investing_data['asset_sales'] = { 'tangible_deletions': tangible_deletions, 'intangible_deletions': intangible_deletions, 'total': tangible_deletions + (intangible_deletions if intangible_deletions else 0) } # Interest Income (already extracted in P&L data) investing_data['interest_income'] = { 'current': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.current_year), 'previous': self.safe_get_value(self.financial_data, 'other_data', '17. Other income', 'Interest income', year=self.previous_year) } return investing_data def extract_financing_data(self) -> Dict[str, Any]: """Extract financing activities data""" financing_data = {} # Dividend Paid (Note 3 - Reserves and Surplus) dividend_data = self.safe_get_value(self.financial_data, 'reserves_and_surplus', 'Less: Dividend Paid', default=[162.7563, 0]) if isinstance(dividend_data, list) and len(dividend_data) >= 2: financing_data['dividend_paid'] = { 'current': float(dividend_data[0]) if dividend_data[0] else 0, 'previous': float(dividend_data[1]) if dividend_data[1] else 0 } else: financing_data['dividend_paid'] = {'current': 162.7563, 'previous': 0} # Long Term Borrowings (Note 4) # Calculate total borrowings for both years borrowings_current = 0 borrowings_previous = 0 # APSFC Loan apsfc_data = self.safe_get_value(self.financial_data, 'borrowings', '4. Long-Term Borrowings', 'Andhra Pradesh State Financial Corporation', default=[197.9979, 276.4194]) if isinstance(apsfc_data, list) and len(apsfc_data) >= 2: borrowings_current += float(apsfc_data[0]) borrowings_previous += float(apsfc_data[1]) # ICICI Bank Loan icici_data = self.safe_get_value(self.financial_data, 'borrowings', '4. Long-Term Borrowings', 'Loan From ICICI Bank 603090031420', default=[683.5714632, 12428568]) if isinstance(icici_data, list) and len(icici_data) >= 2: borrowings_current += float(icici_data[0]) borrowings_previous += float(icici_data[1]) if icici_data[1] < 1000000 else 0 # Filter out unrealistic values # Daimler Loan daimler_data = self.safe_get_value(self.financial_data, 'borrowings', '4. Long-Term Borrowings', 'Diamler Financial Services India Private Limited', default=[32.89343, 44.94277]) if isinstance(daimler_data, list) and len(daimler_data) >= 2: borrowings_current += float(daimler_data[0]) borrowings_previous += float(daimler_data[1]) financing_data['long_term_borrowings'] = { 'current': borrowings_current, 'previous': borrowings_previous, 'change': borrowings_current - borrowings_previous } # Current Maturities of Long Term Debt (Note 7) cmltd_data = self.safe_get_value(self.financial_data, 'current_liabilities', '7. Other Current Liabilities', 'Current Maturities of Long Term Borrowings', default=[139.20441, 136.08612]) if isinstance(cmltd_data, list) and len(cmltd_data) >= 2: financing_data['current_maturities'] = { 'current': float(cmltd_data[0]), 'previous': float(cmltd_data[1]), 'change': float(cmltd_data[0]) - float(cmltd_data[1]) } else: financing_data['current_maturities'] = {'current': 139.20441, 'previous': 136.08612, 'change': 3.11829} return financing_data def extract_cash_data(self) -> Dict[str, Any]: """Extract cash and cash equivalents data""" cash_data = {} # Cash on hand cash_hand_current = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Cash on hand', year=self.current_year) cash_hand_previous = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Cash on hand', year=self.previous_year) # Bank balances bank_current = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Balances with banks in current accounts', year=self.current_year) bank_previous = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Balances with banks in current accounts', year=self.previous_year) # Fixed deposits fd_current = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Fixed Deposits with ICICI Bank', year=self.current_year) fd_previous = self.safe_get_value(self.financial_data, 'current_assets', '13. Cash and bank balances', 'Fixed Deposits with ICICI Bank', year=self.previous_year) cash_data = { 'cash_on_hand': {'current': cash_hand_current, 'previous': cash_hand_previous}, 'bank_balances': {'current': bank_current, 'previous': bank_previous}, 'fixed_deposits': {'current': fd_current, 'previous': fd_previous}, 'total': { 'current': cash_hand_current + bank_current + fd_current, 'previous': cash_hand_previous + bank_previous + fd_previous } } cash_data['net_change'] = cash_data['total']['current'] - cash_data['total']['previous'] return cash_data def extract_all_data(self) -> Dict[str, Any]: """Extract all required data for CFS generation""" self.extracted_data = { 'profit_and_loss': self.extract_profit_and_loss_data(), 'working_capital': self.extract_working_capital_data(), 'investing_activities': self.extract_investing_data(), 'financing_activities': self.extract_financing_data(), 'cash_and_equivalents': self.extract_cash_data(), 'extraction_metadata': { 'extracted_on': datetime.now().isoformat(), 'current_year': self.current_year, 'previous_year': self.previous_year } } return self.extracted_data def save_extracted_data(self, filename: str = "extracted_cfs_data.json") -> str: """Save extracted data to JSON file""" with open(filename, 'w') as f: json.dump(self.extracted_data, f, indent=2, default=str) return filename def print_data_extraction_summary(extracted_data: Dict[str, Any]) -> None: """Print summary of extracted data for verification""" print("\n" + "="*60) print("DATA EXTRACTION SUMMARY") print("="*60) pl_data = extracted_data['profit_and_loss'] print(f"Profit After Tax (Current): Rs{pl_data['profit_after_tax']['current']:,.2f} Lakhs") print(f"Tax Provision (Current): Rs{pl_data['tax_provision']['current']:,.2f} Lakhs") print(f"Profit Before Tax (Calculated): Rs{pl_data['profit_before_tax']['current']:,.2f} Lakhs") print(f"Depreciation (Current): Rs{pl_data['depreciation']['current']:,.2f} Lakhs") print(f"Interest Income (Current): Rs{pl_data['interest_income']['current']:,.2f} Lakhs") cash_data = extracted_data['cash_and_equivalents'] print(f"\nCash at Beginning: Rs{cash_data['total']['previous']:,.2f} Lakhs") print(f"Cash at End: Rs{cash_data['total']['current']:,.2f} Lakhs") print(f"Net Cash Change: Rs{cash_data['net_change']:,.2f} Lakhs") def validate_cfs_data(extracted_data: Dict[str, Any]) -> Dict[str, Any]: """Validate the extracted data for completeness and accuracy""" validation_results = { 'missing_data': [], 'warnings': [], 'data_quality': 'Good' } # Check for missing critical data pl_data = extracted_data['profit_and_loss'] if pl_data['profit_after_tax']['current'] == 0: validation_results['missing_data'].append('Profit After Tax') if pl_data['depreciation']['current'] == 0: validation_results['warnings'].append('Depreciation appears to be zero') # Check cash flow consistency cash_data = extracted_data['cash_and_equivalents'] if abs(cash_data['net_change']) > cash_data['total']['previous']: validation_results['warnings'].append('Large cash change relative to opening balance') if validation_results['missing_data']: validation_results['data_quality'] = 'Poor' elif validation_results['warnings']: validation_results['data_quality'] = 'Fair' return validation_results def main_data_extraction(json_file_path: Optional[str] = None) -> Optional[Dict[str, Any]]: """Main function to extract financial data and generate analysis files""" logger = logging.getLogger("cf_middlestep") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") # Use environment variable or fallback if json_file_path is None: json_file_path = os.environ.get("CFS_JSON_INPUT", "clean_financial_data_cfs.json") logger.info("="*80) logger.info("FINANCIAL DATA EXTRACTION AND ANALYSIS") logger.info("="*80) # Step 1: Load raw JSON data logger.info("1. Loading raw financial data...") try: with open(json_file_path, 'r') as f: raw_data = json.load(f) logger.info(f" Successfully loaded data from {json_file_path}") except FileNotFoundError: logger.error(f"File {json_file_path} not found") return None except json.JSONDecodeError: logger.error(f"Invalid JSON format in {json_file_path}") return None # Step 2: Extract and process data logger.info("2. Extracting and processing financial data...") extractor = FinancialDataExtractor(raw_data) extracted_data = extractor.extract_all_data() # Step 3: Validate extracted data logger.info("3. Validating extracted data...") validation_results = validate_cfs_data(extracted_data) logger.info(f"Data Quality: {validation_results['data_quality']}") if validation_results['missing_data']: logger.warning(f"Missing Data: {', '.join(validation_results['missing_data'])}") if validation_results['warnings']: logger.warning(f"Warnings: {', '.join(validation_results['warnings'])}") # Step 4: Save extracted data logger.info("4. Saving extracted data...") extracted_file = extractor.save_extracted_data(os.environ.get("CFS_JSON_OUTPUT", "extracted_cfs_data.json")) logger.info(f"Extracted data saved to {extracted_file}") # Step 5: Print summary print_data_extraction_summary(extracted_data) logger.info("FILES CREATED:") logger.info(f"1. {extracted_file} - Processed financial data (JSON)") logger.info("NEXT STEP:") logger.info("Use the 'extracted_cfs_data.json' file as input for the Cash Flow Statement Generator") return { 'extracted_data_file': extracted_file, 'extracted_data': extracted_data, 'validation_results': validation_results } def debug_json_structure(json_file_path: str = "clean_financial_data_cfs.json") -> None: """Debug function to explore the JSON structure""" try: with open(json_file_path, 'r') as f: data = json.load(f) print("JSON STRUCTURE ANALYSIS") print("="*50) def print_structure(obj, level=0, max_level=3): indent = " " * level if level > max_level: return if isinstance(obj, dict): for key, value in obj.items(): if isinstance(value, dict): print(f"{indent}{key}: (dict with {len(value)} keys)") print_structure(value, level + 1, max_level) elif isinstance(value, list): print(f"{indent}{key}: (list with {len(value)} items)") else: print(f"{indent}{key}: {type(value).__name__}") financial_data = data.get('company_financial_data', {}) print_structure(financial_data) except Exception as e: print(f"Error analyzing JSON structure: {e}") # Example usage and testing if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") logger = logging.getLogger("cf_middlestep") logger.info("Starting Financial Data Extraction Process...") input_file = os.environ.get("CFS_JSON_INPUT", "clean_financial_data_cfs.json") if os.path.exists(input_file): extraction_results = main_data_extraction(input_file) if extraction_results: logger.info("DATA EXTRACTION COMPLETED SUCCESSFULLY!") logger.info("Ready for Cash Flow Statement generation using extracted_cfs_data.json") else: logger.error(f"Input file '{input_file}' not found in current directory") logger.error("Please ensure the JSON file is in the same directory as this script")