""" Enhanced Balance Sheet Generator Production-ready script for extracting, analyzing, and generating Excel balance sheets from financial JSON data. """ import os import json import re import logging from datetime import datetime from typing import Any, Dict, List, Optional from openpyxl import Workbook from openpyxl.styles import Font, Border, Side, Alignment import requests from dotenv import load_dotenv from pydantic import BaseModel, Field, ValidationError from pydantic_settings import BaseSettings load_dotenv() # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s %(levelname)s %(name)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) logger = logging.getLogger(__name__) class Settings(BaseSettings): """Application settings loaded from environment variables or .env file.""" api_key: str = Field(default_factory=lambda: os.getenv("OPENROUTER_API_KEY", ""), env="OPENROUTER_API_KEY") input_file: str = Field(default="data/clean_financial_data_bs.json", env="INPUT_FILE") output_dir: str = Field(default="data/output", env="BL_OUTPUT_DIR") settings = Settings() class BalanceSheetItem(BaseModel): category: str subcategory: Optional[str] = "" name: str note: Optional[str] = "" value_2024: float value_2023: float class BalanceSheetTotals(BaseModel): shareholders_funds_2024: float = 0.0 shareholders_funds_2023: float = 0.0 non_current_liabilities_2024: float = 0.0 non_current_liabilities_2023: float = 0.0 current_liabilities_2024: float = 0.0 current_liabilities_2023: float = 0.0 non_current_assets_2024: float = 0.0 non_current_assets_2023: float = 0.0 current_assets_2024: float = 0.0 current_assets_2023: float = 0.0 total_equity_liabilities_2024: float = 0.0 total_equity_liabilities_2023: float = 0.0 total_assets_2024: float = 0.0 total_assets_2023: float = 0.0 balance_difference_2024: float = 0.0 balance_difference_2023: float = 0.0 class EnhancedBalanceSheetGenerator: def __init__(self, api_key: str): self.api_key = api_key self.base_url = "https://openrouter.ai/api/v1/chat/completions" # Enhanced mapping with multiple patterns self.field_mappings = { # Share Capital patterns 'share_capital': [ 'share capital', 'equity share', 'paid up', 'issued shares', 'authorised shares', 'subscribed', 'fully paid' ], # Reserves patterns 'reserves_surplus': [ 'reserves and surplus', 'reserves', 'surplus', 'retained earnings', 'profit and loss', 'general reserves', 'closing balance' ], # Long term borrowings 'long_term_borrowings': [ 'long term borrowings', 'long-term borrowings', 'borrowings', 'debt', 'loans', 'financial corporation', 'bank loan' ], # Deferred tax 'deferred_tax': [ 'deferred tax', 'tax liability', 'deferred tax liability' ], # Trade payables 'trade_payables': [ 'trade payables', 'payables', 'creditors', 'sundry creditors', 'capital expenditure', 'other expenses' ], # Other current liabilities 'other_current_liabilities': [ 'other current liabilities', 'current maturities', 'outstanding liabilities', 'statutory dues', 'accrued expenses' ], # Short term provisions 'short_term_provisions': [ 'short term provisions', 'provisions', 'provision for taxation', 'tax provision' ], # Fixed assets - Tangible 'tangible_assets': [ 'tangible assets', 'property plant', 'fixed assets', 'buildings', 'plant', 'equipment', 'net carrying value' ], # Fixed assets - Intangible 'intangible_assets': [ 'intangible assets', 'software', 'goodwill', 'intangible' ], # Long term loans and advances 'long_term_loans_advances': [ 'long term loans', 'security deposits', 'long term advances' ], # Inventories 'inventories': [ 'inventories', 'stock', 'consumables', 'raw materials' ], # Trade receivables 'trade_receivables': [ 'trade receivables', 'receivables', 'debtors', 'outstanding', 'other receivables' ], # Cash and bank 'cash_bank': [ 'cash and bank', 'cash', 'bank balances', 'current accounts', 'cash on hand', 'fixed deposits' ], # Short term loans and advances 'short_term_loans_advances': [ 'short term loans', 'prepaid expenses', 'other advances', 'advance tax', 'statutory authorities' ], # Other current assets 'other_current_assets': [ 'other current assets', 'accrued income', 'interest accrued' ] } def safe_float(self, value: Any) -> float: """Convert various value formats to float.""" if not value or str(value).strip() in ['-', '--', 'None', '', 'null']: return 0.0 # Handle strings if isinstance(value, str): # Remove currency symbols and brackets cleaned = re.sub(r'[₹,Rs\.\s\(\)]', '', value) # Handle negative values in brackets if '(' in str(value) and ')' in str(value): cleaned = '-' + cleaned.replace('(', '').replace(')', '') try: return float(cleaned) except Exception: return 0.0 # Handle numeric values try: return float(value) except Exception: return 0.0 def get_value_flexible(self, data: Any, date_key_2024: str = "2024-03-31 00:00:00", date_key_2023: str = "2023-03-31 00:00:00") -> tuple[float, float]: """Flexibly extract values from either list or dictionary format.""" if isinstance(data, dict): # Dictionary format - extract by date keys val_2024 = self.safe_float(data.get(date_key_2024, 0)) val_2023 = self.safe_float(data.get(date_key_2023, 0)) return val_2024, val_2023 elif isinstance(data, list): # List format - assume first element is 2024, second is 2023 val_2024 = self.safe_float(data[0]) if len(data) > 0 else 0.0 val_2023 = self.safe_float(data[1]) if len(data) > 1 else 0.0 return val_2024, val_2023 else: # Single value or other format val = self.safe_float(data) return val, 0.0 # Assume it's 2024 value, 2023 is 0 def call_ai_for_analysis(self, data_summary: str) -> Dict[str, Any]: """Use AI to analyze and extract balance sheet data""" prompt = f""" You are a financial analyst. Extract balance sheet data from the following JSON data and create a properly structured balance sheet. CRITICAL REQUIREMENTS: 1. Extract ALL line items with their 2024 and 2023 values 2. Calculate missing totals where needed 3. Ensure the balance sheet balances (Assets = Equity + Liabilities) 4. Return ONLY valid JSON in the exact format specified below Expected Balance Sheet Structure: - EQUITY AND LIABILITIES - Shareholders' funds (Share capital, Reserves and surplus) - Non-Current liabilities (Long term borrowings, Deferred tax liability) - Current liabilities (Trade payables, Other current liabilities, Short term provisions) - ASSETS - Non-current assets (Fixed assets - Tangible/Intangible, Long term loans and advances) - Current assets (Inventories, Trade receivables, Cash and bank balances, Short-term loans and advances, Other current assets) Data to analyze: {data_summary} Return ONLY this JSON format: {{ "balance_sheet_items": [ {{ "category": "Shareholders' funds", "subcategory": "", "name": "Share capital", "note": "2", "value_2024": 542.52, "value_2023": 542.52 }}, {{ "category": "Shareholders' funds", "subcategory": "", "name": "Reserves and surplus", "note": "3", "value_2024": 3152.39, "value_2023": 2642.87 }}, {{ "category": "Non-Current liabilities", "subcategory": "", "name": "Long term borrowings", "note": "4", "value_2024": 914.46, "value_2023": 321.36 }} ], "totals": {{ "shareholders_funds_2024": 3694.91, "shareholders_funds_2023": 3185.39, "total_equity_liabilities_2024": 5246.10, "total_equity_liabilities_2023": 4725.23, "total_assets_2024": 5246.10, "total_assets_2023": 4725.23 }} }} """ headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } payload = { "model": "anthropic/claude-3.5-sonnet", "messages": [{"role": "user", "content": prompt}], "temperature": 0.1, "max_tokens": 4000 } try: response = requests.post(self.base_url, headers=headers, json=payload, timeout=60) content = response.json()['choices'][0]['message']['content'] # Clean the response content = re.sub(r'```(?:json)?\s*', '', content).strip('`').strip() return json.loads(content) except Exception as e: logger.error(f"AI analysis failed: {e}") return {"balance_sheet_items": [], "totals": {}} def extract_from_json_structure(self, json_data: Dict[str, Any]) -> List[Dict[str, Any]]: """Direct extraction from the structured JSON data with flexible list/dict support""" items = [] company_data = json_data.get("company_financial_data", {}) # Extract Share Capital share_capital = company_data.get("share_capital", {}) total_share_capital = share_capital.get("Total issued, subscribed and fully paid-up share capital", {}) if total_share_capital: val_2024, val_2023 = self.get_value_flexible(total_share_capital) if val_2024 or val_2023: items.append({ "category": "Shareholders' funds", "name": "Share capital", "note": "2", "value_2024": val_2024, "value_2023": val_2023 }) # Extract Reserves and Surplus reserves = company_data.get("reserves_and_surplus", {}) closing_balance = reserves.get("Balance, at the end of the year", {}) if closing_balance: val_2024, val_2023 = self.get_value_flexible(closing_balance) if val_2024 or val_2023: items.append({ "category": "Shareholders' funds", "name": "Reserves and surplus", "note": "3", "value_2024": val_2024, "value_2023": val_2023 }) # Extract Long-term Borrowings borrowings = company_data.get("borrowings", {}).get("4. Long-Term Borrowings", {}) total_borrowings_2024 = 0 total_borrowings_2023 = 0 for key, value in borrowings.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) total_borrowings_2024 += val_2024 total_borrowings_2023 += val_2023 if total_borrowings_2024 or total_borrowings_2023: items.append({ "category": "Non-Current liabilities", "name": "Long term borrowings", "note": "4", "value_2024": total_borrowings_2024, "value_2023": total_borrowings_2023 }) # Extract Deferred Tax deferred_tax = company_data.get("other_data", {}).get("5. Deferred Tax Liability / (Asset)", {}) if deferred_tax: dtl = deferred_tax.get("Deferred tax liability", {}) val_2024, val_2023 = self.get_value_flexible(dtl) if val_2024 or val_2023: items.append({ "category": "Non-Current liabilities", "name": "Deferred Tax Liability (Net)", "note": "5", "value_2024": val_2024, "value_2023": val_2023 }) # Extract Current Liabilities current_liabilities = company_data.get("current_liabilities", {}) # Trade Payables trade_payables = current_liabilities.get("6. Trade Payables", {}) tp_2024 = tp_2023 = 0 for key, value in trade_payables.items(): if key not in ["_metadata", "Particulars", "Disputed dues"] and value is not None: val_2024, val_2023 = self.get_value_flexible(value) tp_2024 += val_2024 tp_2023 += val_2023 if tp_2024 or tp_2023: items.append({ "category": "Current liabilities", "name": "Trade payables", "note": "6", "value_2024": tp_2024, "value_2023": tp_2023 }) # Other Current Liabilities other_cl = current_liabilities.get("7. Other Current Liabilities", {}) ocl_2024 = ocl_2023 = 0 for key, value in other_cl.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) ocl_2024 += val_2024 ocl_2023 += val_2023 if ocl_2024 or ocl_2023: items.append({ "category": "Current liabilities", "name": "Other current liabilities", "note": "7", "value_2024": ocl_2024, "value_2023": ocl_2023 }) # Short Term Provisions provisions = current_liabilities.get("8. Short Term Provisions", {}) prov_2024 = prov_2023 = 0 for key, value in provisions.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) prov_2024 += val_2024 prov_2023 += val_2023 if prov_2024 or prov_2023: items.append({ "category": "Current liabilities", "name": "Short term provisions", "note": "8", "value_2024": prov_2024, "value_2023": prov_2023 }) # Extract Fixed Assets fixed_assets = company_data.get("fixed_assets", {}) # Tangible Assets tangible = fixed_assets.get("tangible_assets", {}).get("", {}) if tangible: net_carrying = tangible.get("net_carrying_value", {}) if net_carrying: # Handle both dict and list formats for net carrying value if isinstance(net_carrying, dict): val_2024 = self.safe_float(net_carrying.get("closing", 0)) val_2023 = self.safe_float(net_carrying.get("opening", 0)) else: val_2024, val_2023 = self.get_value_flexible(net_carrying) if val_2024 or val_2023: items.append({ "category": "Non-current assets", "subcategory": "Fixed assets", "name": "Tangible assets", "note": "9", "value_2024": val_2024, "value_2023": val_2023 }) # Intangible Assets intangible = fixed_assets.get("intangible_assets", {}).get("", {}) if intangible: net_carrying = intangible.get("net_carrying_value", {}) if net_carrying: # Handle both dict and list formats for net carrying value if isinstance(net_carrying, dict): val_2024 = self.safe_float(net_carrying.get("closing", 0)) val_2023 = self.safe_float(net_carrying.get("opening", 0)) else: val_2024, val_2023 = self.get_value_flexible(net_carrying) if val_2024 or val_2023: items.append({ "category": "Non-current assets", "subcategory": "Fixed assets", "name": "Intangible assets", "note": "9", "value_2024": val_2024, "value_2023": val_2023 }) # Long Term Loans and Advances lt_loans = company_data.get("loans_and_advances", {}).get("10. Long Term Loans and advances", {}) lt_2024 = lt_2023 = 0 for key, value in lt_loans.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) lt_2024 += val_2024 lt_2023 += val_2023 if lt_2024 or lt_2023: items.append({ "category": "Non-current assets", "name": "Long Term Loans and Advances", "note": "10", "value_2024": lt_2024, "value_2023": lt_2023 }) # Extract Current Assets current_assets = company_data.get("current_assets", {}) # Inventories inventories = current_assets.get("11. Inventories", {}) inv_2024 = inv_2023 = 0 for key, value in inventories.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) inv_2024 += val_2024 inv_2023 += val_2023 if inv_2024 or inv_2023: items.append({ "category": "Current assets", "name": "Inventories", "note": "11", "value_2024": inv_2024, "value_2023": inv_2023 }) # Trade Receivables trade_recv = current_assets.get("12. Trade receivables", {}) tr_2024 = tr_2023 = 0 for key, value in trade_recv.items(): if key not in ["_metadata", "Particulars", "trade_receivables_aging"] and value is not None: val_2024, val_2023 = self.get_value_flexible(value) tr_2024 += val_2024 tr_2023 += val_2023 if tr_2024 or tr_2023: items.append({ "category": "Current assets", "name": "Trade receivables", "note": "12", "value_2024": tr_2024, "value_2023": tr_2023 }) # Cash and Bank Balances cash_bank = current_assets.get("13. Cash and bank balances", {}) cb_2024 = cb_2023 = 0 for key, value in cash_bank.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) cb_2024 += val_2024 cb_2023 += val_2023 if cb_2024 or cb_2023: items.append({ "category": "Current assets", "name": "Cash and bank balances", "note": "13", "value_2024": cb_2024, "value_2023": cb_2023 }) # Short-term Loans and Advances st_loans = company_data.get("loans_and_advances", {}).get("14. Short Term Loans and Advances", {}) st_2024 = st_2023 = 0 for key, value in st_loans.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) st_2024 += val_2024 st_2023 += val_2023 if st_2024 or st_2023: items.append({ "category": "Current assets", "name": "Short-term loans and advances", "note": "14", "value_2024": st_2024, "value_2023": st_2023 }) # Other Current Assets other_ca = company_data.get("other_data", {}).get("15. Other Current Assets", {}) oca_2024 = oca_2023 = 0 for key, value in other_ca.items(): if key != "_metadata" and value is not None: val_2024, val_2023 = self.get_value_flexible(value) oca_2024 += val_2024 oca_2023 += val_2023 if oca_2024 or oca_2023: items.append({ "category": "Current assets", "name": "Other current assets", "note": "15", "value_2024": oca_2024, "value_2023": oca_2023 }) return items def calculate_totals(self, items: List[Dict[str, Any]]) -> BalanceSheetTotals: """Calculate section totals and verify balance""" totals = {} # Group by categories categories = {} for item in items: cat = item["category"] if cat not in categories: categories[cat] = {"2024": 0, "2023": 0} categories[cat]["2024"] += item["value_2024"] categories[cat]["2023"] += item["value_2023"] # Calculate major totals shareholders_funds_2024 = categories.get("Shareholders' funds", {}).get("2024", 0) shareholders_funds_2023 = categories.get("Shareholders' funds", {}).get("2023", 0) non_current_liab_2024 = categories.get("Non-Current liabilities", {}).get("2024", 0) non_current_liab_2023 = categories.get("Non-Current liabilities", {}).get("2023", 0) current_liab_2024 = categories.get("Current liabilities", {}).get("2024", 0) current_liab_2023 = categories.get("Current liabilities", {}).get("2023", 0) non_current_assets_2024 = categories.get("Non-current assets", {}).get("2024", 0) non_current_assets_2023 = categories.get("Non-current assets", {}).get("2023", 0) current_assets_2024 = categories.get("Current assets", {}).get("2024", 0) current_assets_2023 = categories.get("Current assets", {}).get("2023", 0) total_equity_liab_2024 = shareholders_funds_2024 + non_current_liab_2024 + current_liab_2024 total_equity_liab_2023 = shareholders_funds_2023 + non_current_liab_2023 + current_liab_2023 total_assets_2024 = non_current_assets_2024 + current_assets_2024 total_assets_2023 = non_current_assets_2023 + current_assets_2023 return BalanceSheetTotals( shareholders_funds_2024=shareholders_funds_2024, shareholders_funds_2023=shareholders_funds_2023, non_current_liabilities_2024=non_current_liab_2024, non_current_liabilities_2023=non_current_liab_2023, current_liabilities_2024=current_liab_2024, current_liabilities_2023=current_liab_2023, non_current_assets_2024=non_current_assets_2024, non_current_assets_2023=non_current_assets_2023, current_assets_2024=current_assets_2024, current_assets_2023=current_assets_2023, total_equity_liabilities_2024=total_equity_liab_2024, total_equity_liabilities_2023=total_equity_liab_2023, total_assets_2024=total_assets_2024, total_assets_2023=total_assets_2023, balance_difference_2024=abs(total_assets_2024 - total_equity_liab_2024), balance_difference_2023=abs(total_assets_2023 - total_equity_liab_2023) ) def generate_balance_sheet_excel(self, items: List[Dict[str, Any]], totals: BalanceSheetTotals, output_dir: str = "output") -> str: """Generate formatted Excel balance sheet""" os.makedirs(output_dir, exist_ok=True) wb = Workbook() ws = wb.active ws.title = "Balance Sheet" # Set column widths ws.column_dimensions["A"].width = 40 ws.column_dimensions["B"].width = 8 ws.column_dimensions["C"].width = 15 ws.column_dimensions["D"].width = 15 # Styles bold_font = Font(bold=True) thin_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) row = 1 def add_row(desc, note, val_2024, val_2023, bold=False, indent=0, border=False): nonlocal row # Description cell_a = ws.cell(row=row, column=1, value=" " * indent + desc) if bold: cell_a.font = bold_font if border: cell_a.border = thin_border # Note cell_b = ws.cell(row=row, column=2, value=note) if bold: cell_b.font = bold_font if border: cell_b.border = thin_border # Values for col, val in [(3, val_2024), (4, val_2023)]: cell = ws.cell(row=row, column=col) if val != 0: cell.value = val cell.number_format = '#,##0.00' if bold: cell.font = bold_font if border: cell.border = thin_border cell.alignment = Alignment(horizontal='right') row += 1 # Header add_row("Balance Sheet as at March 31, 2024", "", 0, 0, True) add_row("", "", 0, 0) add_row("(In Lakhs)", "", 0, 0) add_row("", "Notes", "March 31, 2024", "March 31, 2023", True) add_row("", "", 0, 0) # EQUITY AND LIABILITIES add_row("EQUITY AND LIABILITIES", "", 0, 0, True) # Shareholders' funds add_row("Shareholders' funds", "", 0, 0, True) shareholders_items = [item for item in items if item["category"] == "Shareholders' funds"] for item in shareholders_items: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"]) add_row("", "", totals.shareholders_funds_2024, totals.shareholders_funds_2023, True) add_row("", "", 0, 0) # Non-Current liabilities add_row("Non-Current liabilities", "", 0, 0, True) non_current_liab_items = [item for item in items if item["category"] == "Non-Current liabilities"] for item in non_current_liab_items: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"]) add_row("", "", totals.non_current_liabilities_2024, totals.non_current_liabilities_2023, True) add_row("", "", 0, 0) # Current liabilities add_row("Current liabilities", "", 0, 0, True) current_liab_items = [item for item in items if item["category"] == "Current liabilities"] for item in current_liab_items: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"]) add_row("", "", totals.current_liabilities_2024, totals.current_liabilities_2023, True) add_row("", "", 0, 0) # TOTAL EQUITY & LIABILITIES add_row("TOTAL", "", totals.total_equity_liabilities_2024, totals.total_equity_liabilities_2023, True, 0, True) add_row("", "", 0, 0) # ASSETS add_row("ASSETS", "", 0, 0, True) # Non-current assets add_row("Non-current assets", "", 0, 0, True) # Fixed assets fixed_asset_items = [item for item in items if item.get("subcategory") == "Fixed assets"] if fixed_asset_items: add_row("Fixed assets", "", 0, 0, True, 1) fixed_total_2024 = fixed_total_2023 = 0 for item in fixed_asset_items: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 2) fixed_total_2024 += item["value_2024"] fixed_total_2023 += item["value_2023"] add_row("", "", fixed_total_2024, fixed_total_2023, True, 2) # Other non-current assets other_non_current = [item for item in items if item["category"] == "Non-current assets" and item.get("subcategory") != "Fixed assets"] for item in other_non_current: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 1) add_row("", "", totals.non_current_assets_2024, totals.non_current_assets_2023, True) add_row("", "", 0, 0) # Current assets add_row("Current assets", "", 0, 0, True) current_asset_items = [item for item in items if item["category"] == "Current assets"] for item in current_asset_items: add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 1) add_row("", "", totals.current_assets_2024, totals.current_assets_2023, True) add_row("", "", 0, 0) # TOTAL ASSETS add_row("TOTAL", "", totals.total_assets_2024, totals.total_assets_2023, True, 0, True) # Add balance verification add_row("", "", 0, 0) balance_2024 = totals.balance_difference_2024 balance_2023 = totals.balance_difference_2023 if balance_2024 < 0.01 and balance_2023 < 0.01: add_row(" Balance Sheet is BALANCED", "", 0, 0, True) else: add_row(f" Balance Difference: {balance_2024:.2f} | {balance_2023:.2f}", "", 0, 0, True) # Save file timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_file = os.path.join(output_dir, f"balance_sheet_{timestamp}.xlsx") wb.save(output_file) logger.info(f"Output file: {output_file}") print(f"Output file: {os.path.abspath(output_file)}") # For API subprocess parsing return output_file def process(self, input_file: str, output_dir: str = "output") -> Optional[str]: """Main processing function""" try: logger.info(f"Processing: {input_file}") # Load JSON data with open(input_file, 'r', encoding='utf-8') as f: json_data = json.load(f) logger.info("Extracting data from JSON structure...") # Method 1: Direct extraction from structured JSON items = self.extract_from_json_structure(json_data) logger.info(f"Extracted {len(items)} items from JSON structure") # Method 2: AI-assisted extraction if needed if len(items) < 10: # If we don't have enough items logger.info("Using AI for additional extraction...") # Create summary for AI summary = json.dumps(json_data, indent=2)[:8000] # Limit size ai_result = self.call_ai_for_analysis(summary) ai_items = ai_result.get("balance_sheet_items", []) logger.info(f"AI extracted {len(ai_items)} additional items") # Merge items (avoid duplicates) existing_names = {item["name"].lower() for item in items} for ai_item in ai_items: if ai_item["name"].lower() not in existing_names: items.append(ai_item) if not items: logger.error("No balance sheet items extracted") return None # Calculate totals totals = self.calculate_totals(items) # Display summary logger.info(f"\n BALANCE SHEET SUMMARY:") logger.info(f"Total Items Extracted: {len(items)}") logger.info(f"Assets 2024: Rs. {totals.total_assets_2024:,.2f} Lakhs") logger.info(f"Equity & Liabilities 2024: Rs. {totals.total_equity_liabilities_2024:,.2f} Lakhs") logger.info(f"Balance Difference 2024: Rs. {totals.balance_difference_2024:,.2f} Lakhs") logger.info(f"Assets 2023: Rs. {totals.total_assets_2023:,.2f} Lakhs") logger.info(f"Equity & Liabilities 2023: Rs. {totals.total_equity_liabilities_2023:,.2f} Lakhs") logger.info(f"Balance Difference 2023: Rs. {totals.balance_difference_2023:,.2f} Lakhs") # Check if balanced is_balanced_2024 = totals.balance_difference_2024 < 0.01 is_balanced_2023 = totals.balance_difference_2023 < 0.01 if is_balanced_2024 and is_balanced_2023: logger.info("Balance Sheet is PERFECTLY BALANCED!") else: logger.warning("Balance Sheet has differences - may need adjustment") # Generate Excel output_file = self.generate_balance_sheet_excel(items, totals, output_dir) logger.info(f"SUCCESS: Generated {output_file}") return output_file except Exception as e: logger.error(f"Error processing file: {e}", exc_info=True) return None def main() -> None: """ Main function for running the balance sheet generator. Accepts input file and output directory from command-line arguments or environment variables. Handles errors gracefully and logs all major events. """ logger.info("ENHANCED BALANCE SHEET GENERATOR v2.0 started.") import sys api_key = settings.api_key input_file = settings.input_file output_dir = settings.output_dir if len(sys.argv) > 1: input_file = sys.argv[1] if len(sys.argv) > 2: output_dir = sys.argv[2] if not api_key: logger.error("Missing OPENROUTER_API_KEY environment variable. Please set your OpenRouter API key in the .env file.") return if not os.path.exists(input_file): logger.error(f"Input file not found: {input_file}. Please ensure your JSON data file exists.") return generator = EnhancedBalanceSheetGenerator(api_key) try: result = generator.process(input_file, output_dir) if result: abs_path = os.path.abspath(result) if os.path.exists(abs_path): logger.info(f"COMPLETED SUCCESSFULLY! Output file: {abs_path}") print(f"Output file: {abs_path}") # For API subprocess parsing else: logger.error(f"PROCESSING FAILED. Output file not created: {abs_path}") else: logger.error("PROCESSING FAILED. Please check the error messages above and try again.") except Exception as e: logger.error(f"Fatal error: {e}", exc_info=True) if __name__ == "__main__": main()