FinRyver / bs /bl_llm.py
Sahil Garg
Improved the structure of codebase and updated files code accordingly.
f39814a
"""
Enhanced Balance Sheet Generator
Production-ready script for extracting, analyzing, and generating Excel balance sheets from financial JSON data.
"""
import os
import json
import re
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side, Alignment
import requests
from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError
from pydantic_settings import BaseSettings
load_dotenv()
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s %(name)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
"""Application settings loaded from environment variables or .env file."""
api_key: str = Field(default_factory=lambda: os.getenv("OPENROUTER_API_KEY", ""), env="OPENROUTER_API_KEY")
input_file: str = Field(default="data/clean_financial_data_bs.json", env="INPUT_FILE")
output_dir: str = Field(default="data/output", env="BL_OUTPUT_DIR")
settings = Settings()
class BalanceSheetItem(BaseModel):
category: str
subcategory: Optional[str] = ""
name: str
note: Optional[str] = ""
value_2024: float
value_2023: float
class BalanceSheetTotals(BaseModel):
shareholders_funds_2024: float = 0.0
shareholders_funds_2023: float = 0.0
non_current_liabilities_2024: float = 0.0
non_current_liabilities_2023: float = 0.0
current_liabilities_2024: float = 0.0
current_liabilities_2023: float = 0.0
non_current_assets_2024: float = 0.0
non_current_assets_2023: float = 0.0
current_assets_2024: float = 0.0
current_assets_2023: float = 0.0
total_equity_liabilities_2024: float = 0.0
total_equity_liabilities_2023: float = 0.0
total_assets_2024: float = 0.0
total_assets_2023: float = 0.0
balance_difference_2024: float = 0.0
balance_difference_2023: float = 0.0
class EnhancedBalanceSheetGenerator:
def __init__(self, api_key: str):
self.api_key = api_key
self.base_url = "https://openrouter.ai/api/v1/chat/completions"
# Enhanced mapping with multiple patterns
self.field_mappings = {
# Share Capital patterns
'share_capital': [
'share capital', 'equity share', 'paid up', 'issued shares',
'authorised shares', 'subscribed', 'fully paid'
],
# Reserves patterns
'reserves_surplus': [
'reserves and surplus', 'reserves', 'surplus', 'retained earnings',
'profit and loss', 'general reserves', 'closing balance'
],
# Long term borrowings
'long_term_borrowings': [
'long term borrowings', 'long-term borrowings', 'borrowings',
'debt', 'loans', 'financial corporation', 'bank loan'
],
# Deferred tax
'deferred_tax': [
'deferred tax', 'tax liability', 'deferred tax liability'
],
# Trade payables
'trade_payables': [
'trade payables', 'payables', 'creditors', 'sundry creditors',
'capital expenditure', 'other expenses'
],
# Other current liabilities
'other_current_liabilities': [
'other current liabilities', 'current maturities', 'outstanding liabilities',
'statutory dues', 'accrued expenses'
],
# Short term provisions
'short_term_provisions': [
'short term provisions', 'provisions', 'provision for taxation',
'tax provision'
],
# Fixed assets - Tangible
'tangible_assets': [
'tangible assets', 'property plant', 'fixed assets', 'buildings',
'plant', 'equipment', 'net carrying value'
],
# Fixed assets - Intangible
'intangible_assets': [
'intangible assets', 'software', 'goodwill', 'intangible'
],
# Long term loans and advances
'long_term_loans_advances': [
'long term loans', 'security deposits', 'long term advances'
],
# Inventories
'inventories': [
'inventories', 'stock', 'consumables', 'raw materials'
],
# Trade receivables
'trade_receivables': [
'trade receivables', 'receivables', 'debtors', 'outstanding',
'other receivables'
],
# Cash and bank
'cash_bank': [
'cash and bank', 'cash', 'bank balances', 'current accounts',
'cash on hand', 'fixed deposits'
],
# Short term loans and advances
'short_term_loans_advances': [
'short term loans', 'prepaid expenses', 'other advances',
'advance tax', 'statutory authorities'
],
# Other current assets
'other_current_assets': [
'other current assets', 'accrued income', 'interest accrued'
]
}
def safe_float(self, value: Any) -> float:
"""Convert various value formats to float."""
if not value or str(value).strip() in ['-', '--', 'None', '', 'null']:
return 0.0
# Handle strings
if isinstance(value, str):
# Remove currency symbols and brackets
cleaned = re.sub(r'[₹,Rs\.\s\(\)]', '', value)
# Handle negative values in brackets
if '(' in str(value) and ')' in str(value):
cleaned = '-' + cleaned.replace('(', '').replace(')', '')
try:
return float(cleaned)
except Exception:
return 0.0
# Handle numeric values
try:
return float(value)
except Exception:
return 0.0
def get_value_flexible(self, data: Any, date_key_2024: str = "2024-03-31 00:00:00", date_key_2023: str = "2023-03-31 00:00:00") -> tuple[float, float]:
"""Flexibly extract values from either list or dictionary format."""
if isinstance(data, dict):
# Dictionary format - extract by date keys
val_2024 = self.safe_float(data.get(date_key_2024, 0))
val_2023 = self.safe_float(data.get(date_key_2023, 0))
return val_2024, val_2023
elif isinstance(data, list):
# List format - assume first element is 2024, second is 2023
val_2024 = self.safe_float(data[0]) if len(data) > 0 else 0.0
val_2023 = self.safe_float(data[1]) if len(data) > 1 else 0.0
return val_2024, val_2023
else:
# Single value or other format
val = self.safe_float(data)
return val, 0.0 # Assume it's 2024 value, 2023 is 0
def call_ai_for_analysis(self, data_summary: str) -> Dict[str, Any]:
"""Use AI to analyze and extract balance sheet data"""
prompt = f"""
You are a financial analyst. Extract balance sheet data from the following JSON data and create a properly structured balance sheet.
CRITICAL REQUIREMENTS:
1. Extract ALL line items with their 2024 and 2023 values
2. Calculate missing totals where needed
3. Ensure the balance sheet balances (Assets = Equity + Liabilities)
4. Return ONLY valid JSON in the exact format specified below
Expected Balance Sheet Structure:
- EQUITY AND LIABILITIES
- Shareholders' funds (Share capital, Reserves and surplus)
- Non-Current liabilities (Long term borrowings, Deferred tax liability)
- Current liabilities (Trade payables, Other current liabilities, Short term provisions)
- ASSETS
- Non-current assets (Fixed assets - Tangible/Intangible, Long term loans and advances)
- Current assets (Inventories, Trade receivables, Cash and bank balances, Short-term loans and advances, Other current assets)
Data to analyze:
{data_summary}
Return ONLY this JSON format:
{{
"balance_sheet_items": [
{{
"category": "Shareholders' funds",
"subcategory": "",
"name": "Share capital",
"note": "2",
"value_2024": 542.52,
"value_2023": 542.52
}},
{{
"category": "Shareholders' funds",
"subcategory": "",
"name": "Reserves and surplus",
"note": "3",
"value_2024": 3152.39,
"value_2023": 2642.87
}},
{{
"category": "Non-Current liabilities",
"subcategory": "",
"name": "Long term borrowings",
"note": "4",
"value_2024": 914.46,
"value_2023": 321.36
}}
],
"totals": {{
"shareholders_funds_2024": 3694.91,
"shareholders_funds_2023": 3185.39,
"total_equity_liabilities_2024": 5246.10,
"total_equity_liabilities_2023": 4725.23,
"total_assets_2024": 5246.10,
"total_assets_2023": 4725.23
}}
}}
"""
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
payload = {
"model": "anthropic/claude-3.5-sonnet",
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.1,
"max_tokens": 4000
}
try:
response = requests.post(self.base_url, headers=headers, json=payload, timeout=60)
content = response.json()['choices'][0]['message']['content']
# Clean the response
content = re.sub(r'```(?:json)?\s*', '', content).strip('`').strip()
return json.loads(content)
except Exception as e:
logger.error(f"AI analysis failed: {e}")
return {"balance_sheet_items": [], "totals": {}}
def extract_from_json_structure(self, json_data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Direct extraction from the structured JSON data with flexible list/dict support"""
items = []
company_data = json_data.get("company_financial_data", {})
# Extract Share Capital
share_capital = company_data.get("share_capital", {})
total_share_capital = share_capital.get("Total issued, subscribed and fully paid-up share capital", {})
if total_share_capital:
val_2024, val_2023 = self.get_value_flexible(total_share_capital)
if val_2024 or val_2023:
items.append({
"category": "Shareholders' funds",
"name": "Share capital",
"note": "2",
"value_2024": val_2024,
"value_2023": val_2023
})
# Extract Reserves and Surplus
reserves = company_data.get("reserves_and_surplus", {})
closing_balance = reserves.get("Balance, at the end of the year", {})
if closing_balance:
val_2024, val_2023 = self.get_value_flexible(closing_balance)
if val_2024 or val_2023:
items.append({
"category": "Shareholders' funds",
"name": "Reserves and surplus",
"note": "3",
"value_2024": val_2024,
"value_2023": val_2023
})
# Extract Long-term Borrowings
borrowings = company_data.get("borrowings", {}).get("4. Long-Term Borrowings", {})
total_borrowings_2024 = 0
total_borrowings_2023 = 0
for key, value in borrowings.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
total_borrowings_2024 += val_2024
total_borrowings_2023 += val_2023
if total_borrowings_2024 or total_borrowings_2023:
items.append({
"category": "Non-Current liabilities",
"name": "Long term borrowings",
"note": "4",
"value_2024": total_borrowings_2024,
"value_2023": total_borrowings_2023
})
# Extract Deferred Tax
deferred_tax = company_data.get("other_data", {}).get("5. Deferred Tax Liability / (Asset)", {})
if deferred_tax:
dtl = deferred_tax.get("Deferred tax liability", {})
val_2024, val_2023 = self.get_value_flexible(dtl)
if val_2024 or val_2023:
items.append({
"category": "Non-Current liabilities",
"name": "Deferred Tax Liability (Net)",
"note": "5",
"value_2024": val_2024,
"value_2023": val_2023
})
# Extract Current Liabilities
current_liabilities = company_data.get("current_liabilities", {})
# Trade Payables
trade_payables = current_liabilities.get("6. Trade Payables", {})
tp_2024 = tp_2023 = 0
for key, value in trade_payables.items():
if key not in ["_metadata", "Particulars", "Disputed dues"] and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
tp_2024 += val_2024
tp_2023 += val_2023
if tp_2024 or tp_2023:
items.append({
"category": "Current liabilities",
"name": "Trade payables",
"note": "6",
"value_2024": tp_2024,
"value_2023": tp_2023
})
# Other Current Liabilities
other_cl = current_liabilities.get("7. Other Current Liabilities", {})
ocl_2024 = ocl_2023 = 0
for key, value in other_cl.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
ocl_2024 += val_2024
ocl_2023 += val_2023
if ocl_2024 or ocl_2023:
items.append({
"category": "Current liabilities",
"name": "Other current liabilities",
"note": "7",
"value_2024": ocl_2024,
"value_2023": ocl_2023
})
# Short Term Provisions
provisions = current_liabilities.get("8. Short Term Provisions", {})
prov_2024 = prov_2023 = 0
for key, value in provisions.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
prov_2024 += val_2024
prov_2023 += val_2023
if prov_2024 or prov_2023:
items.append({
"category": "Current liabilities",
"name": "Short term provisions",
"note": "8",
"value_2024": prov_2024,
"value_2023": prov_2023
})
# Extract Fixed Assets
fixed_assets = company_data.get("fixed_assets", {})
# Tangible Assets
tangible = fixed_assets.get("tangible_assets", {}).get("", {})
if tangible:
net_carrying = tangible.get("net_carrying_value", {})
if net_carrying:
# Handle both dict and list formats for net carrying value
if isinstance(net_carrying, dict):
val_2024 = self.safe_float(net_carrying.get("closing", 0))
val_2023 = self.safe_float(net_carrying.get("opening", 0))
else:
val_2024, val_2023 = self.get_value_flexible(net_carrying)
if val_2024 or val_2023:
items.append({
"category": "Non-current assets",
"subcategory": "Fixed assets",
"name": "Tangible assets",
"note": "9",
"value_2024": val_2024,
"value_2023": val_2023
})
# Intangible Assets
intangible = fixed_assets.get("intangible_assets", {}).get("", {})
if intangible:
net_carrying = intangible.get("net_carrying_value", {})
if net_carrying:
# Handle both dict and list formats for net carrying value
if isinstance(net_carrying, dict):
val_2024 = self.safe_float(net_carrying.get("closing", 0))
val_2023 = self.safe_float(net_carrying.get("opening", 0))
else:
val_2024, val_2023 = self.get_value_flexible(net_carrying)
if val_2024 or val_2023:
items.append({
"category": "Non-current assets",
"subcategory": "Fixed assets",
"name": "Intangible assets",
"note": "9",
"value_2024": val_2024,
"value_2023": val_2023
})
# Long Term Loans and Advances
lt_loans = company_data.get("loans_and_advances", {}).get("10. Long Term Loans and advances", {})
lt_2024 = lt_2023 = 0
for key, value in lt_loans.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
lt_2024 += val_2024
lt_2023 += val_2023
if lt_2024 or lt_2023:
items.append({
"category": "Non-current assets",
"name": "Long Term Loans and Advances",
"note": "10",
"value_2024": lt_2024,
"value_2023": lt_2023
})
# Extract Current Assets
current_assets = company_data.get("current_assets", {})
# Inventories
inventories = current_assets.get("11. Inventories", {})
inv_2024 = inv_2023 = 0
for key, value in inventories.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
inv_2024 += val_2024
inv_2023 += val_2023
if inv_2024 or inv_2023:
items.append({
"category": "Current assets",
"name": "Inventories",
"note": "11",
"value_2024": inv_2024,
"value_2023": inv_2023
})
# Trade Receivables
trade_recv = current_assets.get("12. Trade receivables", {})
tr_2024 = tr_2023 = 0
for key, value in trade_recv.items():
if key not in ["_metadata", "Particulars", "trade_receivables_aging"] and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
tr_2024 += val_2024
tr_2023 += val_2023
if tr_2024 or tr_2023:
items.append({
"category": "Current assets",
"name": "Trade receivables",
"note": "12",
"value_2024": tr_2024,
"value_2023": tr_2023
})
# Cash and Bank Balances
cash_bank = current_assets.get("13. Cash and bank balances", {})
cb_2024 = cb_2023 = 0
for key, value in cash_bank.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
cb_2024 += val_2024
cb_2023 += val_2023
if cb_2024 or cb_2023:
items.append({
"category": "Current assets",
"name": "Cash and bank balances",
"note": "13",
"value_2024": cb_2024,
"value_2023": cb_2023
})
# Short-term Loans and Advances
st_loans = company_data.get("loans_and_advances", {}).get("14. Short Term Loans and Advances", {})
st_2024 = st_2023 = 0
for key, value in st_loans.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
st_2024 += val_2024
st_2023 += val_2023
if st_2024 or st_2023:
items.append({
"category": "Current assets",
"name": "Short-term loans and advances",
"note": "14",
"value_2024": st_2024,
"value_2023": st_2023
})
# Other Current Assets
other_ca = company_data.get("other_data", {}).get("15. Other Current Assets", {})
oca_2024 = oca_2023 = 0
for key, value in other_ca.items():
if key != "_metadata" and value is not None:
val_2024, val_2023 = self.get_value_flexible(value)
oca_2024 += val_2024
oca_2023 += val_2023
if oca_2024 or oca_2023:
items.append({
"category": "Current assets",
"name": "Other current assets",
"note": "15",
"value_2024": oca_2024,
"value_2023": oca_2023
})
return items
def calculate_totals(self, items: List[Dict[str, Any]]) -> BalanceSheetTotals:
"""Calculate section totals and verify balance"""
totals = {}
# Group by categories
categories = {}
for item in items:
cat = item["category"]
if cat not in categories:
categories[cat] = {"2024": 0, "2023": 0}
categories[cat]["2024"] += item["value_2024"]
categories[cat]["2023"] += item["value_2023"]
# Calculate major totals
shareholders_funds_2024 = categories.get("Shareholders' funds", {}).get("2024", 0)
shareholders_funds_2023 = categories.get("Shareholders' funds", {}).get("2023", 0)
non_current_liab_2024 = categories.get("Non-Current liabilities", {}).get("2024", 0)
non_current_liab_2023 = categories.get("Non-Current liabilities", {}).get("2023", 0)
current_liab_2024 = categories.get("Current liabilities", {}).get("2024", 0)
current_liab_2023 = categories.get("Current liabilities", {}).get("2023", 0)
non_current_assets_2024 = categories.get("Non-current assets", {}).get("2024", 0)
non_current_assets_2023 = categories.get("Non-current assets", {}).get("2023", 0)
current_assets_2024 = categories.get("Current assets", {}).get("2024", 0)
current_assets_2023 = categories.get("Current assets", {}).get("2023", 0)
total_equity_liab_2024 = shareholders_funds_2024 + non_current_liab_2024 + current_liab_2024
total_equity_liab_2023 = shareholders_funds_2023 + non_current_liab_2023 + current_liab_2023
total_assets_2024 = non_current_assets_2024 + current_assets_2024
total_assets_2023 = non_current_assets_2023 + current_assets_2023
return BalanceSheetTotals(
shareholders_funds_2024=shareholders_funds_2024,
shareholders_funds_2023=shareholders_funds_2023,
non_current_liabilities_2024=non_current_liab_2024,
non_current_liabilities_2023=non_current_liab_2023,
current_liabilities_2024=current_liab_2024,
current_liabilities_2023=current_liab_2023,
non_current_assets_2024=non_current_assets_2024,
non_current_assets_2023=non_current_assets_2023,
current_assets_2024=current_assets_2024,
current_assets_2023=current_assets_2023,
total_equity_liabilities_2024=total_equity_liab_2024,
total_equity_liabilities_2023=total_equity_liab_2023,
total_assets_2024=total_assets_2024,
total_assets_2023=total_assets_2023,
balance_difference_2024=abs(total_assets_2024 - total_equity_liab_2024),
balance_difference_2023=abs(total_assets_2023 - total_equity_liab_2023)
)
def generate_balance_sheet_excel(self, items: List[Dict[str, Any]], totals: BalanceSheetTotals, output_dir: str = "output") -> str:
"""Generate formatted Excel balance sheet"""
os.makedirs(output_dir, exist_ok=True)
wb = Workbook()
ws = wb.active
ws.title = "Balance Sheet"
# Set column widths
ws.column_dimensions["A"].width = 40
ws.column_dimensions["B"].width = 8
ws.column_dimensions["C"].width = 15
ws.column_dimensions["D"].width = 15
# Styles
bold_font = Font(bold=True)
thin_border = Border(
left=Side(style='thin'), right=Side(style='thin'),
top=Side(style='thin'), bottom=Side(style='thin')
)
row = 1
def add_row(desc, note, val_2024, val_2023, bold=False, indent=0, border=False):
nonlocal row
# Description
cell_a = ws.cell(row=row, column=1, value=" " * indent + desc)
if bold:
cell_a.font = bold_font
if border:
cell_a.border = thin_border
# Note
cell_b = ws.cell(row=row, column=2, value=note)
if bold:
cell_b.font = bold_font
if border:
cell_b.border = thin_border
# Values
for col, val in [(3, val_2024), (4, val_2023)]:
cell = ws.cell(row=row, column=col)
if val != 0:
cell.value = val
cell.number_format = '#,##0.00'
if bold:
cell.font = bold_font
if border:
cell.border = thin_border
cell.alignment = Alignment(horizontal='right')
row += 1
# Header
add_row("Balance Sheet as at March 31, 2024", "", 0, 0, True)
add_row("", "", 0, 0)
add_row("(In Lakhs)", "", 0, 0)
add_row("", "Notes", "March 31, 2024", "March 31, 2023", True)
add_row("", "", 0, 0)
# EQUITY AND LIABILITIES
add_row("EQUITY AND LIABILITIES", "", 0, 0, True)
# Shareholders' funds
add_row("Shareholders' funds", "", 0, 0, True)
shareholders_items = [item for item in items if item["category"] == "Shareholders' funds"]
for item in shareholders_items:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"])
add_row("", "", totals.shareholders_funds_2024, totals.shareholders_funds_2023, True)
add_row("", "", 0, 0)
# Non-Current liabilities
add_row("Non-Current liabilities", "", 0, 0, True)
non_current_liab_items = [item for item in items if item["category"] == "Non-Current liabilities"]
for item in non_current_liab_items:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"])
add_row("", "", totals.non_current_liabilities_2024, totals.non_current_liabilities_2023, True)
add_row("", "", 0, 0)
# Current liabilities
add_row("Current liabilities", "", 0, 0, True)
current_liab_items = [item for item in items if item["category"] == "Current liabilities"]
for item in current_liab_items:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"])
add_row("", "", totals.current_liabilities_2024, totals.current_liabilities_2023, True)
add_row("", "", 0, 0)
# TOTAL EQUITY & LIABILITIES
add_row("TOTAL", "", totals.total_equity_liabilities_2024, totals.total_equity_liabilities_2023, True, 0, True)
add_row("", "", 0, 0)
# ASSETS
add_row("ASSETS", "", 0, 0, True)
# Non-current assets
add_row("Non-current assets", "", 0, 0, True)
# Fixed assets
fixed_asset_items = [item for item in items if item.get("subcategory") == "Fixed assets"]
if fixed_asset_items:
add_row("Fixed assets", "", 0, 0, True, 1)
fixed_total_2024 = fixed_total_2023 = 0
for item in fixed_asset_items:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 2)
fixed_total_2024 += item["value_2024"]
fixed_total_2023 += item["value_2023"]
add_row("", "", fixed_total_2024, fixed_total_2023, True, 2)
# Other non-current assets
other_non_current = [item for item in items if item["category"] == "Non-current assets" and item.get("subcategory") != "Fixed assets"]
for item in other_non_current:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 1)
add_row("", "", totals.non_current_assets_2024, totals.non_current_assets_2023, True)
add_row("", "", 0, 0)
# Current assets
add_row("Current assets", "", 0, 0, True)
current_asset_items = [item for item in items if item["category"] == "Current assets"]
for item in current_asset_items:
add_row(item["name"], item["note"], item["value_2024"], item["value_2023"], False, 1)
add_row("", "", totals.current_assets_2024, totals.current_assets_2023, True)
add_row("", "", 0, 0)
# TOTAL ASSETS
add_row("TOTAL", "", totals.total_assets_2024, totals.total_assets_2023, True, 0, True)
# Add balance verification
add_row("", "", 0, 0)
balance_2024 = totals.balance_difference_2024
balance_2023 = totals.balance_difference_2023
if balance_2024 < 0.01 and balance_2023 < 0.01:
add_row(" Balance Sheet is BALANCED", "", 0, 0, True)
else:
add_row(f" Balance Difference: {balance_2024:.2f} | {balance_2023:.2f}", "", 0, 0, True)
# Save file
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = os.path.join(output_dir, f"balance_sheet_{timestamp}.xlsx")
wb.save(output_file)
logger.info(f"Output file: {output_file}")
print(f"Output file: {os.path.abspath(output_file)}") # For API subprocess parsing
return output_file
def process(self, input_file: str, output_dir: str = "output") -> Optional[str]:
"""Main processing function"""
try:
logger.info(f"Processing: {input_file}")
# Load JSON data
with open(input_file, 'r', encoding='utf-8') as f:
json_data = json.load(f)
logger.info("Extracting data from JSON structure...")
# Method 1: Direct extraction from structured JSON
items = self.extract_from_json_structure(json_data)
logger.info(f"Extracted {len(items)} items from JSON structure")
# Method 2: AI-assisted extraction if needed
if len(items) < 10: # If we don't have enough items
logger.info("Using AI for additional extraction...")
# Create summary for AI
summary = json.dumps(json_data, indent=2)[:8000] # Limit size
ai_result = self.call_ai_for_analysis(summary)
ai_items = ai_result.get("balance_sheet_items", [])
logger.info(f"AI extracted {len(ai_items)} additional items")
# Merge items (avoid duplicates)
existing_names = {item["name"].lower() for item in items}
for ai_item in ai_items:
if ai_item["name"].lower() not in existing_names:
items.append(ai_item)
if not items:
logger.error("No balance sheet items extracted")
return None
# Calculate totals
totals = self.calculate_totals(items)
# Display summary
logger.info(f"\n BALANCE SHEET SUMMARY:")
logger.info(f"Total Items Extracted: {len(items)}")
logger.info(f"Assets 2024: Rs. {totals.total_assets_2024:,.2f} Lakhs")
logger.info(f"Equity & Liabilities 2024: Rs. {totals.total_equity_liabilities_2024:,.2f} Lakhs")
logger.info(f"Balance Difference 2024: Rs. {totals.balance_difference_2024:,.2f} Lakhs")
logger.info(f"Assets 2023: Rs. {totals.total_assets_2023:,.2f} Lakhs")
logger.info(f"Equity & Liabilities 2023: Rs. {totals.total_equity_liabilities_2023:,.2f} Lakhs")
logger.info(f"Balance Difference 2023: Rs. {totals.balance_difference_2023:,.2f} Lakhs")
# Check if balanced
is_balanced_2024 = totals.balance_difference_2024 < 0.01
is_balanced_2023 = totals.balance_difference_2023 < 0.01
if is_balanced_2024 and is_balanced_2023:
logger.info("Balance Sheet is PERFECTLY BALANCED!")
else:
logger.warning("Balance Sheet has differences - may need adjustment")
# Generate Excel
output_file = self.generate_balance_sheet_excel(items, totals, output_dir)
logger.info(f"SUCCESS: Generated {output_file}")
return output_file
except Exception as e:
logger.error(f"Error processing file: {e}", exc_info=True)
return None
def main() -> None:
"""
Main function for running the balance sheet generator.
Accepts input file and output directory from command-line arguments or environment variables.
Handles errors gracefully and logs all major events.
"""
logger.info("ENHANCED BALANCE SHEET GENERATOR v2.0 started.")
import sys
api_key = settings.api_key
input_file = settings.input_file
output_dir = settings.output_dir
if len(sys.argv) > 1:
input_file = sys.argv[1]
if len(sys.argv) > 2:
output_dir = sys.argv[2]
if not api_key:
logger.error("Missing OPENROUTER_API_KEY environment variable. Please set your OpenRouter API key in the .env file.")
return
if not os.path.exists(input_file):
logger.error(f"Input file not found: {input_file}. Please ensure your JSON data file exists.")
return
generator = EnhancedBalanceSheetGenerator(api_key)
try:
result = generator.process(input_file, output_dir)
if result:
abs_path = os.path.abspath(result)
if os.path.exists(abs_path):
logger.info(f"COMPLETED SUCCESSFULLY! Output file: {abs_path}")
print(f"Output file: {abs_path}") # For API subprocess parsing
else:
logger.error(f"PROCESSING FAILED. Output file not created: {abs_path}")
else:
logger.error("PROCESSING FAILED. Please check the error messages above and try again.")
except Exception as e:
logger.error(f"Fatal error: {e}", exc_info=True)
if __name__ == "__main__":
main()