Spaces:
Sleeping
Sleeping
Update notes/llm_notes_generator.py
#5
by
dipan004
- opened
- notes/llm_notes_generator.py +688 -250
notes/llm_notes_generator.py
CHANGED
|
@@ -10,6 +10,7 @@ class FlexibleFinancialNoteGenerator:
|
|
| 10 |
def generate_all_notes(self, trial_balance_path=None):
|
| 11 |
# Placeholder logic
|
| 12 |
return {"dummy": True}
|
|
|
|
| 13 |
import json
|
| 14 |
import os
|
| 15 |
import logging
|
|
@@ -23,10 +24,11 @@ from typing import Dict, List, Any, Optional, Tuple
|
|
| 23 |
import pandas as pd
|
| 24 |
from pydantic import BaseModel, ValidationError
|
| 25 |
from pydantic_settings import BaseSettings
|
|
|
|
| 26 |
from utils.utils import convert_note_json_to_lakhs
|
| 27 |
|
| 28 |
# Load environment variables
|
| 29 |
-
load_dotenv()
|
| 30 |
|
| 31 |
# Configure logging
|
| 32 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -73,61 +75,32 @@ class FlexibleFinancialNoteGenerator:
|
|
| 73 |
"X-Title": "Financial Note Generator"
|
| 74 |
}
|
| 75 |
self.note_templates = self.load_note_templates()
|
| 76 |
-
|
| 77 |
self.recommended_models = [
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
]
|
| 81 |
|
| 82 |
-
def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
|
| 83 |
-
"""Initialize account classification patterns."""
|
| 84 |
-
return {
|
| 85 |
-
"10": {
|
| 86 |
-
"keywords": ["security deposit", "long term advance", "deposit", "advance recoverable"],
|
| 87 |
-
"groups": ["Long Term Loans and Advances", "Non-Current Assets"],
|
| 88 |
-
"exclude_keywords": ["short term", "current", "trade"]
|
| 89 |
-
},
|
| 90 |
-
"11": {
|
| 91 |
-
"keywords": ["inventory", "stock", "raw material", "finished goods", "work in progress", "consumables"],
|
| 92 |
-
"groups": ["Inventories", "Current Assets"],
|
| 93 |
-
"exclude_keywords": ["advance", "deposit"]
|
| 94 |
-
},
|
| 95 |
-
"12": {
|
| 96 |
-
"keywords": ["trade receivable", "debtors", "accounts receivable", "sundry debtors"],
|
| 97 |
-
"groups": ["Trade Receivables", "Current Assets"],
|
| 98 |
-
"exclude_keywords": ["advance", "deposit"]
|
| 99 |
-
},
|
| 100 |
-
"13": {
|
| 101 |
-
"keywords": ["cash", "bank", "petty cash", "cash on hand", "current account", "savings account", "fixed deposit"],
|
| 102 |
-
"groups": ["Cash and Bank Balances", "Current Assets"],
|
| 103 |
-
"exclude_keywords": ["advance", "loan"]
|
| 104 |
-
},
|
| 105 |
-
"14": {
|
| 106 |
-
"keywords": ["prepaid", "advance", "short term", "employee advance", "supplier advance", "advance tax", "tds", "gst", "statutory"],
|
| 107 |
-
"groups": ["Short Term Loans and Advances", "Current Assets"],
|
| 108 |
-
"exclude_keywords": ["long term", "security deposit"]
|
| 109 |
-
},
|
| 110 |
-
"15": {
|
| 111 |
-
"keywords": ["interest accrued", "accrued income", "other current", "miscellaneous current"],
|
| 112 |
-
"groups": ["Other Current Assets", "Current Assets"],
|
| 113 |
-
"exclude_keywords": ["trade", "advance"]
|
| 114 |
-
}
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
def load_note_templates(self) -> Dict[str, Any]:
|
| 118 |
-
"""Load note templates from
|
| 119 |
try:
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
return note_templates
|
| 122 |
except ImportError as e:
|
| 123 |
-
logger.error(f"Error importing note_templates from
|
| 124 |
return {}
|
| 125 |
except Exception as e:
|
| 126 |
logger.error(f"Unexpected error loading note_templates: {e}")
|
| 127 |
return {}
|
| 128 |
|
| 129 |
def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
|
| 130 |
-
"""Load the
|
| 131 |
try:
|
| 132 |
if file_path.endswith('.json'):
|
| 133 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
@@ -156,186 +129,155 @@ class FlexibleFinancialNoteGenerator:
|
|
| 156 |
logger.error(f"Error loading trial balance: {e}")
|
| 157 |
return None
|
| 158 |
|
| 159 |
-
def
|
| 160 |
-
"""
|
| 161 |
-
if not trial_balance_data or "accounts" not in trial_balance_data:
|
| 162 |
-
return []
|
| 163 |
-
|
| 164 |
-
classified_accounts = []
|
| 165 |
-
patterns = self.account_patterns.get(note_number, {})
|
| 166 |
-
keywords = patterns.get("keywords", [])
|
| 167 |
-
groups = patterns.get("groups", [])
|
| 168 |
-
exclude_keywords = patterns.get("exclude_keywords", [])
|
| 169 |
-
|
| 170 |
-
for account in trial_balance_data["accounts"]:
|
| 171 |
-
account_name = account.get("account_name", "").lower()
|
| 172 |
-
account_group = account.get("group", "")
|
| 173 |
-
|
| 174 |
-
if any(exclude_word.lower() in account_name for exclude_word in exclude_keywords):
|
| 175 |
-
continue
|
| 176 |
-
|
| 177 |
-
keyword_match = any(keyword.lower() in account_name for keyword in keywords)
|
| 178 |
-
group_match = account_group in groups
|
| 179 |
-
|
| 180 |
-
if keyword_match or group_match:
|
| 181 |
-
classified_accounts.append(account)
|
| 182 |
-
|
| 183 |
-
logger.info(f"Classified {len(classified_accounts)} accounts for Note {note_number}")
|
| 184 |
-
return classified_accounts
|
| 185 |
-
|
| 186 |
-
def safe_amount_conversion(self, amount: Any, conversion_factor: float = 100000) -> float:
|
| 187 |
-
"""Safely convert amount to lakhs"""
|
| 188 |
-
try:
|
| 189 |
-
if isinstance(amount, str):
|
| 190 |
-
cleaned = re.sub(r'[^\d.-]', '', amount)
|
| 191 |
-
amount_float = float(cleaned) if cleaned else 0.0
|
| 192 |
-
else:
|
| 193 |
-
amount_float = float(amount) if amount is not None else 0.0
|
| 194 |
-
return round(amount_float / conversion_factor, 2)
|
| 195 |
-
except (ValueError, TypeError):
|
| 196 |
-
return 0.0
|
| 197 |
-
|
| 198 |
-
def calculate_totals(self, accounts: List[Dict[str, Any]], conversion_factor: float = 100000) -> Tuple[float, float]:
|
| 199 |
-
"""Calculate totals with safe amount conversion"""
|
| 200 |
-
total_amount = 0.0
|
| 201 |
-
for account in accounts:
|
| 202 |
-
amount = self.safe_amount_conversion(account.get("amount", 0), 1)
|
| 203 |
-
total_amount += amount
|
| 204 |
-
total_lakhs = round(total_amount / conversion_factor, 2)
|
| 205 |
-
return total_amount, total_lakhs
|
| 206 |
-
|
| 207 |
-
def categorize_accounts(self, accounts: List[Dict[str, Any]], note_number: str) -> Dict[str, List[Dict[str, Any]]]:
|
| 208 |
-
"""Categorize accounts based on note-specific rules"""
|
| 209 |
-
categories = {
|
| 210 |
-
"prepaid_expenses": [],
|
| 211 |
-
"other_advances": [],
|
| 212 |
-
"advance_tax": [],
|
| 213 |
-
"statutory_balances": [],
|
| 214 |
-
"uncategorized": []
|
| 215 |
-
} if note_number == "14" else {}
|
| 216 |
-
|
| 217 |
-
for account in accounts:
|
| 218 |
-
account_name = account.get("account_name", "").lower()
|
| 219 |
-
categorized = False
|
| 220 |
-
|
| 221 |
-
if note_number == "14":
|
| 222 |
-
if "prepaid" in account_name:
|
| 223 |
-
categories["prepaid_expenses"].append(account)
|
| 224 |
-
categorized = True
|
| 225 |
-
elif any(word in account_name for word in ["advance tax", "tax advance", "income tax"]):
|
| 226 |
-
categories["advance_tax"].append(account)
|
| 227 |
-
categorized = True
|
| 228 |
-
elif any(word in account_name for word in ["tds", "gst", "statutory", "government", "vat", "pf", "esi"]):
|
| 229 |
-
categories["statutory_balances"].append(account)
|
| 230 |
-
categorized = True
|
| 231 |
-
elif any(word in account_name for word in ["advance", "deposit", "recoverable", "employee advance", "supplier advance"]):
|
| 232 |
-
categories["other_advances"].append(account)
|
| 233 |
-
categorized = True
|
| 234 |
-
|
| 235 |
-
if not categorized:
|
| 236 |
-
categories["uncategorized"].append(account)
|
| 237 |
-
|
| 238 |
-
return categories
|
| 239 |
-
|
| 240 |
-
def calculate_category_totals(self, categories: Dict[str, List[Dict[str, Any]]], conversion_factor: float = 100000) -> Tuple[Dict[str, Dict[str, Any]], float]:
|
| 241 |
-
"""Calculate totals for each category"""
|
| 242 |
-
category_totals = {}
|
| 243 |
-
grand_total = 0.0
|
| 244 |
-
|
| 245 |
-
for category_name, accounts in categories.items():
|
| 246 |
-
if not isinstance(accounts, list):
|
| 247 |
-
continue
|
| 248 |
-
total_amount = 0.0
|
| 249 |
-
for account in accounts:
|
| 250 |
-
amount = self.safe_amount_conversion(account.get("amount", 0), 1)
|
| 251 |
-
total_amount += amount
|
| 252 |
-
total_lakhs = round(total_amount / conversion_factor, 2)
|
| 253 |
-
category_totals[category_name] = {
|
| 254 |
-
"amount": total_amount,
|
| 255 |
-
"lakhs": total_lakhs,
|
| 256 |
-
"count": len(accounts),
|
| 257 |
-
"accounts": [acc.get("account_name", "") for acc in accounts]
|
| 258 |
-
}
|
| 259 |
-
grand_total += total_amount
|
| 260 |
-
|
| 261 |
-
return category_totals, round(grand_total / conversion_factor, 2)
|
| 262 |
-
|
| 263 |
-
def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any], classified_accounts: List[Dict[str, Any]]) -> Optional[str]:
|
| 264 |
-
"""Build dynamic LLM prompt based on note template and classified accounts"""
|
| 265 |
if note_number not in self.note_templates:
|
| 266 |
return None
|
| 267 |
|
| 268 |
template = self.note_templates[note_number]
|
| 269 |
-
|
| 270 |
-
categories = self.categorize_accounts(classified_accounts, note_number)
|
| 271 |
-
category_totals, grand_total_lakhs = self.calculate_category_totals(categories)
|
| 272 |
|
|
|
|
| 273 |
context = {
|
| 274 |
"note_info": {
|
| 275 |
"number": note_number,
|
| 276 |
"title": template.get("title", ""),
|
| 277 |
"full_title": template.get("full_title", "")
|
| 278 |
},
|
| 279 |
-
"
|
| 280 |
-
"total_accounts": len(
|
| 281 |
-
"
|
| 282 |
-
"total_lakhs": total_lakhs,
|
| 283 |
-
"grand_total_lakhs": grand_total_lakhs
|
| 284 |
},
|
| 285 |
-
"categories": category_totals,
|
| 286 |
-
"trial_balance": trial_balance_data,
|
| 287 |
"current_date": datetime.now().strftime("%Y-%m-%d"),
|
| 288 |
"financial_year": "2023-24"
|
| 289 |
}
|
| 290 |
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
return prompt
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
def call_openrouter_api(self, prompt: str) -> Optional[str]:
|
| 340 |
"""Make API call to OpenRouter with model fallback"""
|
| 341 |
for model in self.recommended_models:
|
|
@@ -343,10 +285,13 @@ class FlexibleFinancialNoteGenerator:
|
|
| 343 |
payload = {
|
| 344 |
"model": model,
|
| 345 |
"messages": [
|
| 346 |
-
{
|
|
|
|
|
|
|
|
|
|
| 347 |
{"role": "user", "content": prompt}
|
| 348 |
],
|
| 349 |
-
"max_tokens":
|
| 350 |
"temperature": 0.1,
|
| 351 |
"top_p": 0.9
|
| 352 |
}
|
|
@@ -355,13 +300,20 @@ class FlexibleFinancialNoteGenerator:
|
|
| 355 |
self.api_url,
|
| 356 |
headers=self.headers,
|
| 357 |
json=payload,
|
| 358 |
-
timeout=
|
| 359 |
)
|
| 360 |
response.raise_for_status()
|
| 361 |
result = response.json()
|
| 362 |
content = result['choices'][0]['message']['content']
|
| 363 |
logger.info(f"Successful response from {model}")
|
| 364 |
return content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
except Exception as e:
|
| 366 |
logger.error(f"Failed with {model}: {e}")
|
| 367 |
continue
|
|
@@ -369,64 +321,385 @@ class FlexibleFinancialNoteGenerator:
|
|
| 369 |
return None
|
| 370 |
|
| 371 |
def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
| 372 |
-
"""Extract JSON from response, handling markdown code blocks"""
|
| 373 |
response_text = response_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
json_patterns = [
|
| 375 |
r'```json\s*(.*?)\s*```',
|
| 376 |
r'```\s*(.*?)\s*```',
|
| 377 |
-
r'(\{
|
| 378 |
]
|
| 379 |
|
| 380 |
for pattern in json_patterns:
|
| 381 |
match = re.search(pattern, response_text, re.DOTALL)
|
| 382 |
if match:
|
| 383 |
try:
|
| 384 |
-
|
| 385 |
-
|
|
|
|
| 386 |
except json.JSONDecodeError:
|
| 387 |
continue
|
| 388 |
|
|
|
|
| 389 |
try:
|
| 390 |
json_data = json.loads(response_text)
|
| 391 |
return json_data, response_text
|
| 392 |
except json.JSONDecodeError:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
return None, None
|
| 394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
|
| 396 |
-
"""Save the generated note to file
|
| 397 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 398 |
json_output_path = f"{output_dir}/notes.json"
|
| 399 |
raw_output_path = f"{output_dir}/notes_raw.txt"
|
| 400 |
formatted_md_path = f"{output_dir}/notes_formatted.md"
|
| 401 |
|
| 402 |
try:
|
|
|
|
| 403 |
with open(raw_output_path, 'w', encoding='utf-8') as f:
|
| 404 |
f.write(note_data)
|
|
|
|
|
|
|
| 405 |
json_data, json_string = self.extract_json_from_markdown(note_data)
|
|
|
|
| 406 |
if json_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
json_data = convert_note_json_to_lakhs(json_data)
|
|
|
|
|
|
|
| 408 |
with open(json_output_path, 'w', encoding='utf-8') as f:
|
| 409 |
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
| 410 |
logger.info(f"JSON saved to {json_output_path}")
|
| 411 |
-
|
|
|
|
|
|
|
| 412 |
if not md_content:
|
| 413 |
-
|
|
|
|
|
|
|
|
|
|
| 414 |
with open(formatted_md_path, 'w', encoding='utf-8') as f:
|
| 415 |
f.write(md_content)
|
|
|
|
| 416 |
return True
|
| 417 |
else:
|
|
|
|
|
|
|
| 418 |
fallback_json = {
|
| 419 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
"raw_response": note_data,
|
| 421 |
-
"error": "Could not parse JSON from response"
|
| 422 |
-
"generated_on": datetime.now().isoformat()
|
| 423 |
}
|
|
|
|
| 424 |
with open(json_output_path, 'w', encoding='utf-8') as f:
|
| 425 |
json.dump(fallback_json, f, indent=2, ensure_ascii=False)
|
| 426 |
-
logger.warning(f"Fallback JSON saved to {json_output_path}")
|
| 427 |
return False
|
|
|
|
| 428 |
except Exception as e:
|
| 429 |
logger.error(f"Error saving files: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
return False
|
| 431 |
|
| 432 |
def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
|
|
@@ -436,21 +709,25 @@ class FlexibleFinancialNoteGenerator:
|
|
| 436 |
return False
|
| 437 |
|
| 438 |
logger.info(f"Starting Note {note_number} generation...")
|
|
|
|
|
|
|
| 439 |
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 440 |
if not trial_balance:
|
| 441 |
return False
|
| 442 |
|
| 443 |
-
|
| 444 |
-
prompt = self.build_llm_prompt(note_number, trial_balance
|
| 445 |
if not prompt:
|
| 446 |
logger.error("Failed to build prompt")
|
| 447 |
return False
|
| 448 |
|
|
|
|
| 449 |
response = self.call_openrouter_api(prompt)
|
| 450 |
if not response:
|
| 451 |
logger.error("Failed to get API response")
|
| 452 |
return False
|
| 453 |
|
|
|
|
| 454 |
success = self.save_generated_note(response, note_number)
|
| 455 |
logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
|
| 456 |
return success
|
|
@@ -460,69 +737,230 @@ class FlexibleFinancialNoteGenerator:
|
|
| 460 |
logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
|
| 461 |
results = {}
|
| 462 |
all_notes = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
for note_number in self.note_templates.keys():
|
| 464 |
logger.info(f"Processing Note {note_number}")
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
continue
|
| 469 |
-
classified_accounts = self.classify_accounts_by_note(trial_balance, note_number)
|
| 470 |
-
prompt = self.build_llm_prompt(note_number, trial_balance, classified_accounts)
|
| 471 |
if not prompt:
|
| 472 |
results[note_number] = False
|
| 473 |
continue
|
|
|
|
|
|
|
| 474 |
response = self.call_openrouter_api(prompt)
|
| 475 |
if not response:
|
| 476 |
results[note_number] = False
|
| 477 |
continue
|
|
|
|
|
|
|
| 478 |
json_data, _ = self.extract_json_from_markdown(response)
|
| 479 |
if json_data:
|
| 480 |
-
|
| 481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
results[note_number] = False
|
|
|
|
|
|
|
| 484 |
import time
|
| 485 |
-
time.sleep(
|
| 486 |
-
|
|
|
|
| 487 |
output_dir = settings.output_dir
|
| 488 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
|
| 490 |
-
json.dump(
|
|
|
|
| 491 |
successful = sum(1 for success in results.values() if success)
|
| 492 |
total = len(results)
|
| 493 |
logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
|
| 494 |
logger.info(f"All notes saved to {output_dir}/notes.json")
|
|
|
|
| 495 |
return results
|
| 496 |
|
| 497 |
def main() -> None:
|
| 498 |
"""Main function to run the flexible note generator"""
|
| 499 |
try:
|
|
|
|
| 500 |
generator = FlexibleFinancialNoteGenerator()
|
| 501 |
if not generator.note_templates:
|
| 502 |
-
logger.error("No note templates loaded. Check
|
| 503 |
return
|
| 504 |
-
|
| 505 |
logger.info(f"Loaded {len(generator.note_templates)} note templates")
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
if
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
else:
|
| 516 |
-
logger.error(
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
successful = sum(1 for success in results.values() if success)
|
| 520 |
-
total = len(results)
|
| 521 |
-
logger.info(f"{successful}/{total} notes generated successfully")
|
| 522 |
else:
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
except Exception as e:
|
| 525 |
logger.error(f"Error: {e}", exc_info=True)
|
|
|
|
| 526 |
|
| 527 |
if __name__ == "__main__":
|
| 528 |
main()
|
|
|
|
| 10 |
def generate_all_notes(self, trial_balance_path=None):
|
| 11 |
# Placeholder logic
|
| 12 |
return {"dummy": True}
|
| 13 |
+
|
| 14 |
import json
|
| 15 |
import os
|
| 16 |
import logging
|
|
|
|
| 24 |
import pandas as pd
|
| 25 |
from pydantic import BaseModel, ValidationError
|
| 26 |
from pydantic_settings import BaseSettings
|
| 27 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 28 |
from utils.utils import convert_note_json_to_lakhs
|
| 29 |
|
| 30 |
# Load environment variables
|
| 31 |
+
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
|
| 32 |
|
| 33 |
# Configure logging
|
| 34 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 75 |
"X-Title": "Financial Note Generator"
|
| 76 |
}
|
| 77 |
self.note_templates = self.load_note_templates()
|
| 78 |
+
# Updated model list with DeepSeek as first choice
|
| 79 |
self.recommended_models = [
|
| 80 |
+
|
| 81 |
+
"deepseek/deepseek-r1",
|
| 82 |
+
#"deepseek/deepseek-coder",
|
| 83 |
+
"mistralai/mixtral-8x7b-instruct"
|
| 84 |
]
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
def load_note_templates(self) -> Dict[str, Any]:
|
| 87 |
+
"""Load note templates from notes_template.py file."""
|
| 88 |
try:
|
| 89 |
+
# Add parent directory to path for imports when run as script
|
| 90 |
+
if __name__ == "__main__":
|
| 91 |
+
sys.path.append(str(Path(__file__).parent.parent))
|
| 92 |
+
|
| 93 |
+
from notes_template import note_templates
|
| 94 |
return note_templates
|
| 95 |
except ImportError as e:
|
| 96 |
+
logger.error(f"Error importing note_templates from notes_template: {e}")
|
| 97 |
return {}
|
| 98 |
except Exception as e:
|
| 99 |
logger.error(f"Unexpected error loading note_templates: {e}")
|
| 100 |
return {}
|
| 101 |
|
| 102 |
def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
|
| 103 |
+
"""Load the complete trial balance from Excel or JSON."""
|
| 104 |
try:
|
| 105 |
if file_path.endswith('.json'):
|
| 106 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
|
|
| 129 |
logger.error(f"Error loading trial balance: {e}")
|
| 130 |
return None
|
| 131 |
|
| 132 |
+
def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
|
| 133 |
+
"""Build comprehensive LLM prompt with strict JSON output requirements"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
if note_number not in self.note_templates:
|
| 135 |
return None
|
| 136 |
|
| 137 |
template = self.note_templates[note_number]
|
| 138 |
+
all_accounts = trial_balance_data.get("accounts", [])
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
# Build context with full trial balance
|
| 141 |
context = {
|
| 142 |
"note_info": {
|
| 143 |
"number": note_number,
|
| 144 |
"title": template.get("title", ""),
|
| 145 |
"full_title": template.get("full_title", "")
|
| 146 |
},
|
| 147 |
+
"trial_balance": {
|
| 148 |
+
"total_accounts": len(all_accounts),
|
| 149 |
+
"accounts": all_accounts
|
|
|
|
|
|
|
| 150 |
},
|
|
|
|
|
|
|
| 151 |
"current_date": datetime.now().strftime("%Y-%m-%d"),
|
| 152 |
"financial_year": "2023-24"
|
| 153 |
}
|
| 154 |
|
| 155 |
+
# Get note-specific classification guidance
|
| 156 |
+
classification_guide = self._get_classification_guide(note_number)
|
| 157 |
+
|
| 158 |
+
prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
|
| 159 |
+
|
| 160 |
+
🔴 CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
|
| 161 |
+
1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
|
| 162 |
+
2. START YOUR RESPONSE WITH {{ and END WITH }}
|
| 163 |
+
3. DO NOT USE ```json``` CODE BLOCKS
|
| 164 |
+
4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
|
| 165 |
+
|
| 166 |
+
🔴 REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
|
| 167 |
+
{{
|
| 168 |
+
"title": "{template.get('title', '')}",
|
| 169 |
+
"full_title": "{template.get('full_title', '')}",
|
| 170 |
+
"structure": [
|
| 171 |
+
{{
|
| 172 |
+
"category": "In Lakhs",
|
| 173 |
+
"subcategories": [
|
| 174 |
+
{{ "label": "March 31, 2024", "value": 0.00 }},
|
| 175 |
+
{{ "label": "March 31, 2023", "value": 0.00 }}
|
| 176 |
+
]
|
| 177 |
+
}},
|
| 178 |
+
{{
|
| 179 |
+
"category": "Category Name",
|
| 180 |
+
"subcategories": [
|
| 181 |
+
{{ "label": "Subcategory Item", "value": 0.00, "previous_value": 0.00 }}
|
| 182 |
+
],
|
| 183 |
+
"total": 0.00,
|
| 184 |
+
"previous_total": 0.00
|
| 185 |
+
}}
|
| 186 |
+
],
|
| 187 |
+
"metadata": {{
|
| 188 |
+
"note_number": {note_number},
|
| 189 |
+
"generated_on": "{datetime.now().isoformat()}"
|
| 190 |
+
}},
|
| 191 |
+
"assumptions": "List any assumptions made during classification"
|
| 192 |
+
}}
|
| 193 |
+
|
| 194 |
+
🔴 STRUCTURE ARRAY EXPLAINED:
|
| 195 |
+
- First element: Header row with column labels (March 31, 2024, March 31, 2023)
|
| 196 |
+
- Subsequent elements: Data categories with subcategories
|
| 197 |
+
- Each data category must have:
|
| 198 |
+
* "category": Main category name
|
| 199 |
+
* "subcategories": Array of line items with "label", "value", "previous_value"
|
| 200 |
+
* "total": Sum of current year values in subcategories
|
| 201 |
+
* "previous_total": Sum of previous year values in subcategories
|
| 202 |
+
|
| 203 |
+
🔴 YOUR TASK:
|
| 204 |
+
1. Analyze ALL trial balance accounts provided below
|
| 205 |
+
2. Identify accounts that belong to "{template['full_title']}"
|
| 206 |
+
3. Classify into appropriate subcategories per Schedule III
|
| 207 |
+
4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
|
| 208 |
+
5. Calculate accurate totals ensuring mathematical consistency
|
| 209 |
+
6. Structure output in hierarchical "structure" array format
|
| 210 |
+
|
| 211 |
+
🔴 MATHEMATICAL REQUIREMENTS:
|
| 212 |
+
- All amounts MUST be in lakhs (divide original by 100,000)
|
| 213 |
+
- All subtotals MUST equal the grand total exactly
|
| 214 |
+
- Use 0.00 for March 2023 if data missing
|
| 215 |
+
- Round to 2 decimal places consistently
|
| 216 |
+
- Ensure "total" = sum of "value" in subcategories
|
| 217 |
+
- Ensure "previous_total" = sum of "previous_value" in subcategories
|
| 218 |
+
|
| 219 |
+
🔴 CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
|
| 220 |
+
{classification_guide}
|
| 221 |
+
|
| 222 |
+
🔴 COMPLETE TRIAL BALANCE DATA:
|
| 223 |
+
{json.dumps(context, indent=2)}
|
| 224 |
+
|
| 225 |
+
🔴 TEMPLATE STRUCTURE TO FOLLOW:
|
| 226 |
+
{json.dumps(template, indent=2)}
|
| 227 |
+
|
| 228 |
+
🔴 VALIDATION RULES:
|
| 229 |
+
- If no accounts match this note category, use empty categories with 0.00 totals
|
| 230 |
+
- Ensure "metadata.note_number" exactly matches {note_number}
|
| 231 |
+
- Document classification logic in "assumptions" field
|
| 232 |
+
- Structure must have at least 2 elements (header + data)
|
| 233 |
+
|
| 234 |
+
GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
| 235 |
|
| 236 |
return prompt
|
| 237 |
+
|
| 238 |
+
def _get_classification_guide(self, note_number: str) -> str:
|
| 239 |
+
"""Get note-specific classification guidance"""
|
| 240 |
+
guides = {
|
| 241 |
+
"10": """
|
| 242 |
+
**Note 10 - Long Term Loans and Advances:**
|
| 243 |
+
- Include: Security deposits, long-term advances to suppliers/employees, deposits with utilities
|
| 244 |
+
- Categories: Unsecured considered good, Unsecured considered doubtful, Doubtful (provision)
|
| 245 |
+
- Exclude: Short-term advances, trade receivables, prepaid expenses under 1 year
|
| 246 |
+
""",
|
| 247 |
+
"11": """
|
| 248 |
+
**Note 11 - Inventories:**
|
| 249 |
+
- Include: Raw materials, work-in-progress, finished goods, stores and spares, consumables
|
| 250 |
+
- Value at lower of cost or net realizable value
|
| 251 |
+
- Exclude: Advances for inventory purchases (classify as advances)
|
| 252 |
+
""",
|
| 253 |
+
"12": """
|
| 254 |
+
**Note 12 - Trade Receivables:**
|
| 255 |
+
- Include: Amounts due from customers for goods/services, bills receivable
|
| 256 |
+
- Categories: Unsecured considered good, Unsecured considered doubtful, Provision for doubtful debts
|
| 257 |
+
- Exclude: Advances, deposits, other receivables
|
| 258 |
+
""",
|
| 259 |
+
"13": """
|
| 260 |
+
**Note 13 - Cash and Cash Equivalents:**
|
| 261 |
+
- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
|
| 262 |
+
- Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
|
| 263 |
+
- Show: Balances in current accounts, savings accounts, fixed deposits separately
|
| 264 |
+
""",
|
| 265 |
+
"14": """
|
| 266 |
+
**Note 14 - Short Term Loans and Advances:**
|
| 267 |
+
- Include: Prepaid expenses, advances to suppliers, employee advances, advance tax, TDS receivable
|
| 268 |
+
- Categories:
|
| 269 |
+
* Unsecured, considered good: Prepaid expenses, Other advances
|
| 270 |
+
* Other loans and advances: Advance tax, Balances with statutory/govt authorities
|
| 271 |
+
- Exclude: Long-term advances, trade receivables
|
| 272 |
+
""",
|
| 273 |
+
"15": """
|
| 274 |
+
**Note 15 - Other Current Assets:**
|
| 275 |
+
- Include: Interest accrued, export incentives receivable, insurance claims, other miscellaneous current assets
|
| 276 |
+
- Exclude: Items that fit into specific categories like trade receivables, advances, cash
|
| 277 |
+
"""
|
| 278 |
+
}
|
| 279 |
+
return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
|
| 280 |
+
|
| 281 |
def call_openrouter_api(self, prompt: str) -> Optional[str]:
|
| 282 |
"""Make API call to OpenRouter with model fallback"""
|
| 283 |
for model in self.recommended_models:
|
|
|
|
| 285 |
payload = {
|
| 286 |
"model": model,
|
| 287 |
"messages": [
|
| 288 |
+
{
|
| 289 |
+
"role": "system",
|
| 290 |
+
"content": "You are an expert chartered accountant specializing in Indian accounting standards. You MUST respond with ONLY valid JSON, never with markdown code blocks or explanations. Start with { and end with }."
|
| 291 |
+
},
|
| 292 |
{"role": "user", "content": prompt}
|
| 293 |
],
|
| 294 |
+
"max_tokens": 12000,
|
| 295 |
"temperature": 0.1,
|
| 296 |
"top_p": 0.9
|
| 297 |
}
|
|
|
|
| 300 |
self.api_url,
|
| 301 |
headers=self.headers,
|
| 302 |
json=payload,
|
| 303 |
+
timeout=60
|
| 304 |
)
|
| 305 |
response.raise_for_status()
|
| 306 |
result = response.json()
|
| 307 |
content = result['choices'][0]['message']['content']
|
| 308 |
logger.info(f"Successful response from {model}")
|
| 309 |
return content
|
| 310 |
+
except requests.exceptions.HTTPError as e:
|
| 311 |
+
if e.response.status_code == 404:
|
| 312 |
+
logger.warning(f"Model {model} not found (404), trying next model")
|
| 313 |
+
elif e.response.status_code == 402:
|
| 314 |
+
logger.warning(f"Model {model} requires payment (402), trying next model")
|
| 315 |
+
else:
|
| 316 |
+
logger.error(f"HTTP error with {model}: {e}")
|
| 317 |
except Exception as e:
|
| 318 |
logger.error(f"Failed with {model}: {e}")
|
| 319 |
continue
|
|
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Extract the first JSON object found in an LLM response.

    Strategies are tried in order until one yields a JSON *object*:

    1. Scan for brace-balanced top-level ``{...}`` spans, ignoring braces
       that occur inside double-quoted JSON strings.
    2. The same scan without string tracking (kept for malformed input
       whose quotes are unbalanced).
    3. Markdown code fences (```` ```json ```` / ```` ``` ````) and a
       non-greedy ``{...}`` regex.
    4. The whole response.
    5. The span from the first ``{`` to the last ``}``.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        ``(parsed_dict, json_text)`` for the first object that parses, or
        ``(None, None)`` when nothing usable is found. Values that parse
        as JSON but are not objects (lists, numbers, ...) are rejected.
    """
    response_text = response_text.strip()

    def _first_balanced_object(text, string_aware):
        """Return (parsed, raw) for the first balanced top-level object that parses."""
        depth = 0
        start = -1
        in_string = False
        escaped = False
        for idx, char in enumerate(text):
            if in_string:
                # Inside a JSON string: braces are data, not structure.
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == '"':
                    in_string = False
                continue
            if char == '"':
                # Only track strings inside an object; quotes in the
                # surrounding prose must not swallow the JSON itself.
                if string_aware and depth > 0:
                    in_string = True
            elif char == '{':
                if depth == 0:
                    start = idx
                depth += 1
            elif char == '}' and depth > 0:
                # depth > 0 guard: a stray '}' before any '{' is ignored
                # instead of pushing the counter negative.
                depth -= 1
                if depth == 0:
                    candidate = text[start:idx + 1]
                    try:
                        parsed = json.loads(candidate)
                    except json.JSONDecodeError:
                        continue
                    return parsed, candidate
        return None

    # Handles concatenated/duplicate objects (e.g. "}{\n{") by taking the first.
    for string_aware in (True, False):
        found = _first_balanced_object(response_text, string_aware)
        if found is not None:
            logger.info("Successfully extracted first valid JSON object from response")
            return found

    # Fallback: markdown fences and a non-greedy object pattern.
    json_patterns = [
        r'```json\s*(.*?)\s*```',
        r'```\s*(.*?)\s*```',
        r'(\{.*?\})'
    ]

    for pattern in json_patterns:
        match = re.search(pattern, response_text, re.DOTALL)
        if not match:
            continue
        json_content = match.group(1).strip()
        try:
            json_data = json.loads(json_content)
        except json.JSONDecodeError:
            continue
        # Callers treat the result as a dict; skip lists/scalars.
        if isinstance(json_data, dict):
            return json_data, json_content

    # Try parsing the entire response as JSON.
    try:
        json_data = json.loads(response_text)
    except json.JSONDecodeError:
        json_data = None
    if isinstance(json_data, dict):
        return json_data, response_text

    # Last attempt: widest '{' ... '}' span.
    start = response_text.find('{')
    end = response_text.rfind('}') + 1
    if start != -1 and end > start:
        json_part = response_text[start:end]
        try:
            json_data = json.loads(json_part)
        except json.JSONDecodeError:
            json_data = None
        if isinstance(json_data, dict):
            return json_data, json_part

    return None, None
|
| 390 |
|
| 391 |
+
def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
    """Return a copy of *json_data* with missing or invalid required fields repaired.

    Repairs applied:
      * ``title`` / ``full_title``: filled from ``self.note_templates`` or a
        generic default when missing or empty.
      * ``metadata``: created if absent or not a dict; ``note_number`` is
        forced to ``int(note_number)`` (or to the raw string when
        *note_number* is not numeric); ``generated_on`` defaults to now.
      * ``structure``: replaced by a placeholder when missing, not a list,
        or empty; each dict element gets ``category`` and ``subcategories``;
        data rows (index > 0) with subcategories get ``total`` and
        ``previous_total`` computed when absent.
      * ``assumptions``: default text when missing.

    The top-level dict, ``metadata`` and each ``structure`` element are
    copied before modification, so the caller's objects are not mutated.

    Args:
        json_data: Parsed note as returned by the model.
        note_number: Note identifier used for template lookup and metadata.

    Returns:
        The repaired note dict.
    """

    def _as_number(value):
        """Coerce a cell value for summing; unusable values count as 0.0."""
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _header_row():
        return {
            "category": "In Lakhs",
            "subcategories": [
                {"label": "March 31, 2024", "value": 0.00},
                {"label": "March 31, 2023", "value": 0.00}
            ]
        }

    fixed_data = json_data.copy()

    # Get template for this note
    template = self.note_templates.get(note_number, {})

    # Auto-fix title fields
    if not fixed_data.get("title"):
        fixed_data["title"] = template.get("title", f"Note {note_number}")
        logger.info("Auto-fixed missing title field")

    if not fixed_data.get("full_title"):
        fixed_data["full_title"] = template.get(
            "full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}"
        )
        logger.info("Auto-fixed missing full_title field")

    # Auto-fix or create metadata (copied so the input dict stays untouched)
    metadata = fixed_data.get("metadata")
    if isinstance(metadata, dict):
        metadata = dict(metadata)
    else:
        metadata = {}
        logger.info("Auto-created metadata object")
    fixed_data["metadata"] = metadata

    # Ensure note_number is the expected integer. Only the conversion of
    # *note_number* itself decides between the int and string form; a bad
    # value coming from the model must never trigger the string fallback.
    metadata_note_num = metadata.get("note_number")
    try:
        expected_note_num = int(note_number)
    except (TypeError, ValueError):
        metadata["note_number"] = note_number
        logger.info(f"Auto-set metadata.note_number to string: {note_number}")
    else:
        is_exact_int = isinstance(metadata_note_num, int) and not isinstance(metadata_note_num, bool)
        if not is_exact_int or metadata_note_num != expected_note_num:
            metadata["note_number"] = expected_note_num
            logger.info(f"Auto-corrected metadata.note_number from {metadata_note_num} to {expected_note_num}")

    if "generated_on" not in metadata:
        metadata["generated_on"] = datetime.now().isoformat()
        logger.info("Auto-fixed missing metadata.generated_on field")

    # Auto-fix or create structure array
    structure = fixed_data.get("structure")
    if not isinstance(structure, list):
        logger.warning("Structure array missing, creating default structure")
        fixed_data["structure"] = [
            _header_row(),
            {
                "category": "No data available",
                "subcategories": [
                    {"label": "Items", "value": 0.00, "previous_value": 0.00}
                ],
                "total": 0.00,
                "previous_total": 0.00
            }
        ]
    else:
        if not structure:
            logger.warning("Empty structure array, adding default elements")
            structure = [_header_row()]

        repaired = []
        for i, struct_elem in enumerate(structure):
            if not isinstance(struct_elem, dict):
                # Left as-is; only dict rows can be repaired.
                repaired.append(struct_elem)
                continue

            struct_elem = dict(struct_elem)
            if "category" not in struct_elem:
                struct_elem["category"] = f"Category {i}"

            if not isinstance(struct_elem.get("subcategories"), list):
                struct_elem["subcategories"] = []

            # For data rows (index 0 is the header), ensure totals exist
            if i > 0 and struct_elem["subcategories"]:
                rows = [sub for sub in struct_elem["subcategories"] if isinstance(sub, dict)]
                if "total" not in struct_elem:
                    struct_elem["total"] = sum(_as_number(sub.get("value", 0.0)) for sub in rows)
                if "previous_total" not in struct_elem:
                    struct_elem["previous_total"] = sum(
                        _as_number(sub.get("previous_value", 0.0)) for sub in rows
                    )

            repaired.append(struct_elem)
        fixed_data["structure"] = repaired

    # Auto-fix assumptions
    if "assumptions" not in fixed_data:
        fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
        logger.info("Auto-added default assumptions")

    return fixed_data
|
| 501 |
+
|
| 502 |
+
def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
    """Check that a note dict has the expected top-level shape.

    Checks, in order: presence of the required top-level keys, that
    ``metadata`` is a dict containing ``note_number``, that this number
    matches *note_number*, and that ``structure`` is a non-empty list.

    Args:
        json_data: Note dict to validate.
        note_number: Note identifier the dict is expected to describe.

    Returns:
        ``(True, "Validation passed")`` on success, otherwise ``False``
        and a message describing the first failed check.
    """
    required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]

    def _canonical(value):
        """Text form used for comparison; integral floats drop their '.0'."""
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    # Check required fields
    missing_fields = [field for field in required_fields if field not in json_data]
    if missing_fields:
        return False, f"Missing required fields: {', '.join(missing_fields)}"

    # Check metadata structure
    metadata = json_data.get("metadata")
    if not isinstance(metadata, dict):
        return False, "metadata must be an object"

    if "note_number" not in metadata:
        return False, "metadata.note_number is required"

    # Compare canonical forms so 5, 5.0 and "5" all match note "5";
    # a plain str() comparison would reject 5.0 as "5.0" != "5".
    if _canonical(metadata["note_number"]) != _canonical(note_number):
        return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"

    # Check structure array
    structure = json_data.get("structure")
    if not isinstance(structure, list):
        return False, "structure must be an array"

    if not structure:
        return False, "structure array cannot be empty"

    return True, "Validation passed"
|
| 534 |
+
|
| 535 |
+
def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
|
| 536 |
+
"""Generate markdown table from structure array"""
|
| 537 |
+
try:
|
| 538 |
+
title = json_data.get("full_title", json_data.get("title", "Financial Note"))
|
| 539 |
+
structure = json_data.get("structure", [])
|
| 540 |
+
|
| 541 |
+
if not structure:
|
| 542 |
+
return f"# {title}\n\n*No data available*"
|
| 543 |
+
|
| 544 |
+
# Start markdown
|
| 545 |
+
md_lines = [f"# {title}\n"]
|
| 546 |
+
|
| 547 |
+
# Get header row (first element)
|
| 548 |
+
header_elem = structure[0] if len(structure) > 0 else None
|
| 549 |
+
if header_elem and header_elem.get("subcategories"):
|
| 550 |
+
headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
|
| 551 |
+
md_lines.append("| Particulars | " + " | ".join(headers) + " |")
|
| 552 |
+
md_lines.append("|" + "---|" * (len(headers) + 1))
|
| 553 |
+
|
| 554 |
+
# Process data rows
|
| 555 |
+
for i in range(1, len(structure)):
|
| 556 |
+
elem = structure[i]
|
| 557 |
+
category = elem.get("category", "")
|
| 558 |
+
subcategories = elem.get("subcategories", [])
|
| 559 |
+
|
| 560 |
+
# Add category header if exists
|
| 561 |
+
if category:
|
| 562 |
+
md_lines.append(f"\n**{category}**\n")
|
| 563 |
+
|
| 564 |
+
# Add subcategory rows
|
| 565 |
+
for sub in subcategories:
|
| 566 |
+
label = sub.get("label", "")
|
| 567 |
+
value = sub.get("value", 0.00)
|
| 568 |
+
previous_value = sub.get("previous_value", 0.00)
|
| 569 |
+
md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
|
| 570 |
+
|
| 571 |
+
# Add total row if exists
|
| 572 |
+
if "total" in elem:
|
| 573 |
+
total = elem.get("total", 0.00)
|
| 574 |
+
previous_total = elem.get("previous_total", 0.00)
|
| 575 |
+
md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
|
| 576 |
+
|
| 577 |
+
# Add metadata
|
| 578 |
+
metadata = json_data.get("metadata", {})
|
| 579 |
+
md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
|
| 580 |
+
|
| 581 |
+
# Add assumptions if present
|
| 582 |
+
assumptions = json_data.get("assumptions", "")
|
| 583 |
+
if assumptions:
|
| 584 |
+
md_lines.append(f"\n\n**Assumptions:** {assumptions}")
|
| 585 |
+
|
| 586 |
+
return "\n".join(md_lines)
|
| 587 |
+
|
| 588 |
+
except Exception as e:
|
| 589 |
+
logger.error(f"Error generating markdown from structure: {e}")
|
| 590 |
+
return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
|
| 591 |
+
|
| 592 |
def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
    """Write one LLM response to disk as raw text, JSON and markdown.

    Files written inside *output_dir* (created if needed):
      * ``notes_raw.txt``: the unmodified response, always written first.
      * ``notes.json``: the parsed note after auto-fixing and
        ``convert_note_json_to_lakhs``, or a placeholder note when the
        response cannot be parsed or processing raises.
      * ``notes_formatted.md``: the note's ``markdown_content`` if it has
        one, otherwise markdown generated from its structure. Only written
        when parsing succeeded.

    Args:
        note_data: Raw response text from the model.
        note_number: Identifier of the note being saved.
        output_dir: Target directory.

    Returns:
        True when a parsed note was saved; False when a fallback
        placeholder was written or an error occurred.
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    json_output_path = f"{output_dir}/notes.json"
    raw_output_path = f"{output_dir}/notes_raw.txt"
    formatted_md_path = f"{output_dir}/notes_formatted.md"

    try:
        # Keep the raw response around for debugging, whatever happens next.
        with open(raw_output_path, 'w', encoding='utf-8') as raw_file:
            raw_file.write(note_data)

        note_json, _ = self.extract_json_from_markdown(note_data)

        if not note_json:
            # Nothing parseable: persist a placeholder carrying every
            # required field plus the raw response.
            note_template = self.note_templates.get(note_number, {})
            fallback_json = {
                "title": note_template.get("title", f"Note {note_number}"),
                "full_title": note_template.get("full_title", f"{note_number}. Financial Note"),
                "structure": [
                    {
                        "category": "In Lakhs",
                        "subcategories": [
                            {"label": "March 31, 2024", "value": 0.00},
                            {"label": "March 31, 2023", "value": 0.00}
                        ]
                    },
                    {
                        "category": "Error - No data",
                        "subcategories": [
                            {"label": "Could not parse response", "value": 0.00, "previous_value": 0.00}
                        ],
                        "total": 0.00,
                        "previous_total": 0.00
                    }
                ],
                "metadata": {
                    "note_number": int(note_number) if note_number.isdigit() else note_number,
                    "generated_on": datetime.now().isoformat()
                },
                "assumptions": "Failed to parse LLM response",
                "raw_response": note_data,
                "error": "Could not parse JSON from response"
            }

            with open(json_output_path, 'w', encoding='utf-8') as json_file:
                json.dump(fallback_json, json_file, indent=2, ensure_ascii=False)
            logger.warning(f"Fallback JSON with required fields saved to {json_output_path}")
            return False

        # Repair what can be repaired, then validate; a failed validation
        # is only logged and the note is still saved.
        note_json = self.validate_and_fix_json(note_json, note_number)
        passed, validation_msg = self.validate_json_structure(note_json, note_number)
        if not passed:
            logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")

        # Project helper from utils.utils (presumably unit conversion; not shown here).
        note_json = convert_note_json_to_lakhs(note_json)

        with open(json_output_path, 'w', encoding='utf-8') as json_file:
            json.dump(note_json, json_file, indent=2, ensure_ascii=False)
        logger.info(f"JSON saved to {json_output_path}")

        # Prefer markdown supplied by the model; otherwise build it.
        markdown_text = note_json.get('markdown_content', '')
        if not markdown_text:
            markdown_text = self._generate_markdown_from_structure(note_json)
            logger.info("Auto-generated markdown from structure array")

        with open(formatted_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_text)

        return True

    except Exception as e:
        logger.error(f"Error saving files: {e}")

        # Emergency fallback: try to leave at least a minimal JSON file behind.
        try:
            note_template = self.note_templates.get(note_number, {})
            emergency_json = {
                "title": note_template.get("title", f"Note {note_number}"),
                "full_title": note_template.get("full_title", f"{note_number}. Financial Note"),
                "structure": [
                    {
                        "category": "In Lakhs",
                        "subcategories": [
                            {"label": "March 31, 2024", "value": 0.00},
                            {"label": "March 31, 2023", "value": 0.00}
                        ]
                    }
                ],
                "metadata": {
                    "note_number": int(note_number) if note_number.isdigit() else note_number,
                    "generated_on": datetime.now().isoformat()
                },
                "assumptions": "Emergency fallback due to processing error",
                "error": str(e)
            }
            with open(json_output_path, 'w', encoding='utf-8') as json_file:
                json.dump(emergency_json, json_file, indent=2, ensure_ascii=False)
            logger.info(f"Emergency fallback JSON saved to {json_output_path}")
        except Exception as emergency_error:
            logger.error(f"Emergency fallback also failed: {emergency_error}")

        return False
|
| 704 |
|
| 705 |
def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
|
|
|
|
| 709 |
return False
|
| 710 |
|
| 711 |
logger.info(f"Starting Note {note_number} generation...")
|
| 712 |
+
|
| 713 |
+
# Load complete trial balance
|
| 714 |
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 715 |
if not trial_balance:
|
| 716 |
return False
|
| 717 |
|
| 718 |
+
# Build prompt with full trial balance
|
| 719 |
+
prompt = self.build_llm_prompt(note_number, trial_balance)
|
| 720 |
if not prompt:
|
| 721 |
logger.error("Failed to build prompt")
|
| 722 |
return False
|
| 723 |
|
| 724 |
+
# Get LLM response
|
| 725 |
response = self.call_openrouter_api(prompt)
|
| 726 |
if not response:
|
| 727 |
logger.error("Failed to get API response")
|
| 728 |
return False
|
| 729 |
|
| 730 |
+
# Save the generated note
|
| 731 |
success = self.save_generated_note(response, note_number)
|
| 732 |
logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
|
| 733 |
return success
|
|
|
|
| 737 |
logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
|
| 738 |
results = {}
|
| 739 |
all_notes = []
|
| 740 |
+
|
| 741 |
+
# Load trial balance once
|
| 742 |
+
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 743 |
+
if not trial_balance:
|
| 744 |
+
logger.error("Failed to load trial balance")
|
| 745 |
+
return {note: False for note in self.note_templates.keys()}
|
| 746 |
+
|
| 747 |
for note_number in self.note_templates.keys():
|
| 748 |
logger.info(f"Processing Note {note_number}")
|
| 749 |
+
|
| 750 |
+
# Build prompt for this note
|
| 751 |
+
prompt = self.build_llm_prompt(note_number, trial_balance)
|
|
|
|
|
|
|
|
|
|
| 752 |
if not prompt:
|
| 753 |
results[note_number] = False
|
| 754 |
continue
|
| 755 |
+
|
| 756 |
+
# Get LLM response
|
| 757 |
response = self.call_openrouter_api(prompt)
|
| 758 |
if not response:
|
| 759 |
results[note_number] = False
|
| 760 |
continue
|
| 761 |
+
|
| 762 |
+
# Parse JSON response
|
| 763 |
json_data, _ = self.extract_json_from_markdown(response)
|
| 764 |
if json_data:
|
| 765 |
+
# Auto-fix and validate
|
| 766 |
+
json_data = self.validate_and_fix_json(json_data, note_number)
|
| 767 |
+
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
|
| 768 |
+
|
| 769 |
+
if is_valid:
|
| 770 |
+
json_data = convert_note_json_to_lakhs(json_data)
|
| 771 |
+
all_notes.append(json_data)
|
| 772 |
+
results[note_number] = True
|
| 773 |
+
logger.info(f"Note {note_number} processed successfully")
|
| 774 |
+
else:
|
| 775 |
+
logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
|
| 776 |
+
# Still include it but mark as failed
|
| 777 |
+
json_data = convert_note_json_to_lakhs(json_data)
|
| 778 |
+
all_notes.append(json_data)
|
| 779 |
+
results[note_number] = False
|
| 780 |
else:
|
| 781 |
+
logger.error(f"Note {note_number}: Could not parse JSON from response")
|
| 782 |
+
# Create fallback note with new structure
|
| 783 |
+
template = self.note_templates.get(note_number, {})
|
| 784 |
+
fallback_note = {
|
| 785 |
+
"title": template.get("title", f"Note {note_number}"),
|
| 786 |
+
"full_title": template.get("full_title", f"{note_number}. Financial Note"),
|
| 787 |
+
"structure": [
|
| 788 |
+
{
|
| 789 |
+
"category": "In Lakhs",
|
| 790 |
+
"subcategories": [
|
| 791 |
+
{"label": "March 31, 2024", "value": 0.00},
|
| 792 |
+
{"label": "March 31, 2023", "value": 0.00}
|
| 793 |
+
]
|
| 794 |
+
},
|
| 795 |
+
{
|
| 796 |
+
"category": "Error",
|
| 797 |
+
"subcategories": [
|
| 798 |
+
{"label": "Failed to generate from LLM response", "value": 0.00, "previous_value": 0.00}
|
| 799 |
+
],
|
| 800 |
+
"total": 0.00,
|
| 801 |
+
"previous_total": 0.00
|
| 802 |
+
}
|
| 803 |
+
],
|
| 804 |
+
"metadata": {
|
| 805 |
+
"note_number": int(note_number) if note_number.isdigit() else note_number,
|
| 806 |
+
"generated_on": datetime.now().isoformat()
|
| 807 |
+
},
|
| 808 |
+
"assumptions": "LLM response parsing failed",
|
| 809 |
+
"error": "JSON parsing failed"
|
| 810 |
+
}
|
| 811 |
+
all_notes.append(fallback_note)
|
| 812 |
results[note_number] = False
|
| 813 |
+
|
| 814 |
+
# Brief pause between API calls
|
| 815 |
import time
|
| 816 |
+
time.sleep(2)
|
| 817 |
+
|
| 818 |
+
# Save all notes in consolidated file
|
| 819 |
output_dir = settings.output_dir
|
| 820 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 821 |
+
|
| 822 |
+
consolidated_output = {
|
| 823 |
+
"notes": all_notes,
|
| 824 |
+
"generation_summary": {
|
| 825 |
+
"total_notes": len(self.note_templates),
|
| 826 |
+
"successful_notes": sum(1 for success in results.values() if success),
|
| 827 |
+
"failed_notes": sum(1 for success in results.values() if not success),
|
| 828 |
+
"generated_on": datetime.now().isoformat(),
|
| 829 |
+
"results": results
|
| 830 |
+
}
|
| 831 |
+
}
|
| 832 |
+
|
| 833 |
with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
|
| 834 |
+
json.dump(consolidated_output, f, indent=2, ensure_ascii=False)
|
| 835 |
+
|
| 836 |
successful = sum(1 for success in results.values() if success)
|
| 837 |
total = len(results)
|
| 838 |
logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
|
| 839 |
logger.info(f"All notes saved to {output_dir}/notes.json")
|
| 840 |
+
|
| 841 |
return results
|
| 842 |
|
| 843 |
def main() -> None:
    """Run the note generator from the command line or interactively.

    Command-line usage::

        python llm_notes_generator.py specific <note_numbers>
        python llm_notes_generator.py all

    ``<note_numbers>`` is a comma-separated list and is only required for
    ``specific`` mode. With no arguments the user is prompted to choose
    between generating one note or all notes.

    Exits with status 1 on invalid arguments or an unexpected error, and
    with status 0 when interrupted from the keyboard.
    """
    try:
        # Initialize generator
        generator = FlexibleFinancialNoteGenerator()
        if not generator.note_templates:
            logger.error("No note templates loaded. Check notes_template.py")
            return

        logger.info(f"Loaded {len(generator.note_templates)} note templates")

        if len(sys.argv) > 1:
            # Command line mode
            mode = sys.argv[1].lower()
            note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""

            if mode == "specific":
                if not note_numbers:
                    logger.error("Note numbers required for specific mode")
                    logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                    logger.error("  mode: 'specific' or 'all'")
                    logger.error("  note_numbers: comma-separated note numbers (for specific mode)")
                    sys.exit(1)

                # Drop empty entries produced by stray commas ("10,11,").
                note_list = [n.strip() for n in note_numbers.split(",") if n.strip()]
                all_notes = []
                successful_notes = []

                # generate_note() writes each note to notes.json inside the
                # configured output directory; read it back from there.
                generated_path = f"{settings.output_dir}/notes.json"

                for note_number in note_list:
                    if note_number not in generator.note_templates:
                        logger.error(f"Note {note_number} not found in templates")
                        continue

                    if not generator.generate_note(note_number):
                        logger.error(f"Failed to generate note {note_number}")
                        continue

                    # Load the generated note before the next one overwrites it
                    try:
                        with open(generated_path, "r", encoding="utf-8") as f:
                            note_data = json.load(f)
                    except Exception as e:
                        logger.error(f"Failed to load generated note {note_number}: {e}")
                    else:
                        all_notes.append(note_data)
                        successful_notes.append(note_number)
                        logger.info(f"Note {note_number} generated successfully")

                # Save consolidated notes
                if all_notes:
                    output_dir = settings.output_dir
                    Path(output_dir).mkdir(parents=True, exist_ok=True)
                    consolidated = {
                        "notes": all_notes,
                        "generation_summary": {
                            "requested_notes": note_list,
                            "successful_notes": successful_notes,
                            "total_successful": len(successful_notes),
                            "generated_on": datetime.now().isoformat()
                        }
                    }
                    with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
                        json.dump(consolidated, f, indent=2, ensure_ascii=False)
                    logger.info(f"Consolidated notes saved to {output_dir}/notes.json")

            elif mode == "all":
                # No note numbers are needed (or used) in this mode.
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")

                # Print detailed results
                for note, success in results.items():
                    status = "✅ SUCCESS" if success else "❌ FAILED"
                    logger.info(f"  Note {note}: {status}")

            else:
                logger.error("Invalid mode. Use 'specific' or 'all'")
                logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                sys.exit(1)

        else:
            # Interactive mode
            choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()

            if choice == "1":
                available_notes = list(generator.note_templates.keys())
                print(f"Available notes: {', '.join(available_notes)}")
                note_number = input("Enter note number: ").strip()

                if note_number in available_notes:
                    success = generator.generate_note(note_number)
                    logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
                else:
                    logger.error(f"Note {note_number} not found")

            elif choice == "2":
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")

                # Print summary
                print("\n" + "="*50)
                print("GENERATION SUMMARY")
                print("="*50)
                for note, success in results.items():
                    status = "✅ SUCCESS" if success else "❌ FAILED"
                    print(f"Note {note}: {status}")
                print("="*50)

            else:
                logger.error("Invalid choice. Enter 1 or 2.")

    except KeyboardInterrupt:
        logger.info("Generation interrupted by user")
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: log with traceback and signal failure to the shell.
        logger.error(f"Error: {e}", exc_info=True)
        sys.exit(1)
|
| 964 |
|
| 965 |
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|