Update notes/llm_notes_generator.py

#5
Files changed (1) hide show
  1. notes/llm_notes_generator.py +688 -250
notes/llm_notes_generator.py CHANGED
@@ -10,6 +10,7 @@ class FlexibleFinancialNoteGenerator:
10
  def generate_all_notes(self, trial_balance_path=None):
11
  # Placeholder logic
12
  return {"dummy": True}
 
13
  import json
14
  import os
15
  import logging
@@ -23,10 +24,11 @@ from typing import Dict, List, Any, Optional, Tuple
23
  import pandas as pd
24
  from pydantic import BaseModel, ValidationError
25
  from pydantic_settings import BaseSettings
 
26
  from utils.utils import convert_note_json_to_lakhs
27
 
28
  # Load environment variables
29
- load_dotenv()
30
 
31
  # Configure logging
32
  logging.basicConfig(level=logging.INFO)
@@ -73,61 +75,32 @@ class FlexibleFinancialNoteGenerator:
73
  "X-Title": "Financial Note Generator"
74
  }
75
  self.note_templates = self.load_note_templates()
76
- self.account_patterns = self._init_account_patterns()
77
  self.recommended_models = [
78
- "mistralai/mixtral-8x7b-instruct",
79
- "mistralai/mistral-7b-instruct-v0.2"
 
 
80
  ]
81
 
82
- def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
83
- """Initialize account classification patterns."""
84
- return {
85
- "10": {
86
- "keywords": ["security deposit", "long term advance", "deposit", "advance recoverable"],
87
- "groups": ["Long Term Loans and Advances", "Non-Current Assets"],
88
- "exclude_keywords": ["short term", "current", "trade"]
89
- },
90
- "11": {
91
- "keywords": ["inventory", "stock", "raw material", "finished goods", "work in progress", "consumables"],
92
- "groups": ["Inventories", "Current Assets"],
93
- "exclude_keywords": ["advance", "deposit"]
94
- },
95
- "12": {
96
- "keywords": ["trade receivable", "debtors", "accounts receivable", "sundry debtors"],
97
- "groups": ["Trade Receivables", "Current Assets"],
98
- "exclude_keywords": ["advance", "deposit"]
99
- },
100
- "13": {
101
- "keywords": ["cash", "bank", "petty cash", "cash on hand", "current account", "savings account", "fixed deposit"],
102
- "groups": ["Cash and Bank Balances", "Current Assets"],
103
- "exclude_keywords": ["advance", "loan"]
104
- },
105
- "14": {
106
- "keywords": ["prepaid", "advance", "short term", "employee advance", "supplier advance", "advance tax", "tds", "gst", "statutory"],
107
- "groups": ["Short Term Loans and Advances", "Current Assets"],
108
- "exclude_keywords": ["long term", "security deposit"]
109
- },
110
- "15": {
111
- "keywords": ["interest accrued", "accrued income", "other current", "miscellaneous current"],
112
- "groups": ["Other Current Assets", "Current Assets"],
113
- "exclude_keywords": ["trade", "advance"]
114
- }
115
- }
116
-
117
  def load_note_templates(self) -> Dict[str, Any]:
118
- """Load note templates from app.notes_template.py file."""
119
  try:
120
- from .notes_template import note_templates
 
 
 
 
121
  return note_templates
122
  except ImportError as e:
123
- logger.error(f"Error importing note_templates from app.notes_template: {e}")
124
  return {}
125
  except Exception as e:
126
  logger.error(f"Unexpected error loading note_templates: {e}")
127
  return {}
128
 
129
  def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
130
- """Load the classified trial balance from Excel or JSON."""
131
  try:
132
  if file_path.endswith('.json'):
133
  with open(file_path, 'r', encoding='utf-8') as f:
@@ -156,186 +129,155 @@ class FlexibleFinancialNoteGenerator:
156
  logger.error(f"Error loading trial balance: {e}")
157
  return None
158
 
159
- def classify_accounts_by_note(self, trial_balance_data: Dict[str, Any], note_number: str) -> List[Dict[str, Any]]:
160
- """Classify accounts based on note number and patterns"""
161
- if not trial_balance_data or "accounts" not in trial_balance_data:
162
- return []
163
-
164
- classified_accounts = []
165
- patterns = self.account_patterns.get(note_number, {})
166
- keywords = patterns.get("keywords", [])
167
- groups = patterns.get("groups", [])
168
- exclude_keywords = patterns.get("exclude_keywords", [])
169
-
170
- for account in trial_balance_data["accounts"]:
171
- account_name = account.get("account_name", "").lower()
172
- account_group = account.get("group", "")
173
-
174
- if any(exclude_word.lower() in account_name for exclude_word in exclude_keywords):
175
- continue
176
-
177
- keyword_match = any(keyword.lower() in account_name for keyword in keywords)
178
- group_match = account_group in groups
179
-
180
- if keyword_match or group_match:
181
- classified_accounts.append(account)
182
-
183
- logger.info(f"Classified {len(classified_accounts)} accounts for Note {note_number}")
184
- return classified_accounts
185
-
186
- def safe_amount_conversion(self, amount: Any, conversion_factor: float = 100000) -> float:
187
- """Safely convert amount to lakhs"""
188
- try:
189
- if isinstance(amount, str):
190
- cleaned = re.sub(r'[^\d.-]', '', amount)
191
- amount_float = float(cleaned) if cleaned else 0.0
192
- else:
193
- amount_float = float(amount) if amount is not None else 0.0
194
- return round(amount_float / conversion_factor, 2)
195
- except (ValueError, TypeError):
196
- return 0.0
197
-
198
- def calculate_totals(self, accounts: List[Dict[str, Any]], conversion_factor: float = 100000) -> Tuple[float, float]:
199
- """Calculate totals with safe amount conversion"""
200
- total_amount = 0.0
201
- for account in accounts:
202
- amount = self.safe_amount_conversion(account.get("amount", 0), 1)
203
- total_amount += amount
204
- total_lakhs = round(total_amount / conversion_factor, 2)
205
- return total_amount, total_lakhs
206
-
207
- def categorize_accounts(self, accounts: List[Dict[str, Any]], note_number: str) -> Dict[str, List[Dict[str, Any]]]:
208
- """Categorize accounts based on note-specific rules"""
209
- categories = {
210
- "prepaid_expenses": [],
211
- "other_advances": [],
212
- "advance_tax": [],
213
- "statutory_balances": [],
214
- "uncategorized": []
215
- } if note_number == "14" else {}
216
-
217
- for account in accounts:
218
- account_name = account.get("account_name", "").lower()
219
- categorized = False
220
-
221
- if note_number == "14":
222
- if "prepaid" in account_name:
223
- categories["prepaid_expenses"].append(account)
224
- categorized = True
225
- elif any(word in account_name for word in ["advance tax", "tax advance", "income tax"]):
226
- categories["advance_tax"].append(account)
227
- categorized = True
228
- elif any(word in account_name for word in ["tds", "gst", "statutory", "government", "vat", "pf", "esi"]):
229
- categories["statutory_balances"].append(account)
230
- categorized = True
231
- elif any(word in account_name for word in ["advance", "deposit", "recoverable", "employee advance", "supplier advance"]):
232
- categories["other_advances"].append(account)
233
- categorized = True
234
-
235
- if not categorized:
236
- categories["uncategorized"].append(account)
237
-
238
- return categories
239
-
240
- def calculate_category_totals(self, categories: Dict[str, List[Dict[str, Any]]], conversion_factor: float = 100000) -> Tuple[Dict[str, Dict[str, Any]], float]:
241
- """Calculate totals for each category"""
242
- category_totals = {}
243
- grand_total = 0.0
244
-
245
- for category_name, accounts in categories.items():
246
- if not isinstance(accounts, list):
247
- continue
248
- total_amount = 0.0
249
- for account in accounts:
250
- amount = self.safe_amount_conversion(account.get("amount", 0), 1)
251
- total_amount += amount
252
- total_lakhs = round(total_amount / conversion_factor, 2)
253
- category_totals[category_name] = {
254
- "amount": total_amount,
255
- "lakhs": total_lakhs,
256
- "count": len(accounts),
257
- "accounts": [acc.get("account_name", "") for acc in accounts]
258
- }
259
- grand_total += total_amount
260
-
261
- return category_totals, round(grand_total / conversion_factor, 2)
262
-
263
- def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any], classified_accounts: List[Dict[str, Any]]) -> Optional[str]:
264
- """Build dynamic LLM prompt based on note template and classified accounts"""
265
  if note_number not in self.note_templates:
266
  return None
267
 
268
  template = self.note_templates[note_number]
269
- total_amount, total_lakhs = self.calculate_totals(classified_accounts)
270
- categories = self.categorize_accounts(classified_accounts, note_number)
271
- category_totals, grand_total_lakhs = self.calculate_category_totals(categories)
272
 
 
273
  context = {
274
  "note_info": {
275
  "number": note_number,
276
  "title": template.get("title", ""),
277
  "full_title": template.get("full_title", "")
278
  },
279
- "financial_data": {
280
- "total_accounts": len(classified_accounts),
281
- "total_amount": total_amount,
282
- "total_lakhs": total_lakhs,
283
- "grand_total_lakhs": grand_total_lakhs
284
  },
285
- "categories": category_totals,
286
- "trial_balance": trial_balance_data,
287
  "current_date": datetime.now().strftime("%Y-%m-%d"),
288
  "financial_year": "2023-24"
289
  }
290
 
291
- prompt = (
292
- f"\nYou are a financial reporting AI system with two roles:\n"
293
- f"1. ACCOUNTANT — You extract, compute, and classify data from the financial context and trial balance.\n"
294
- f"2. AUDITOR — You review the Accountant’s output for accuracy, assumptions, and consistency with reporting standards.\n"
295
- f"\nYour task is to generate a financial note titled: \"{template['full_title']}\" strictly following the JSON structure below, based on the provided financial context and trial balance data.\n"
296
- f"\n---\n**CRITICAL RULES**\n"
297
- f"- Respond ONLY with a valid JSON object (no markdown, no explanations).\n"
298
- f"- If a value is unavailable or not calculable, use `0.0`.\n"
299
- f"- Strictly Convert all ₹ amounts to lakhs by dividing by 100000 and round to 2 decimal places.\n"
300
- f"- Ensure that category subtotals **match** the grand total.\n"
301
- f"- Return a key `markdown_content` containing a markdown-formatted table for this note.\n"
302
- f"- Validate that your JSON structure matches the `TEMPLATE STRUCTURE` exactly.\n"
303
- f"- Perform intelligent classification: if an entry from the trial balance clearly fits a category, assign it logically.\n"
304
- f"- If data is ambiguous, make a conservative estimate, and record it in an `assumptions` field inside the JSON.\n"
305
- f"\n---\n**REFLECTION**\n"
306
- f"- After generating the financial note, reflect on the process: Did you miss any data? Are there any uncertainties or assumptions that should be highlighted?\n"
307
- f"- Explicitly mention any limitations, ambiguities, or areas where further information would improve accuracy in the `assumptions` field.\n"
308
- f"\n**REFLEXION**\n"
309
- f"- Before finalizing the output, review your own reasoning and calculations. Double-check that all ₹ amounts are converted to lakhs and that category subtotals match the grand total.\n"
310
- f"- If you spot any inconsistencies or possible errors, correct them and note your corrections in the `assumptions` field.\n"
311
- f"\n**TALES**\n"
312
- f"- For each major category or unusual entry, briefly narrate (in the `assumptions` field) the story or logic behind its classification, especially if it required inference or was ambiguous.\n"
313
- f"- Use the `assumptions` field to share any tales of how you mapped trial balance entries to categories, including any conservative estimates or judgment calls.\n"
314
- f"\n---\n**TEMPLATE STRUCTURE**\n{json.dumps(template, indent=2)}\n"
315
- f"\n---\n**TRIAL BALANCE & CONTEXT**\n{json.dumps(context, indent=2)}\n"
316
- f"\n---\n**CATEGORY RULES FOR NOTE 14 (Short Term Loans and Advances):**\n"
317
- f"- Categorize entries under:\n"
318
- f" - Unsecured, considered good:\n"
319
- f" - Prepaid Expenses\n"
320
- f" - Other Advances\n"
321
- f" - Other loans and advances:\n"
322
- f" - Advance Tax\n"
323
- f" - Balances with statutory/government authorities\n"
324
- f"- Use logical inference to map trial balance entries into these subcategories\n"
325
- f"- If values for March 31, 2023 are missing, default to 0\n"
326
- f"- Ensure the sum of all subcategories = `Total`\n"
327
- f"\n---\n**REQUIRED OUTPUT JSON FORMAT**\n"
328
- f"- The JSON must include:\n"
329
- f" - All categories and subcategories with March 2024 and March 2023 values\n"
330
- f" - A computed `grand_total_lakhs`\n"
331
- f" - A `markdown_content` with the financial note table\n"
332
- f" - A `generated_on` timestamp\n"
333
- f" - An `assumptions` field (optional, if any data was inferred or missing)\n"
334
- f"\n---\nGenerate the final JSON now:\n"
335
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  return prompt
338
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  def call_openrouter_api(self, prompt: str) -> Optional[str]:
340
  """Make API call to OpenRouter with model fallback"""
341
  for model in self.recommended_models:
@@ -343,10 +285,13 @@ class FlexibleFinancialNoteGenerator:
343
  payload = {
344
  "model": model,
345
  "messages": [
346
- {"role": "system", "content": "You are a financial reporting expert. Always respond with valid JSON only."},
 
 
 
347
  {"role": "user", "content": prompt}
348
  ],
349
- "max_tokens": 8000,
350
  "temperature": 0.1,
351
  "top_p": 0.9
352
  }
@@ -355,13 +300,20 @@ class FlexibleFinancialNoteGenerator:
355
  self.api_url,
356
  headers=self.headers,
357
  json=payload,
358
- timeout=30 # <-- Add timeout here!
359
  )
360
  response.raise_for_status()
361
  result = response.json()
362
  content = result['choices'][0]['message']['content']
363
  logger.info(f"Successful response from {model}")
364
  return content
 
 
 
 
 
 
 
365
  except Exception as e:
366
  logger.error(f"Failed with {model}: {e}")
367
  continue
@@ -369,64 +321,385 @@ class FlexibleFinancialNoteGenerator:
369
  return None
370
 
371
  def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
372
- """Extract JSON from response, handling markdown code blocks"""
373
  response_text = response_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  json_patterns = [
375
  r'```json\s*(.*?)\s*```',
376
  r'```\s*(.*?)\s*```',
377
- r'(\{.*\})'
378
  ]
379
 
380
  for pattern in json_patterns:
381
  match = re.search(pattern, response_text, re.DOTALL)
382
  if match:
383
  try:
384
- json_data = json.loads(match.group(1))
385
- return json_data, match.group(1)
 
386
  except json.JSONDecodeError:
387
  continue
388
 
 
389
  try:
390
  json_data = json.loads(response_text)
391
  return json_data, response_text
392
  except json.JSONDecodeError:
 
 
 
 
 
 
 
 
 
 
 
393
  return None, None
394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
396
- """Save the generated note to file in both JSON and markdown formats"""
397
  Path(output_dir).mkdir(parents=True, exist_ok=True)
398
  json_output_path = f"{output_dir}/notes.json"
399
  raw_output_path = f"{output_dir}/notes_raw.txt"
400
  formatted_md_path = f"{output_dir}/notes_formatted.md"
401
 
402
  try:
 
403
  with open(raw_output_path, 'w', encoding='utf-8') as f:
404
  f.write(note_data)
 
 
405
  json_data, json_string = self.extract_json_from_markdown(note_data)
 
406
  if json_data:
 
 
 
 
 
 
 
 
 
407
  json_data = convert_note_json_to_lakhs(json_data)
 
 
408
  with open(json_output_path, 'w', encoding='utf-8') as f:
409
  json.dump(json_data, f, indent=2, ensure_ascii=False)
410
  logger.info(f"JSON saved to {json_output_path}")
411
- md_content = json_data.get('markdown_content')
 
 
412
  if not md_content:
413
- md_content = f"# Note {note_number}\n\n```json\n{json.dumps(json_data, indent=2)}\n```"
 
 
 
414
  with open(formatted_md_path, 'w', encoding='utf-8') as f:
415
  f.write(md_content)
 
416
  return True
417
  else:
 
 
418
  fallback_json = {
419
- "note_number": note_number,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  "raw_response": note_data,
421
- "error": "Could not parse JSON from response",
422
- "generated_on": datetime.now().isoformat()
423
  }
 
424
  with open(json_output_path, 'w', encoding='utf-8') as f:
425
  json.dump(fallback_json, f, indent=2, ensure_ascii=False)
426
- logger.warning(f"Fallback JSON saved to {json_output_path}")
427
  return False
 
428
  except Exception as e:
429
  logger.error(f"Error saving files: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  return False
431
 
432
  def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
@@ -436,21 +709,25 @@ class FlexibleFinancialNoteGenerator:
436
  return False
437
 
438
  logger.info(f"Starting Note {note_number} generation...")
 
 
439
  trial_balance = self.load_trial_balance(trial_balance_path)
440
  if not trial_balance:
441
  return False
442
 
443
- classified_accounts = self.classify_accounts_by_note(trial_balance, note_number)
444
- prompt = self.build_llm_prompt(note_number, trial_balance, classified_accounts)
445
  if not prompt:
446
  logger.error("Failed to build prompt")
447
  return False
448
 
 
449
  response = self.call_openrouter_api(prompt)
450
  if not response:
451
  logger.error("Failed to get API response")
452
  return False
453
 
 
454
  success = self.save_generated_note(response, note_number)
455
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
456
  return success
@@ -460,69 +737,230 @@ class FlexibleFinancialNoteGenerator:
460
  logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
461
  results = {}
462
  all_notes = []
 
 
 
 
 
 
 
463
  for note_number in self.note_templates.keys():
464
  logger.info(f"Processing Note {note_number}")
465
- trial_balance = self.load_trial_balance(trial_balance_path)
466
- if not trial_balance:
467
- results[note_number] = False
468
- continue
469
- classified_accounts = self.classify_accounts_by_note(trial_balance, note_number)
470
- prompt = self.build_llm_prompt(note_number, trial_balance, classified_accounts)
471
  if not prompt:
472
  results[note_number] = False
473
  continue
 
 
474
  response = self.call_openrouter_api(prompt)
475
  if not response:
476
  results[note_number] = False
477
  continue
 
 
478
  json_data, _ = self.extract_json_from_markdown(response)
479
  if json_data:
480
- all_notes.append(json_data)
481
- results[note_number] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  results[note_number] = False
 
 
484
  import time
485
- time.sleep(1)
486
- # Save all notes in one file
 
487
  output_dir = settings.output_dir
488
  Path(output_dir).mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
489
  with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
490
- json.dump({"notes": all_notes}, f, indent=2, ensure_ascii=False)
 
491
  successful = sum(1 for success in results.values() if success)
492
  total = len(results)
493
  logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
494
  logger.info(f"All notes saved to {output_dir}/notes.json")
 
495
  return results
496
 
497
  def main() -> None:
498
  """Main function to run the flexible note generator"""
499
  try:
 
500
  generator = FlexibleFinancialNoteGenerator()
501
  if not generator.note_templates:
502
- logger.error("No note templates loaded. Check app/new.py")
503
  return
504
-
505
  logger.info(f"Loaded {len(generator.note_templates)} note templates")
506
- choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
507
-
508
- if choice == "1":
509
- available_notes = list(generator.note_templates.keys())
510
- print(f"Available notes: {', '.join(available_notes)}")
511
- note_number = input("Enter note number: ").strip()
512
- if note_number in available_notes:
513
- success = generator.generate_note(note_number)
514
- logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  else:
516
- logger.error(f"Note {note_number} not found")
517
- elif choice == "2":
518
- results = generator.generate_all_notes()
519
- successful = sum(1 for success in results.values() if success)
520
- total = len(results)
521
- logger.info(f"{successful}/{total} notes generated successfully")
522
  else:
523
- logger.error("Invalid choice. Enter 1 or 2.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  except Exception as e:
525
  logger.error(f"Error: {e}", exc_info=True)
 
526
 
527
  if __name__ == "__main__":
528
  main()
 
10
  def generate_all_notes(self, trial_balance_path=None):
11
  # Placeholder logic
12
  return {"dummy": True}
13
+
14
  import json
15
  import os
16
  import logging
 
24
  import pandas as pd
25
  from pydantic import BaseModel, ValidationError
26
  from pydantic_settings import BaseSettings
27
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
28
  from utils.utils import convert_note_json_to_lakhs
29
 
30
  # Load environment variables
31
+ load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
32
 
33
  # Configure logging
34
  logging.basicConfig(level=logging.INFO)
 
75
  "X-Title": "Financial Note Generator"
76
  }
77
  self.note_templates = self.load_note_templates()
78
+ # Updated model list with DeepSeek as first choice
79
  self.recommended_models = [
80
+
81
+ "deepseek/deepseek-r1",
82
+ #"deepseek/deepseek-coder",
83
+ "mistralai/mixtral-8x7b-instruct"
84
  ]
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def load_note_templates(self) -> Dict[str, Any]:
87
+ """Load note templates from notes_template.py file."""
88
  try:
89
+ # Add parent directory to path for imports when run as script
90
+ if __name__ == "__main__":
91
+ sys.path.append(str(Path(__file__).parent.parent))
92
+
93
+ from notes_template import note_templates
94
  return note_templates
95
  except ImportError as e:
96
+ logger.error(f"Error importing note_templates from notes_template: {e}")
97
  return {}
98
  except Exception as e:
99
  logger.error(f"Unexpected error loading note_templates: {e}")
100
  return {}
101
 
102
  def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
103
+ """Load the complete trial balance from Excel or JSON."""
104
  try:
105
  if file_path.endswith('.json'):
106
  with open(file_path, 'r', encoding='utf-8') as f:
 
129
  logger.error(f"Error loading trial balance: {e}")
130
  return None
131
 
132
+ def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
133
+ """Build comprehensive LLM prompt with strict JSON output requirements"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  if note_number not in self.note_templates:
135
  return None
136
 
137
  template = self.note_templates[note_number]
138
+ all_accounts = trial_balance_data.get("accounts", [])
 
 
139
 
140
+ # Build context with full trial balance
141
  context = {
142
  "note_info": {
143
  "number": note_number,
144
  "title": template.get("title", ""),
145
  "full_title": template.get("full_title", "")
146
  },
147
+ "trial_balance": {
148
+ "total_accounts": len(all_accounts),
149
+ "accounts": all_accounts
 
 
150
  },
 
 
151
  "current_date": datetime.now().strftime("%Y-%m-%d"),
152
  "financial_year": "2023-24"
153
  }
154
 
155
+ # Get note-specific classification guidance
156
+ classification_guide = self._get_classification_guide(note_number)
157
+
158
+ prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
159
+
160
+ 🔴 CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
161
+ 1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
162
+ 2. START YOUR RESPONSE WITH {{ and END WITH }}
163
+ 3. DO NOT USE ```json``` CODE BLOCKS
164
+ 4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
165
+
166
+ 🔴 REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
167
+ {{
168
+ "title": "{template.get('title', '')}",
169
+ "full_title": "{template.get('full_title', '')}",
170
+ "structure": [
171
+ {{
172
+ "category": "In Lakhs",
173
+ "subcategories": [
174
+ {{ "label": "March 31, 2024", "value": 0.00 }},
175
+ {{ "label": "March 31, 2023", "value": 0.00 }}
176
+ ]
177
+ }},
178
+ {{
179
+ "category": "Category Name",
180
+ "subcategories": [
181
+ {{ "label": "Subcategory Item", "value": 0.00, "previous_value": 0.00 }}
182
+ ],
183
+ "total": 0.00,
184
+ "previous_total": 0.00
185
+ }}
186
+ ],
187
+ "metadata": {{
188
+ "note_number": {note_number},
189
+ "generated_on": "{datetime.now().isoformat()}"
190
+ }},
191
+ "assumptions": "List any assumptions made during classification"
192
+ }}
193
+
194
+ 🔴 STRUCTURE ARRAY EXPLAINED:
195
+ - First element: Header row with column labels (March 31, 2024, March 31, 2023)
196
+ - Subsequent elements: Data categories with subcategories
197
+ - Each data category must have:
198
+ * "category": Main category name
199
+ * "subcategories": Array of line items with "label", "value", "previous_value"
200
+ * "total": Sum of current year values in subcategories
201
+ * "previous_total": Sum of previous year values in subcategories
202
+
203
+ 🔴 YOUR TASK:
204
+ 1. Analyze ALL trial balance accounts provided below
205
+ 2. Identify accounts that belong to "{template['full_title']}"
206
+ 3. Classify into appropriate subcategories per Schedule III
207
+ 4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
208
+ 5. Calculate accurate totals ensuring mathematical consistency
209
+ 6. Structure output in hierarchical "structure" array format
210
+
211
+ 🔴 MATHEMATICAL REQUIREMENTS:
212
+ - All amounts MUST be in lakhs (divide original by 100,000)
213
+ - All subtotals MUST equal the grand total exactly
214
+ - Use 0.00 for March 2023 if data missing
215
+ - Round to 2 decimal places consistently
216
+ - Ensure "total" = sum of "value" in subcategories
217
+ - Ensure "previous_total" = sum of "previous_value" in subcategories
218
+
219
+ 🔴 CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
220
+ {classification_guide}
221
+
222
+ 🔴 COMPLETE TRIAL BALANCE DATA:
223
+ {json.dumps(context, indent=2)}
224
+
225
+ 🔴 TEMPLATE STRUCTURE TO FOLLOW:
226
+ {json.dumps(template, indent=2)}
227
+
228
+ 🔴 VALIDATION RULES:
229
+ - If no accounts match this note category, use empty categories with 0.00 totals
230
+ - Ensure "metadata.note_number" exactly matches {note_number}
231
+ - Document classification logic in "assumptions" field
232
+ - Structure must have at least 2 elements (header + data)
233
+
234
+ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
235
 
236
  return prompt
237
+
238
+ def _get_classification_guide(self, note_number: str) -> str:
239
+ """Get note-specific classification guidance"""
240
+ guides = {
241
+ "10": """
242
+ **Note 10 - Long Term Loans and Advances:**
243
+ - Include: Security deposits, long-term advances to suppliers/employees, deposits with utilities
244
+ - Categories: Unsecured considered good, Unsecured considered doubtful, Doubtful (provision)
245
+ - Exclude: Short-term advances, trade receivables, prepaid expenses under 1 year
246
+ """,
247
+ "11": """
248
+ **Note 11 - Inventories:**
249
+ - Include: Raw materials, work-in-progress, finished goods, stores and spares, consumables
250
+ - Value at lower of cost or net realizable value
251
+ - Exclude: Advances for inventory purchases (classify as advances)
252
+ """,
253
+ "12": """
254
+ **Note 12 - Trade Receivables:**
255
+ - Include: Amounts due from customers for goods/services, bills receivable
256
+ - Categories: Unsecured considered good, Unsecured considered doubtful, Provision for doubtful debts
257
+ - Exclude: Advances, deposits, other receivables
258
+ """,
259
+ "13": """
260
+ **Note 13 - Cash and Cash Equivalents:**
261
+ - Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
262
+ - Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
263
+ - Show: Balances in current accounts, savings accounts, fixed deposits separately
264
+ """,
265
+ "14": """
266
+ **Note 14 - Short Term Loans and Advances:**
267
+ - Include: Prepaid expenses, advances to suppliers, employee advances, advance tax, TDS receivable
268
+ - Categories:
269
+ * Unsecured, considered good: Prepaid expenses, Other advances
270
+ * Other loans and advances: Advance tax, Balances with statutory/govt authorities
271
+ - Exclude: Long-term advances, trade receivables
272
+ """,
273
+ "15": """
274
+ **Note 15 - Other Current Assets:**
275
+ - Include: Interest accrued, export incentives receivable, insurance claims, other miscellaneous current assets
276
+ - Exclude: Items that fit into specific categories like trade receivables, advances, cash
277
+ """
278
+ }
279
+ return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
280
+
281
  def call_openrouter_api(self, prompt: str) -> Optional[str]:
282
  """Make API call to OpenRouter with model fallback"""
283
  for model in self.recommended_models:
 
285
  payload = {
286
  "model": model,
287
  "messages": [
288
+ {
289
+ "role": "system",
290
+ "content": "You are an expert chartered accountant specializing in Indian accounting standards. You MUST respond with ONLY valid JSON, never with markdown code blocks or explanations. Start with { and end with }."
291
+ },
292
  {"role": "user", "content": prompt}
293
  ],
294
+ "max_tokens": 12000,
295
  "temperature": 0.1,
296
  "top_p": 0.9
297
  }
 
300
  self.api_url,
301
  headers=self.headers,
302
  json=payload,
303
+ timeout=60
304
  )
305
  response.raise_for_status()
306
  result = response.json()
307
  content = result['choices'][0]['message']['content']
308
  logger.info(f"Successful response from {model}")
309
  return content
310
+ except requests.exceptions.HTTPError as e:
311
+ if e.response.status_code == 404:
312
+ logger.warning(f"Model {model} not found (404), trying next model")
313
+ elif e.response.status_code == 402:
314
+ logger.warning(f"Model {model} requires payment (402), trying next model")
315
+ else:
316
+ logger.error(f"HTTP error with {model}: {e}")
317
  except Exception as e:
318
  logger.error(f"Failed with {model}: {e}")
319
  continue
 
321
  return None
322
 
323
  def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
324
+ """Extract JSON from response, handling markdown code blocks and cleaning"""
325
  response_text = response_text.strip()
326
+
327
+ # CRITICAL FIX: Handle concatenated/duplicate JSON (e.g., "}{\n{")
328
+ # Find the first complete JSON object
329
+ json_objects = []
330
+ brace_count = 0
331
+ start_idx = -1
332
+
333
+ for i, char in enumerate(response_text):
334
+ if char == '{':
335
+ if brace_count == 0:
336
+ start_idx = i
337
+ brace_count += 1
338
+ elif char == '}':
339
+ brace_count -= 1
340
+ if brace_count == 0 and start_idx != -1:
341
+ # Found complete JSON object
342
+ potential_json = response_text[start_idx:i+1]
343
+ try:
344
+ parsed = json.loads(potential_json)
345
+ json_objects.append((parsed, potential_json))
346
+ # Use the first valid JSON object
347
+ break
348
+ except json.JSONDecodeError:
349
+ continue
350
+
351
+ if json_objects:
352
+ logger.info("Successfully extracted first valid JSON object from response")
353
+ return json_objects[0]
354
+
355
+ # Fallback: Try original extraction methods
356
+ # Remove any leading/trailing text outside JSON
357
  json_patterns = [
358
  r'```json\s*(.*?)\s*```',
359
  r'```\s*(.*?)\s*```',
360
+ r'(\{.*?\})'
361
  ]
362
 
363
  for pattern in json_patterns:
364
  match = re.search(pattern, response_text, re.DOTALL)
365
  if match:
366
  try:
367
+ json_content = match.group(1).strip()
368
+ json_data = json.loads(json_content)
369
+ return json_data, json_content
370
  except json.JSONDecodeError:
371
  continue
372
 
373
+ # Try parsing the entire response as JSON
374
  try:
375
  json_data = json.loads(response_text)
376
  return json_data, response_text
377
  except json.JSONDecodeError:
378
+ # Last attempt: find JSON-like structure
379
+ try:
380
+ start = response_text.find('{')
381
+ end = response_text.rfind('}') + 1
382
+ if start != -1 and end > start:
383
+ json_part = response_text[start:end]
384
+ json_data = json.loads(json_part)
385
+ return json_data, json_part
386
+ except json.JSONDecodeError:
387
+ pass
388
+
389
  return None, None
390
 
391
def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
    """Return a repaired copy of ``json_data`` with all required note fields present.

    Missing ``title``, ``full_title``, ``metadata``, ``structure`` and
    ``assumptions`` entries are filled from ``self.note_templates`` or from
    defaults, ``metadata.note_number`` is normalised to the requested note,
    and missing ``total`` / ``previous_total`` values on data rows are
    computed from their subcategories.

    The input is deep-copied, so the caller's dictionary (including nested
    ``metadata`` and ``structure`` objects) is never modified.

    Args:
        json_data: Parsed note object produced by the model.
        note_number: Identifier of the note being generated.

    Returns:
        The repaired note dictionary.
    """
    import copy  # local import: only this method needs it

    def _numeric_sum(subcategories, key):
        # Add up numeric amounts only; None or text placeholders from the
        # model are skipped instead of raising TypeError.
        total = 0
        for sub in subcategories:
            if not isinstance(sub, dict):
                continue
            amount = sub.get(key, 0.0)
            if isinstance(amount, (int, float)) and not isinstance(amount, bool):
                total += amount
        return total

    def _period_header():
        return {
            "category": "In Lakhs",
            "subcategories": [
                {"label": "March 31, 2024", "value": 0.00},
                {"label": "March 31, 2023", "value": 0.00}
            ]
        }

    fixed_data = copy.deepcopy(json_data)

    # Get template for this note
    template = self.note_templates.get(note_number, {})

    # Auto-fix title fields
    if not fixed_data.get("title"):
        fixed_data["title"] = template.get("title", f"Note {note_number}")
        logger.info(f"Auto-fixed missing title field")

    if not fixed_data.get("full_title"):
        fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
        logger.info(f"Auto-fixed missing full_title field")

    # Auto-fix or create metadata
    if not isinstance(fixed_data.get("metadata"), dict):
        fixed_data["metadata"] = {}
        logger.info("Auto-created metadata object")

    metadata = fixed_data["metadata"]
    metadata_note_num = metadata.get("note_number")
    try:
        expected_note_num = int(note_number)
    except (TypeError, ValueError):
        # Non-numeric note identifiers are stored verbatim.
        metadata["note_number"] = note_number
        logger.info(f"Auto-set metadata.note_number to string: {note_number}")
    else:
        # Anything other than the exact expected integer (missing, 0, 0.0,
        # a float such as 5.0, a string, a bool, a mismatch) is replaced so
        # the later string comparison in validation succeeds.
        is_exact_int = isinstance(metadata_note_num, int) and not isinstance(metadata_note_num, bool)
        if not is_exact_int or metadata_note_num != expected_note_num:
            metadata["note_number"] = expected_note_num
            logger.info(f"Auto-corrected metadata.note_number from {metadata_note_num} to {expected_note_num}")

    if "generated_on" not in metadata:
        metadata["generated_on"] = datetime.now().isoformat()
        logger.info("Auto-fixed missing metadata.generated_on field")

    # Auto-fix or create structure array
    if not isinstance(fixed_data.get("structure"), list):
        logger.warning("Structure array missing, creating default structure")
        fixed_data["structure"] = [
            _period_header(),
            {
                "category": "No data available",
                "subcategories": [
                    {"label": "Items", "value": 0.00, "previous_value": 0.00}
                ],
                "total": 0.00,
                "previous_total": 0.00
            }
        ]
    else:
        if len(fixed_data["structure"]) == 0:
            logger.warning("Empty structure array, adding default elements")
            fixed_data["structure"] = [_period_header()]

        # Ensure each structure element has required fields
        for i, struct_elem in enumerate(fixed_data["structure"]):
            if not isinstance(struct_elem, dict):
                continue

            if "category" not in struct_elem:
                struct_elem["category"] = f"Category {i}"

            if not isinstance(struct_elem.get("subcategories"), list):
                struct_elem["subcategories"] = []

            # Index 0 is the header row; only data rows carry totals.
            if i > 0 and struct_elem["subcategories"]:
                if "total" not in struct_elem:
                    struct_elem["total"] = _numeric_sum(struct_elem["subcategories"], "value")

                if "previous_total" not in struct_elem:
                    struct_elem["previous_total"] = _numeric_sum(struct_elem["subcategories"], "previous_value")

    # Auto-fix assumptions
    if "assumptions" not in fixed_data:
        fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
        logger.info("Auto-added default assumptions")

    return fixed_data
501
+
502
def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
    """Check that a note dictionary has the expected top-level shape.

    The checks run in order and stop at the first failure: required keys,
    ``metadata`` being an object with a ``note_number`` that matches
    ``note_number`` (compared as strings), and ``structure`` being a
    non-empty list.

    Returns:
        ``(True, "Validation passed")`` or ``(False, reason)``.
    """
    absent = [
        name
        for name in ("title", "full_title", "structure", "metadata", "assumptions")
        if name not in json_data
    ]
    if absent:
        return False, "Missing required fields: " + ", ".join(absent)

    meta = json_data.get("metadata")
    if not isinstance(meta, dict):
        return False, "metadata must be an object"
    if "note_number" not in meta:
        return False, "metadata.note_number is required"

    found_number = meta.get("note_number", "")
    if str(found_number) != str(note_number):
        return False, f"Note number mismatch: expected {note_number}, got {found_number}"

    rows = json_data.get("structure")
    if not isinstance(rows, list):
        return False, "structure must be an array"
    if not rows:
        return False, "structure array cannot be empty"

    return True, "Validation passed"
534
+
535
+ def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
536
+ """Generate markdown table from structure array"""
537
+ try:
538
+ title = json_data.get("full_title", json_data.get("title", "Financial Note"))
539
+ structure = json_data.get("structure", [])
540
+
541
+ if not structure:
542
+ return f"# {title}\n\n*No data available*"
543
+
544
+ # Start markdown
545
+ md_lines = [f"# {title}\n"]
546
+
547
+ # Get header row (first element)
548
+ header_elem = structure[0] if len(structure) > 0 else None
549
+ if header_elem and header_elem.get("subcategories"):
550
+ headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
551
+ md_lines.append("| Particulars | " + " | ".join(headers) + " |")
552
+ md_lines.append("|" + "---|" * (len(headers) + 1))
553
+
554
+ # Process data rows
555
+ for i in range(1, len(structure)):
556
+ elem = structure[i]
557
+ category = elem.get("category", "")
558
+ subcategories = elem.get("subcategories", [])
559
+
560
+ # Add category header if exists
561
+ if category:
562
+ md_lines.append(f"\n**{category}**\n")
563
+
564
+ # Add subcategory rows
565
+ for sub in subcategories:
566
+ label = sub.get("label", "")
567
+ value = sub.get("value", 0.00)
568
+ previous_value = sub.get("previous_value", 0.00)
569
+ md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
570
+
571
+ # Add total row if exists
572
+ if "total" in elem:
573
+ total = elem.get("total", 0.00)
574
+ previous_total = elem.get("previous_total", 0.00)
575
+ md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
576
+
577
+ # Add metadata
578
+ metadata = json_data.get("metadata", {})
579
+ md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
580
+
581
+ # Add assumptions if present
582
+ assumptions = json_data.get("assumptions", "")
583
+ if assumptions:
584
+ md_lines.append(f"\n\n**Assumptions:** {assumptions}")
585
+
586
+ return "\n".join(md_lines)
587
+
588
+ except Exception as e:
589
+ logger.error(f"Error generating markdown from structure: {e}")
590
+ return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
591
+
592
  def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
593
+ """Save the generated note to file with robust validation and auto-fixing"""
594
  Path(output_dir).mkdir(parents=True, exist_ok=True)
595
  json_output_path = f"{output_dir}/notes.json"
596
  raw_output_path = f"{output_dir}/notes_raw.txt"
597
  formatted_md_path = f"{output_dir}/notes_formatted.md"
598
 
599
  try:
600
+ # Always save raw response for debugging
601
  with open(raw_output_path, 'w', encoding='utf-8') as f:
602
  f.write(note_data)
603
+
604
+ # Extract and validate JSON
605
  json_data, json_string = self.extract_json_from_markdown(note_data)
606
+
607
  if json_data:
608
+ # Auto-fix missing or incorrect fields
609
+ json_data = self.validate_and_fix_json(json_data, note_number)
610
+
611
+ # Final validation
612
+ is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
613
+ if not is_valid:
614
+ logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
615
+
616
+ # Convert to lakhs if needed
617
  json_data = convert_note_json_to_lakhs(json_data)
618
+
619
+ # Save JSON
620
  with open(json_output_path, 'w', encoding='utf-8') as f:
621
  json.dump(json_data, f, indent=2, ensure_ascii=False)
622
  logger.info(f"JSON saved to {json_output_path}")
623
+
624
+ # Generate and save markdown
625
+ md_content = json_data.get('markdown_content', '')
626
  if not md_content:
627
+ # Generate markdown from structure
628
+ md_content = self._generate_markdown_from_structure(json_data)
629
+ logger.info("Auto-generated markdown from structure array")
630
+
631
  with open(formatted_md_path, 'w', encoding='utf-8') as f:
632
  f.write(md_content)
633
+
634
  return True
635
  else:
636
+ # Create fallback JSON with all required fields
637
+ template = self.note_templates.get(note_number, {})
638
  fallback_json = {
639
+ "title": template.get("title", f"Note {note_number}"),
640
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
641
+ "structure": [
642
+ {
643
+ "category": "In Lakhs",
644
+ "subcategories": [
645
+ {"label": "March 31, 2024", "value": 0.00},
646
+ {"label": "March 31, 2023", "value": 0.00}
647
+ ]
648
+ },
649
+ {
650
+ "category": "Error - No data",
651
+ "subcategories": [
652
+ {"label": "Could not parse response", "value": 0.00, "previous_value": 0.00}
653
+ ],
654
+ "total": 0.00,
655
+ "previous_total": 0.00
656
+ }
657
+ ],
658
+ "metadata": {
659
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
660
+ "generated_on": datetime.now().isoformat()
661
+ },
662
+ "assumptions": "Failed to parse LLM response",
663
  "raw_response": note_data,
664
+ "error": "Could not parse JSON from response"
 
665
  }
666
+
667
  with open(json_output_path, 'w', encoding='utf-8') as f:
668
  json.dump(fallback_json, f, indent=2, ensure_ascii=False)
669
+ logger.warning(f"Fallback JSON with required fields saved to {json_output_path}")
670
  return False
671
+
672
  except Exception as e:
673
  logger.error(f"Error saving files: {e}")
674
+
675
+ # Emergency fallback
676
+ try:
677
+ template = self.note_templates.get(note_number, {})
678
+ emergency_json = {
679
+ "title": template.get("title", f"Note {note_number}"),
680
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
681
+ "structure": [
682
+ {
683
+ "category": "In Lakhs",
684
+ "subcategories": [
685
+ {"label": "March 31, 2024", "value": 0.00},
686
+ {"label": "March 31, 2023", "value": 0.00}
687
+ ]
688
+ }
689
+ ],
690
+ "metadata": {
691
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
692
+ "generated_on": datetime.now().isoformat()
693
+ },
694
+ "assumptions": "Emergency fallback due to processing error",
695
+ "error": str(e)
696
+ }
697
+ with open(json_output_path, 'w', encoding='utf-8') as f:
698
+ json.dump(emergency_json, f, indent=2, ensure_ascii=False)
699
+ logger.info(f"Emergency fallback JSON saved to {json_output_path}")
700
+ except Exception as emergency_error:
701
+ logger.error(f"Emergency fallback also failed: {emergency_error}")
702
+
703
  return False
704
 
705
  def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
 
709
  return False
710
 
711
  logger.info(f"Starting Note {note_number} generation...")
712
+
713
+ # Load complete trial balance
714
  trial_balance = self.load_trial_balance(trial_balance_path)
715
  if not trial_balance:
716
  return False
717
 
718
+ # Build prompt with full trial balance
719
+ prompt = self.build_llm_prompt(note_number, trial_balance)
720
  if not prompt:
721
  logger.error("Failed to build prompt")
722
  return False
723
 
724
+ # Get LLM response
725
  response = self.call_openrouter_api(prompt)
726
  if not response:
727
  logger.error("Failed to get API response")
728
  return False
729
 
730
+ # Save the generated note
731
  success = self.save_generated_note(response, note_number)
732
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
733
  return success
 
737
  logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
738
  results = {}
739
  all_notes = []
740
+
741
+ # Load trial balance once
742
+ trial_balance = self.load_trial_balance(trial_balance_path)
743
+ if not trial_balance:
744
+ logger.error("Failed to load trial balance")
745
+ return {note: False for note in self.note_templates.keys()}
746
+
747
  for note_number in self.note_templates.keys():
748
  logger.info(f"Processing Note {note_number}")
749
+
750
+ # Build prompt for this note
751
+ prompt = self.build_llm_prompt(note_number, trial_balance)
 
 
 
752
  if not prompt:
753
  results[note_number] = False
754
  continue
755
+
756
+ # Get LLM response
757
  response = self.call_openrouter_api(prompt)
758
  if not response:
759
  results[note_number] = False
760
  continue
761
+
762
+ # Parse JSON response
763
  json_data, _ = self.extract_json_from_markdown(response)
764
  if json_data:
765
+ # Auto-fix and validate
766
+ json_data = self.validate_and_fix_json(json_data, note_number)
767
+ is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
768
+
769
+ if is_valid:
770
+ json_data = convert_note_json_to_lakhs(json_data)
771
+ all_notes.append(json_data)
772
+ results[note_number] = True
773
+ logger.info(f"Note {note_number} processed successfully")
774
+ else:
775
+ logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
776
+ # Still include it but mark as failed
777
+ json_data = convert_note_json_to_lakhs(json_data)
778
+ all_notes.append(json_data)
779
+ results[note_number] = False
780
  else:
781
+ logger.error(f"Note {note_number}: Could not parse JSON from response")
782
+ # Create fallback note with new structure
783
+ template = self.note_templates.get(note_number, {})
784
+ fallback_note = {
785
+ "title": template.get("title", f"Note {note_number}"),
786
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
787
+ "structure": [
788
+ {
789
+ "category": "In Lakhs",
790
+ "subcategories": [
791
+ {"label": "March 31, 2024", "value": 0.00},
792
+ {"label": "March 31, 2023", "value": 0.00}
793
+ ]
794
+ },
795
+ {
796
+ "category": "Error",
797
+ "subcategories": [
798
+ {"label": "Failed to generate from LLM response", "value": 0.00, "previous_value": 0.00}
799
+ ],
800
+ "total": 0.00,
801
+ "previous_total": 0.00
802
+ }
803
+ ],
804
+ "metadata": {
805
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
806
+ "generated_on": datetime.now().isoformat()
807
+ },
808
+ "assumptions": "LLM response parsing failed",
809
+ "error": "JSON parsing failed"
810
+ }
811
+ all_notes.append(fallback_note)
812
  results[note_number] = False
813
+
814
+ # Brief pause between API calls
815
  import time
816
+ time.sleep(2)
817
+
818
+ # Save all notes in consolidated file
819
  output_dir = settings.output_dir
820
  Path(output_dir).mkdir(parents=True, exist_ok=True)
821
+
822
+ consolidated_output = {
823
+ "notes": all_notes,
824
+ "generation_summary": {
825
+ "total_notes": len(self.note_templates),
826
+ "successful_notes": sum(1 for success in results.values() if success),
827
+ "failed_notes": sum(1 for success in results.values() if not success),
828
+ "generated_on": datetime.now().isoformat(),
829
+ "results": results
830
+ }
831
+ }
832
+
833
  with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
834
+ json.dump(consolidated_output, f, indent=2, ensure_ascii=False)
835
+
836
  successful = sum(1 for success in results.values() if success)
837
  total = len(results)
838
  logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
839
  logger.info(f"All notes saved to {output_dir}/notes.json")
840
+
841
  return results
842
 
843
  def main() -> None:
844
  """Main function to run the flexible note generator"""
845
  try:
846
+ # Initialize generator
847
  generator = FlexibleFinancialNoteGenerator()
848
  if not generator.note_templates:
849
+ logger.error("No note templates loaded. Check notes_template.py")
850
  return
851
+
852
  logger.info(f"Loaded {len(generator.note_templates)} note templates")
853
+
854
+ # Check for command line arguments
855
+ if len(sys.argv) > 1:
856
+ # Command line mode
857
+ if len(sys.argv) < 3:
858
+ logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
859
+ logger.error(" mode: 'specific' or 'all'")
860
+ logger.error(" note_numbers: comma-separated note numbers (for specific mode)")
861
+ sys.exit(1)
862
+
863
+ mode = sys.argv[1].lower()
864
+ note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""
865
+
866
+ if mode == "specific":
867
+ if not note_numbers:
868
+ logger.error("Note numbers required for specific mode")
869
+ sys.exit(1)
870
+
871
+ note_list = [n.strip() for n in note_numbers.split(",")]
872
+ all_notes = []
873
+ successful_notes = []
874
+
875
+ for note_number in note_list:
876
+ if note_number in generator.note_templates:
877
+ success = generator.generate_note(note_number)
878
+ if success:
879
+ # Load the generated note
880
+ try:
881
+ with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
882
+ note_data = json.load(f)
883
+ all_notes.append(note_data)
884
+ successful_notes.append(note_number)
885
+ logger.info(f"Note {note_number} generated successfully")
886
+ except Exception as e:
887
+ logger.error(f"Failed to load generated note {note_number}: {e}")
888
+ else:
889
+ logger.error(f"Failed to generate note {note_number}")
890
+ else:
891
+ logger.error(f"Note {note_number} not found in templates")
892
+
893
+ # Save consolidated notes
894
+ if all_notes:
895
+ output_dir = settings.output_dir
896
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
897
+ consolidated = {
898
+ "notes": all_notes,
899
+ "generation_summary": {
900
+ "requested_notes": note_list,
901
+ "successful_notes": successful_notes,
902
+ "total_successful": len(successful_notes),
903
+ "generated_on": datetime.now().isoformat()
904
+ }
905
+ }
906
+ with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
907
+ json.dump(consolidated, f, indent=2, ensure_ascii=False)
908
+ logger.info(f"Consolidated notes saved to {output_dir}/notes.json")
909
+
910
+ elif mode == "all":
911
+ results = generator.generate_all_notes()
912
+ successful = sum(1 for success in results.values() if success)
913
+ total = len(results)
914
+ logger.info(f"{successful}/{total} notes generated successfully")
915
+
916
+ # Print detailed results
917
+ for note, success in results.items():
918
+ status = "βœ… SUCCESS" if success else "❌ FAILED"
919
+ logger.info(f" Note {note}: {status}")
920
+
921
  else:
922
+ logger.error("Invalid mode. Use 'specific' or 'all'")
923
+ sys.exit(1)
924
+
 
 
 
925
  else:
926
+ # Interactive mode
927
+ choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
928
+
929
+ if choice == "1":
930
+ available_notes = list(generator.note_templates.keys())
931
+ print(f"Available notes: {', '.join(available_notes)}")
932
+ note_number = input("Enter note number: ").strip()
933
+
934
+ if note_number in available_notes:
935
+ success = generator.generate_note(note_number)
936
+ logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
937
+ else:
938
+ logger.error(f"Note {note_number} not found")
939
+
940
+ elif choice == "2":
941
+ results = generator.generate_all_notes()
942
+ successful = sum(1 for success in results.values() if success)
943
+ total = len(results)
944
+ logger.info(f"{successful}/{total} notes generated successfully")
945
+
946
+ # Print summary
947
+ print("\n" + "="*50)
948
+ print("GENERATION SUMMARY")
949
+ print("="*50)
950
+ for note, success in results.items():
951
+ status = "βœ… SUCCESS" if success else "❌ FAILED"
952
+ print(f"Note {note}: {status}")
953
+ print("="*50)
954
+
955
+ else:
956
+ logger.error("Invalid choice. Enter 1 or 2.")
957
+
958
+ except KeyboardInterrupt:
959
+ logger.info("Generation interrupted by user")
960
+ sys.exit(0)
961
  except Exception as e:
962
  logger.error(f"Error: {e}", exc_info=True)
963
+ sys.exit(1)
964
 
965
  if __name__ == "__main__":
966
  main()