Dipan04 committed on
Commit
4cc66a9
·
1 Parent(s): a7cd9e6

Updated notes and fixes

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -3
  2. app.py +1 -1
  3. docker-compose.yml +3 -3
  4. notes/llm_notes_generator.py +627 -253
Dockerfile CHANGED
@@ -41,11 +41,11 @@ ENV AGENT_TEMPERATURE=0.1
41
  ENV AGENT_MAX_TOKENS=2000
42
 
43
  # Expose the port
44
- EXPOSE 8000
45
 
46
  # Health check for the API
47
  HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
48
- CMD curl -f http://localhost:8000/docs || exit 1
49
 
50
  # Start FastAPI app
51
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
 
41
  ENV AGENT_MAX_TOKENS=2000
42
 
43
  # Expose the port
44
+ EXPOSE 7860
45
 
46
  # Health check for the API
47
  HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
48
+ CMD curl -f http://localhost:7860/docs || exit 1
49
 
50
  # Start FastAPI app
51
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -392,4 +392,4 @@ app.include_router(router)
392
 
393
  if __name__ == "__main__":
394
  import uvicorn
395
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
392
 
393
  if __name__ == "__main__":
394
  import uvicorn
395
+ uvicorn.run(app, host="0.0.0.0", port=7860)
docker-compose.yml CHANGED
@@ -4,13 +4,13 @@ services:
4
  build: .
5
  container_name: finryver
6
  ports:
7
- - "8000:8000"
8
  volumes:
9
  # Bind mount entire project for live code edits (includes data & config)
10
  - .:/app
11
  environment:
12
  - PYTHONUNBUFFERED=1
13
- - PORT=8000
14
  # Default agent settings (override with .env file)
15
  - AGENT_MODEL=gpt-3.5-turbo
16
  - AGENT_TEMPERATURE=0.1
@@ -19,7 +19,7 @@ services:
19
  - .env
20
  restart: unless-stopped
21
  healthcheck:
22
- test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
23
  interval: 30s
24
  timeout: 10s
25
  retries: 3
 
4
  build: .
5
  container_name: finryver
6
  ports:
7
+ - "7860:7860"
8
  volumes:
9
  # Bind mount entire project for live code edits (includes data & config)
10
  - .:/app
11
  environment:
12
  - PYTHONUNBUFFERED=1
13
+ - PORT=7860
14
  # Default agent settings (override with .env file)
15
  - AGENT_MODEL=gpt-3.5-turbo
16
  - AGENT_TEMPERATURE=0.1
 
19
  - .env
20
  restart: unless-stopped
21
  healthcheck:
22
+ test: ["CMD", "curl", "-f", "http://localhost:7860/docs"]
23
  interval: 30s
24
  timeout: 10s
25
  retries: 3
notes/llm_notes_generator.py CHANGED
@@ -10,6 +10,7 @@ class FlexibleFinancialNoteGenerator:
10
  def generate_all_notes(self, trial_balance_path=None):
11
  # Placeholder logic
12
  return {"dummy": True}
 
13
  import json
14
  import os
15
  import logging
@@ -74,47 +75,14 @@ class FlexibleFinancialNoteGenerator:
74
  "X-Title": "Financial Note Generator"
75
  }
76
  self.note_templates = self.load_note_templates()
77
- self.account_patterns = self._init_account_patterns()
78
  self.recommended_models = [
79
- "mistralai/mixtral-8x7b-instruct",
80
- "mistralai/mistral-7b-instruct-v0.2"
 
 
81
  ]
82
 
83
- def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
84
- """Initialize account classification patterns."""
85
- return {
86
- "10": {
87
- "keywords": ["security deposit", "long term advance", "deposit", "advance recoverable"],
88
- "groups": ["Long Term Loans and Advances", "Non-Current Assets"],
89
- "exclude_keywords": ["short term", "current", "trade"]
90
- },
91
- "11": {
92
- "keywords": ["inventory", "stock", "raw material", "finished goods", "work in progress", "consumables"],
93
- "groups": ["Inventories", "Current Assets"],
94
- "exclude_keywords": ["advance", "deposit"]
95
- },
96
- "12": {
97
- "keywords": ["trade receivable", "debtors", "accounts receivable", "sundry debtors"],
98
- "groups": ["Trade Receivables", "Current Assets"],
99
- "exclude_keywords": ["advance", "deposit"]
100
- },
101
- "13": {
102
- "keywords": ["cash", "bank", "petty cash", "cash on hand", "current account", "savings account", "fixed deposit"],
103
- "groups": ["Cash and Bank Balances", "Current Assets"],
104
- "exclude_keywords": ["advance", "loan"]
105
- },
106
- "14": {
107
- "keywords": ["prepaid", "advance", "short term", "employee advance", "supplier advance", "advance tax", "tds", "gst", "statutory"],
108
- "groups": ["Short Term Loans and Advances", "Current Assets"],
109
- "exclude_keywords": ["long term", "security deposit"]
110
- },
111
- "15": {
112
- "keywords": ["interest accrued", "accrued income", "other current", "miscellaneous current"],
113
- "groups": ["Other Current Assets", "Current Assets"],
114
- "exclude_keywords": ["trade", "advance"]
115
- }
116
- }
117
-
118
  def load_note_templates(self) -> Dict[str, Any]:
119
  """Load note templates from notes_template.py file."""
120
  try:
@@ -132,7 +100,7 @@ class FlexibleFinancialNoteGenerator:
132
  return {}
133
 
134
  def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
135
- """Load the classified trial balance from Excel or JSON."""
136
  try:
137
  if file_path.endswith('.json'):
138
  with open(file_path, 'r', encoding='utf-8') as f:
@@ -161,186 +129,155 @@ class FlexibleFinancialNoteGenerator:
161
  logger.error(f"Error loading trial balance: {e}")
162
  return None
163
 
164
- def classify_accounts_by_note(self, trial_balance_data: Dict[str, Any], note_number: str) -> List[Dict[str, Any]]:
165
- """Classify accounts based on note number and patterns"""
166
- if not trial_balance_data or "accounts" not in trial_balance_data:
167
- return []
168
-
169
- classified_accounts = []
170
- patterns = self.account_patterns.get(note_number, {})
171
- keywords = patterns.get("keywords", [])
172
- groups = patterns.get("groups", [])
173
- exclude_keywords = patterns.get("exclude_keywords", [])
174
-
175
- for account in trial_balance_data["accounts"]:
176
- account_name = account.get("account_name", "").lower()
177
- account_group = account.get("group", "")
178
-
179
- if any(exclude_word.lower() in account_name for exclude_word in exclude_keywords):
180
- continue
181
-
182
- keyword_match = any(keyword.lower() in account_name for keyword in keywords)
183
- group_match = account_group in groups
184
-
185
- if keyword_match or group_match:
186
- classified_accounts.append(account)
187
-
188
- logger.info(f"Classified {len(classified_accounts)} accounts for Note {note_number}")
189
- return classified_accounts
190
-
191
- def safe_amount_conversion(self, amount: Any, conversion_factor: float = 100000) -> float:
192
- """Safely convert amount to lakhs"""
193
- try:
194
- if isinstance(amount, str):
195
- cleaned = re.sub(r'[^\d.-]', '', amount)
196
- amount_float = float(cleaned) if cleaned else 0.0
197
- else:
198
- amount_float = float(amount) if amount is not None else 0.0
199
- return round(amount_float / conversion_factor, 2)
200
- except (ValueError, TypeError):
201
- return 0.0
202
-
203
- def calculate_totals(self, accounts: List[Dict[str, Any]], conversion_factor: float = 100000) -> Tuple[float, float]:
204
- """Calculate totals with safe amount conversion"""
205
- total_amount = 0.0
206
- for account in accounts:
207
- amount = self.safe_amount_conversion(account.get("amount", 0), 1)
208
- total_amount += amount
209
- total_lakhs = round(total_amount / conversion_factor, 2)
210
- return total_amount, total_lakhs
211
-
212
- def categorize_accounts(self, accounts: List[Dict[str, Any]], note_number: str) -> Dict[str, List[Dict[str, Any]]]:
213
- """Categorize accounts based on note-specific rules"""
214
- categories = {
215
- "prepaid_expenses": [],
216
- "other_advances": [],
217
- "advance_tax": [],
218
- "statutory_balances": [],
219
- "uncategorized": []
220
- } if note_number == "14" else {}
221
-
222
- for account in accounts:
223
- account_name = account.get("account_name", "").lower()
224
- categorized = False
225
-
226
- if note_number == "14":
227
- if "prepaid" in account_name:
228
- categories["prepaid_expenses"].append(account)
229
- categorized = True
230
- elif any(word in account_name for word in ["advance tax", "tax advance", "income tax"]):
231
- categories["advance_tax"].append(account)
232
- categorized = True
233
- elif any(word in account_name for word in ["tds", "gst", "statutory", "government", "vat", "pf", "esi"]):
234
- categories["statutory_balances"].append(account)
235
- categorized = True
236
- elif any(word in account_name for word in ["advance", "deposit", "recoverable", "employee advance", "supplier advance"]):
237
- categories["other_advances"].append(account)
238
- categorized = True
239
-
240
- if not categorized:
241
- categories["uncategorized"].append(account)
242
-
243
- return categories
244
-
245
- def calculate_category_totals(self, categories: Dict[str, List[Dict[str, Any]]], conversion_factor: float = 100000) -> Tuple[Dict[str, Dict[str, Any]], float]:
246
- """Calculate totals for each category"""
247
- category_totals = {}
248
- grand_total = 0.0
249
-
250
- for category_name, accounts in categories.items():
251
- if not isinstance(accounts, list):
252
- continue
253
- total_amount = 0.0
254
- for account in accounts:
255
- amount = self.safe_amount_conversion(account.get("amount", 0), 1)
256
- total_amount += amount
257
- total_lakhs = round(total_amount / conversion_factor, 2)
258
- category_totals[category_name] = {
259
- "amount": total_amount,
260
- "lakhs": total_lakhs,
261
- "count": len(accounts),
262
- "accounts": [acc.get("account_name", "") for acc in accounts]
263
- }
264
- grand_total += total_amount
265
-
266
- return category_totals, round(grand_total / conversion_factor, 2)
267
-
268
- def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any], classified_accounts: List[Dict[str, Any]]) -> Optional[str]:
269
- """Build dynamic LLM prompt based on note template and classified accounts"""
270
  if note_number not in self.note_templates:
271
  return None
272
 
273
  template = self.note_templates[note_number]
274
- total_amount, total_lakhs = self.calculate_totals(classified_accounts)
275
- categories = self.categorize_accounts(classified_accounts, note_number)
276
- category_totals, grand_total_lakhs = self.calculate_category_totals(categories)
277
 
 
278
  context = {
279
  "note_info": {
280
  "number": note_number,
281
  "title": template.get("title", ""),
282
  "full_title": template.get("full_title", "")
283
  },
284
- "financial_data": {
285
- "total_accounts": len(classified_accounts),
286
- "total_amount": total_amount,
287
- "total_lakhs": total_lakhs,
288
- "grand_total_lakhs": grand_total_lakhs
289
  },
290
- "categories": category_totals,
291
- "trial_balance": trial_balance_data,
292
  "current_date": datetime.now().strftime("%Y-%m-%d"),
293
  "financial_year": "2023-24"
294
  }
295
 
296
- prompt = (
297
- f"\nYou are a financial reporting AI system with two roles:\n"
298
- f"1. ACCOUNTANT — You extract, compute, and classify data from the financial context and trial balance.\n"
299
- f"2. AUDITOR — You review the Accountant’s output for accuracy, assumptions, and consistency with reporting standards.\n"
300
- f"\nYour task is to generate a financial note titled: \"{template['full_title']}\" strictly following the JSON structure below, based on the provided financial context and trial balance data.\n"
301
- f"\n---\n**CRITICAL RULES**\n"
302
- f"- Respond ONLY with a valid JSON object (no markdown, no explanations).\n"
303
- f"- If a value is unavailable or not calculable, use `0.0`.\n"
304
- f"- Strictly Convert all ₹ amounts to lakhs by dividing by 100000 and round to 2 decimal places.\n"
305
- f"- Ensure that category subtotals **match** the grand total.\n"
306
- f"- Return a key `markdown_content` containing a markdown-formatted table for this note.\n"
307
- f"- Validate that your JSON structure matches the `TEMPLATE STRUCTURE` exactly.\n"
308
- f"- Perform intelligent classification: if an entry from the trial balance clearly fits a category, assign it logically.\n"
309
- f"- If data is ambiguous, make a conservative estimate, and record it in an `assumptions` field inside the JSON.\n"
310
- f"\n---\n**REFLECTION**\n"
311
- f"- After generating the financial note, reflect on the process: Did you miss any data? Are there any uncertainties or assumptions that should be highlighted?\n"
312
- f"- Explicitly mention any limitations, ambiguities, or areas where further information would improve accuracy in the `assumptions` field.\n"
313
- f"\n**REFLEXION**\n"
314
- f"- Before finalizing the output, review your own reasoning and calculations. Double-check that all ₹ amounts are converted to lakhs and that category subtotals match the grand total.\n"
315
- f"- If you spot any inconsistencies or possible errors, correct them and note your corrections in the `assumptions` field.\n"
316
- f"\n**TALES**\n"
317
- f"- For each major category or unusual entry, briefly narrate (in the `assumptions` field) the story or logic behind its classification, especially if it required inference or was ambiguous.\n"
318
- f"- Use the `assumptions` field to share any tales of how you mapped trial balance entries to categories, including any conservative estimates or judgment calls.\n"
319
- f"\n---\n**TEMPLATE STRUCTURE**\n{json.dumps(template, indent=2)}\n"
320
- f"\n---\n**TRIAL BALANCE & CONTEXT**\n{json.dumps(context, indent=2)}\n"
321
- f"\n---\n**CATEGORY RULES FOR NOTE 14 (Short Term Loans and Advances):**\n"
322
- f"- Categorize entries under:\n"
323
- f" - Unsecured, considered good:\n"
324
- f" - Prepaid Expenses\n"
325
- f" - Other Advances\n"
326
- f" - Other loans and advances:\n"
327
- f" - Advance Tax\n"
328
- f" - Balances with statutory/government authorities\n"
329
- f"- Use logical inference to map trial balance entries into these subcategories\n"
330
- f"- If values for March 31, 2023 are missing, default to 0\n"
331
- f"- Ensure the sum of all subcategories = `Total`\n"
332
- f"\n---\n**REQUIRED OUTPUT JSON FORMAT**\n"
333
- f"- The JSON must include:\n"
334
- f" - All categories and subcategories with March 2024 and March 2023 values\n"
335
- f" - A computed `grand_total_lakhs`\n"
336
- f" - A `markdown_content` with the financial note table\n"
337
- f" - A `generated_on` timestamp\n"
338
- f" - An `assumptions` field (optional, if any data was inferred or missing)\n"
339
- f"\n---\nGenerate the final JSON now:\n"
340
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
  return prompt
343
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  def call_openrouter_api(self, prompt: str) -> Optional[str]:
345
  """Make API call to OpenRouter with model fallback"""
346
  for model in self.recommended_models:
@@ -348,10 +285,13 @@ class FlexibleFinancialNoteGenerator:
348
  payload = {
349
  "model": model,
350
  "messages": [
351
- {"role": "system", "content": "You are a financial reporting expert. Always respond with valid JSON only."},
 
 
 
352
  {"role": "user", "content": prompt}
353
  ],
354
- "max_tokens": 8000,
355
  "temperature": 0.1,
356
  "top_p": 0.9
357
  }
@@ -360,13 +300,20 @@ class FlexibleFinancialNoteGenerator:
360
  self.api_url,
361
  headers=self.headers,
362
  json=payload,
363
- timeout=30 # <-- Add timeout here!
364
  )
365
  response.raise_for_status()
366
  result = response.json()
367
  content = result['choices'][0]['message']['content']
368
  logger.info(f"Successful response from {model}")
369
  return content
 
 
 
 
 
 
 
370
  except Exception as e:
371
  logger.error(f"Failed with {model}: {e}")
372
  continue
@@ -374,64 +321,385 @@ class FlexibleFinancialNoteGenerator:
374
  return None
375
 
376
  def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
377
- """Extract JSON from response, handling markdown code blocks"""
378
  response_text = response_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  json_patterns = [
380
  r'```json\s*(.*?)\s*```',
381
  r'```\s*(.*?)\s*```',
382
- r'(\{.*\})'
383
  ]
384
 
385
  for pattern in json_patterns:
386
  match = re.search(pattern, response_text, re.DOTALL)
387
  if match:
388
  try:
389
- json_data = json.loads(match.group(1))
390
- return json_data, match.group(1)
 
391
  except json.JSONDecodeError:
392
  continue
393
 
 
394
  try:
395
  json_data = json.loads(response_text)
396
  return json_data, response_text
397
  except json.JSONDecodeError:
 
 
 
 
 
 
 
 
 
 
 
398
  return None, None
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
401
- """Save the generated note to file in both JSON and markdown formats"""
402
  Path(output_dir).mkdir(parents=True, exist_ok=True)
403
  json_output_path = f"{output_dir}/notes.json"
404
  raw_output_path = f"{output_dir}/notes_raw.txt"
405
  formatted_md_path = f"{output_dir}/notes_formatted.md"
406
 
407
  try:
 
408
  with open(raw_output_path, 'w', encoding='utf-8') as f:
409
  f.write(note_data)
 
 
410
  json_data, json_string = self.extract_json_from_markdown(note_data)
 
411
  if json_data:
 
 
 
 
 
 
 
 
 
412
  json_data = convert_note_json_to_lakhs(json_data)
 
 
413
  with open(json_output_path, 'w', encoding='utf-8') as f:
414
  json.dump(json_data, f, indent=2, ensure_ascii=False)
415
  logger.info(f"JSON saved to {json_output_path}")
416
- md_content = json_data.get('markdown_content')
 
 
417
  if not md_content:
418
- md_content = f"# Note {note_number}\n\n```json\n{json.dumps(json_data, indent=2)}\n```"
 
 
 
419
  with open(formatted_md_path, 'w', encoding='utf-8') as f:
420
  f.write(md_content)
 
421
  return True
422
  else:
 
 
423
  fallback_json = {
424
- "note_number": note_number,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  "raw_response": note_data,
426
- "error": "Could not parse JSON from response",
427
- "generated_on": datetime.now().isoformat()
428
  }
 
429
  with open(json_output_path, 'w', encoding='utf-8') as f:
430
  json.dump(fallback_json, f, indent=2, ensure_ascii=False)
431
- logger.warning(f"Fallback JSON saved to {json_output_path}")
432
  return False
 
433
  except Exception as e:
434
  logger.error(f"Error saving files: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  return False
436
 
437
  def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
@@ -441,21 +709,25 @@ class FlexibleFinancialNoteGenerator:
441
  return False
442
 
443
  logger.info(f"Starting Note {note_number} generation...")
 
 
444
  trial_balance = self.load_trial_balance(trial_balance_path)
445
  if not trial_balance:
446
  return False
447
 
448
- classified_accounts = self.classify_accounts_by_note(trial_balance, note_number)
449
- prompt = self.build_llm_prompt(note_number, trial_balance, classified_accounts)
450
  if not prompt:
451
  logger.error("Failed to build prompt")
452
  return False
453
 
 
454
  response = self.call_openrouter_api(prompt)
455
  if not response:
456
  logger.error("Failed to get API response")
457
  return False
458
 
 
459
  success = self.save_generated_note(response, note_number)
460
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
461
  return success
@@ -465,43 +737,120 @@ class FlexibleFinancialNoteGenerator:
465
  logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
466
  results = {}
467
  all_notes = []
 
 
 
 
 
 
 
468
  for note_number in self.note_templates.keys():
469
  logger.info(f"Processing Note {note_number}")
470
- trial_balance = self.load_trial_balance(trial_balance_path)
471
- if not trial_balance:
472
- results[note_number] = False
473
- continue
474
- classified_accounts = self.classify_accounts_by_note(trial_balance, note_number)
475
- prompt = self.build_llm_prompt(note_number, trial_balance, classified_accounts)
476
  if not prompt:
477
  results[note_number] = False
478
  continue
 
 
479
  response = self.call_openrouter_api(prompt)
480
  if not response:
481
  results[note_number] = False
482
  continue
 
 
483
  json_data, _ = self.extract_json_from_markdown(response)
484
  if json_data:
485
- all_notes.append(json_data)
486
- results[note_number] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  results[note_number] = False
 
 
489
  import time
490
- time.sleep(1)
491
- # Save all notes in one file
 
492
  output_dir = settings.output_dir
493
  Path(output_dir).mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
494
  with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
495
- json.dump({"notes": all_notes}, f, indent=2, ensure_ascii=False)
 
496
  successful = sum(1 for success in results.values() if success)
497
  total = len(results)
498
  logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
499
  logger.info(f"All notes saved to {output_dir}/notes.json")
 
500
  return results
501
 
502
  def main() -> None:
503
  """Main function to run the flexible note generator"""
504
  try:
 
 
 
 
 
 
 
 
505
  # Check for command line arguments
506
  if len(sys.argv) > 1:
507
  # Command line mode
@@ -514,13 +863,6 @@ def main() -> None:
514
  mode = sys.argv[1].lower()
515
  note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""
516
 
517
- generator = FlexibleFinancialNoteGenerator()
518
- if not generator.note_templates:
519
- logger.error("No note templates loaded. Check app/new.py")
520
- sys.exit(1)
521
-
522
- logger.info(f"Loaded {len(generator.note_templates)} note templates")
523
-
524
  if mode == "specific":
525
  if not note_numbers:
526
  logger.error("Note numbers required for specific mode")
@@ -528,65 +870,97 @@ def main() -> None:
528
 
529
  note_list = [n.strip() for n in note_numbers.split(",")]
530
  all_notes = []
 
531
 
532
  for note_number in note_list:
533
  if note_number in generator.note_templates:
534
  success = generator.generate_note(note_number)
535
  if success:
536
  # Load the generated note
537
- with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
538
- note_data = json.load(f)
539
- all_notes.append(note_data)
540
- logger.info(f"Note {note_number} generated successfully")
 
 
 
 
541
  else:
542
  logger.error(f"Failed to generate note {note_number}")
543
  else:
544
  logger.error(f"Note {note_number} not found in templates")
545
 
546
- # Save all notes
547
- output_dir = settings.output_dir
548
- Path(output_dir).mkdir(parents=True, exist_ok=True)
549
- with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
550
- json.dump({"notes": all_notes}, f, indent=2, ensure_ascii=False)
551
- logger.info(f"All notes saved to {output_dir}/notes.json")
 
 
 
 
 
 
 
 
 
 
552
 
553
  elif mode == "all":
554
  results = generator.generate_all_notes()
555
  successful = sum(1 for success in results.values() if success)
556
  total = len(results)
557
  logger.info(f"{successful}/{total} notes generated successfully")
 
 
 
 
 
 
558
  else:
559
  logger.error("Invalid mode. Use 'specific' or 'all'")
560
  sys.exit(1)
 
561
  else:
562
- # Interactive mode (original behavior)
563
- generator = FlexibleFinancialNoteGenerator()
564
- if not generator.note_templates:
565
- logger.error("No note templates loaded. Check app/new.py")
566
- return
567
-
568
- logger.info(f"Loaded {len(generator.note_templates)} note templates")
569
  choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
570
 
571
  if choice == "1":
572
  available_notes = list(generator.note_templates.keys())
573
  print(f"Available notes: {', '.join(available_notes)}")
574
  note_number = input("Enter note number: ").strip()
 
575
  if note_number in available_notes:
576
  success = generator.generate_note(note_number)
577
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
578
  else:
579
  logger.error(f"Note {note_number} not found")
 
580
  elif choice == "2":
581
  results = generator.generate_all_notes()
582
  successful = sum(1 for success in results.values() if success)
583
  total = len(results)
584
  logger.info(f"{successful}/{total} notes generated successfully")
 
 
 
 
 
 
 
 
 
 
585
  else:
586
  logger.error("Invalid choice. Enter 1 or 2.")
 
 
 
 
587
  except Exception as e:
588
  logger.error(f"Error: {e}", exc_info=True)
589
  sys.exit(1)
590
 
591
  if __name__ == "__main__":
592
- main()
 
10
  def generate_all_notes(self, trial_balance_path=None):
11
  # Placeholder logic
12
  return {"dummy": True}
13
+
14
  import json
15
  import os
16
  import logging
 
75
  "X-Title": "Financial Note Generator"
76
  }
77
  self.note_templates = self.load_note_templates()
78
+ # Updated model list with DeepSeek as first choice
79
  self.recommended_models = [
80
+
81
+ "deepseek/deepseek-r1",
82
+ #"deepseek/deepseek-coder",
83
+ "mistralai/mixtral-8x7b-instruct"
84
  ]
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def load_note_templates(self) -> Dict[str, Any]:
87
  """Load note templates from notes_template.py file."""
88
  try:
 
100
  return {}
101
 
102
  def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
103
+ """Load the complete trial balance from Excel or JSON."""
104
  try:
105
  if file_path.endswith('.json'):
106
  with open(file_path, 'r', encoding='utf-8') as f:
 
129
  logger.error(f"Error loading trial balance: {e}")
130
  return None
131
 
132
+ def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
133
+ """Build comprehensive LLM prompt with strict JSON output requirements"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  if note_number not in self.note_templates:
135
  return None
136
 
137
  template = self.note_templates[note_number]
138
+ all_accounts = trial_balance_data.get("accounts", [])
 
 
139
 
140
+ # Build context with full trial balance
141
  context = {
142
  "note_info": {
143
  "number": note_number,
144
  "title": template.get("title", ""),
145
  "full_title": template.get("full_title", "")
146
  },
147
+ "trial_balance": {
148
+ "total_accounts": len(all_accounts),
149
+ "accounts": all_accounts
 
 
150
  },
 
 
151
  "current_date": datetime.now().strftime("%Y-%m-%d"),
152
  "financial_year": "2023-24"
153
  }
154
 
155
+ # Get note-specific classification guidance
156
+ classification_guide = self._get_classification_guide(note_number)
157
+
158
+ prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
159
+
160
+ 🔴 CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
161
+ 1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
162
+ 2. START YOUR RESPONSE WITH {{ and END WITH }}
163
+ 3. DO NOT USE ```json``` CODE BLOCKS
164
+ 4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
165
+
166
+ 🔴 REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
167
+ {{
168
+ "title": "{template.get('title', '')}",
169
+ "full_title": "{template.get('full_title', '')}",
170
+ "structure": [
171
+ {{
172
+ "category": "In Lakhs",
173
+ "subcategories": [
174
+ {{ "label": "March 31, 2024", "value": 0.00 }},
175
+ {{ "label": "March 31, 2023", "value": 0.00 }}
176
+ ]
177
+ }},
178
+ {{
179
+ "category": "Category Name",
180
+ "subcategories": [
181
+ {{ "label": "Subcategory Item", "value": 0.00, "previous_value": 0.00 }}
182
+ ],
183
+ "total": 0.00,
184
+ "previous_total": 0.00
185
+ }}
186
+ ],
187
+ "metadata": {{
188
+ "note_number": {note_number},
189
+ "generated_on": "{datetime.now().isoformat()}"
190
+ }},
191
+ "assumptions": "List any assumptions made during classification"
192
+ }}
193
+
194
+ 🔴 STRUCTURE ARRAY EXPLAINED:
195
+ - First element: Header row with column labels (March 31, 2024, March 31, 2023)
196
+ - Subsequent elements: Data categories with subcategories
197
+ - Each data category must have:
198
+ * "category": Main category name
199
+ * "subcategories": Array of line items with "label", "value", "previous_value"
200
+ * "total": Sum of current year values in subcategories
201
+ * "previous_total": Sum of previous year values in subcategories
202
+
203
+ 🔴 YOUR TASK:
204
+ 1. Analyze ALL trial balance accounts provided below
205
+ 2. Identify accounts that belong to "{template['full_title']}"
206
+ 3. Classify into appropriate subcategories per Schedule III
207
+ 4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
208
+ 5. Calculate accurate totals ensuring mathematical consistency
209
+ 6. Structure output in hierarchical "structure" array format
210
+
211
+ 🔴 MATHEMATICAL REQUIREMENTS:
212
+ - All amounts MUST be in lakhs (divide original by 100,000)
213
+ - All subtotals MUST equal the grand total exactly
214
+ - Use 0.00 for March 2023 if data missing
215
+ - Round to 2 decimal places consistently
216
+ - Ensure "total" = sum of "value" in subcategories
217
+ - Ensure "previous_total" = sum of "previous_value" in subcategories
218
+
219
+ 🔴 CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
220
+ {classification_guide}
221
+
222
+ 🔴 COMPLETE TRIAL BALANCE DATA:
223
+ {json.dumps(context, indent=2)}
224
+
225
+ 🔴 TEMPLATE STRUCTURE TO FOLLOW:
226
+ {json.dumps(template, indent=2)}
227
+
228
+ 🔴 VALIDATION RULES:
229
+ - If no accounts match this note category, use empty categories with 0.00 totals
230
+ - Ensure "metadata.note_number" exactly matches {note_number}
231
+ - Document classification logic in "assumptions" field
232
+ - Structure must have at least 2 elements (header + data)
233
+
234
+ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
235
 
236
  return prompt
237
+
238
+ def _get_classification_guide(self, note_number: str) -> str:
239
+ """Get note-specific classification guidance"""
240
+ guides = {
241
+ "10": """
242
+ **Note 10 - Long Term Loans and Advances:**
243
+ - Include: Security deposits, long-term advances to suppliers/employees, deposits with utilities
244
+ - Categories: Unsecured considered good, Unsecured considered doubtful, Doubtful (provision)
245
+ - Exclude: Short-term advances, trade receivables, prepaid expenses under 1 year
246
+ """,
247
+ "11": """
248
+ **Note 11 - Inventories:**
249
+ - Include: Raw materials, work-in-progress, finished goods, stores and spares, consumables
250
+ - Value at lower of cost or net realizable value
251
+ - Exclude: Advances for inventory purchases (classify as advances)
252
+ """,
253
+ "12": """
254
+ **Note 12 - Trade Receivables:**
255
+ - Include: Amounts due from customers for goods/services, bills receivable
256
+ - Categories: Unsecured considered good, Unsecured considered doubtful, Provision for doubtful debts
257
+ - Exclude: Advances, deposits, other receivables
258
+ """,
259
+ "13": """
260
+ **Note 13 - Cash and Cash Equivalents:**
261
+ - Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
262
+ - Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
263
+ - Show: Balances in current accounts, savings accounts, fixed deposits separately
264
+ """,
265
+ "14": """
266
+ **Note 14 - Short Term Loans and Advances:**
267
+ - Include: Prepaid expenses, advances to suppliers, employee advances, advance tax, TDS receivable
268
+ - Categories:
269
+ * Unsecured, considered good: Prepaid expenses, Other advances
270
+ * Other loans and advances: Advance tax, Balances with statutory/govt authorities
271
+ - Exclude: Long-term advances, trade receivables
272
+ """,
273
+ "15": """
274
+ **Note 15 - Other Current Assets:**
275
+ - Include: Interest accrued, export incentives receivable, insurance claims, other miscellaneous current assets
276
+ - Exclude: Items that fit into specific categories like trade receivables, advances, cash
277
+ """
278
+ }
279
+ return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
280
+
281
  def call_openrouter_api(self, prompt: str) -> Optional[str]:
282
  """Make API call to OpenRouter with model fallback"""
283
  for model in self.recommended_models:
 
285
  payload = {
286
  "model": model,
287
  "messages": [
288
+ {
289
+ "role": "system",
290
+ "content": "You are an expert chartered accountant specializing in Indian accounting standards. You MUST respond with ONLY valid JSON, never with markdown code blocks or explanations. Start with { and end with }."
291
+ },
292
  {"role": "user", "content": prompt}
293
  ],
294
+ "max_tokens": 12000,
295
  "temperature": 0.1,
296
  "top_p": 0.9
297
  }
 
300
  self.api_url,
301
  headers=self.headers,
302
  json=payload,
303
+ timeout=60
304
  )
305
  response.raise_for_status()
306
  result = response.json()
307
  content = result['choices'][0]['message']['content']
308
  logger.info(f"Successful response from {model}")
309
  return content
310
+ except requests.exceptions.HTTPError as e:
311
+ if e.response.status_code == 404:
312
+ logger.warning(f"Model {model} not found (404), trying next model")
313
+ elif e.response.status_code == 402:
314
+ logger.warning(f"Model {model} requires payment (402), trying next model")
315
+ else:
316
+ logger.error(f"HTTP error with {model}: {e}")
317
  except Exception as e:
318
  logger.error(f"Failed with {model}: {e}")
319
  continue
 
321
  return None
322
 
323
  def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
324
+ """Extract JSON from response, handling markdown code blocks and cleaning"""
325
  response_text = response_text.strip()
326
+
327
+ # CRITICAL FIX: Handle concatenated/duplicate JSON (e.g., "}{\n{")
328
+ # Find the first complete JSON object
329
+ json_objects = []
330
+ brace_count = 0
331
+ start_idx = -1
332
+
333
+ for i, char in enumerate(response_text):
334
+ if char == '{':
335
+ if brace_count == 0:
336
+ start_idx = i
337
+ brace_count += 1
338
+ elif char == '}':
339
+ brace_count -= 1
340
+ if brace_count == 0 and start_idx != -1:
341
+ # Found complete JSON object
342
+ potential_json = response_text[start_idx:i+1]
343
+ try:
344
+ parsed = json.loads(potential_json)
345
+ json_objects.append((parsed, potential_json))
346
+ # Use the first valid JSON object
347
+ break
348
+ except json.JSONDecodeError:
349
+ continue
350
+
351
+ if json_objects:
352
+ logger.info("Successfully extracted first valid JSON object from response")
353
+ return json_objects[0]
354
+
355
+ # Fallback: Try original extraction methods
356
+ # Remove any leading/trailing text outside JSON
357
  json_patterns = [
358
  r'```json\s*(.*?)\s*```',
359
  r'```\s*(.*?)\s*```',
360
+ r'(\{.*?\})'
361
  ]
362
 
363
  for pattern in json_patterns:
364
  match = re.search(pattern, response_text, re.DOTALL)
365
  if match:
366
  try:
367
+ json_content = match.group(1).strip()
368
+ json_data = json.loads(json_content)
369
+ return json_data, json_content
370
  except json.JSONDecodeError:
371
  continue
372
 
373
+ # Try parsing the entire response as JSON
374
  try:
375
  json_data = json.loads(response_text)
376
  return json_data, response_text
377
  except json.JSONDecodeError:
378
+ # Last attempt: find JSON-like structure
379
+ try:
380
+ start = response_text.find('{')
381
+ end = response_text.rfind('}') + 1
382
+ if start != -1 and end > start:
383
+ json_part = response_text[start:end]
384
+ json_data = json.loads(json_part)
385
+ return json_data, json_part
386
+ except json.JSONDecodeError:
387
+ pass
388
+
389
  return None, None
390
 
391
+ def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
392
+ """Validate JSON structure and auto-fix missing required fields"""
393
+ fixed_data = json_data.copy()
394
+
395
+ # Get template for this note
396
+ template = self.note_templates.get(note_number, {})
397
+
398
+ # Auto-fix title fields
399
+ if "title" not in fixed_data or not fixed_data["title"]:
400
+ fixed_data["title"] = template.get("title", f"Note {note_number}")
401
+ logger.info(f"Auto-fixed missing title field")
402
+
403
+ if "full_title" not in fixed_data or not fixed_data["full_title"]:
404
+ fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
405
+ logger.info(f"Auto-fixed missing full_title field")
406
+
407
+ # Auto-fix or create metadata
408
+ if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
409
+ fixed_data["metadata"] = {}
410
+ logger.info("Auto-created metadata object")
411
+
412
+ # CRITICAL FIX: Ensure note_number is correct integer, not 0.0
413
+ metadata_note_num = fixed_data["metadata"].get("note_number")
414
+ try:
415
+ # Convert note_number string to int
416
+ expected_note_num = int(note_number)
417
+
418
+ # Check if metadata note_number is wrong (0, 0.0, or mismatch)
419
+ if (metadata_note_num is None or
420
+ metadata_note_num == 0 or
421
+ metadata_note_num == 0.0 or
422
+ int(metadata_note_num) != expected_note_num):
423
+
424
+ fixed_data["metadata"]["note_number"] = expected_note_num
425
+ logger.info(f"Auto-corrected metadata.note_number from {metadata_note_num} to {expected_note_num}")
426
+ except ValueError:
427
+ fixed_data["metadata"]["note_number"] = note_number
428
+ logger.info(f"Auto-set metadata.note_number to string: {note_number}")
429
+
430
+ if "generated_on" not in fixed_data["metadata"]:
431
+ fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
432
+ logger.info("Auto-fixed missing metadata.generated_on field")
433
+
434
+ # Auto-fix or create structure array
435
+ if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
436
+ logger.warning("Structure array missing, creating default structure")
437
+ fixed_data["structure"] = [
438
+ {
439
+ "category": "In Lakhs",
440
+ "subcategories": [
441
+ {"label": "March 31, 2024", "value": 0.00},
442
+ {"label": "March 31, 2023", "value": 0.00}
443
+ ]
444
+ },
445
+ {
446
+ "category": "No data available",
447
+ "subcategories": [
448
+ {"label": "Items", "value": 0.00, "previous_value": 0.00}
449
+ ],
450
+ "total": 0.00,
451
+ "previous_total": 0.00
452
+ }
453
+ ]
454
+ else:
455
+ # Validate and fix structure elements
456
+ if len(fixed_data["structure"]) == 0:
457
+ logger.warning("Empty structure array, adding default elements")
458
+ fixed_data["structure"] = [
459
+ {
460
+ "category": "In Lakhs",
461
+ "subcategories": [
462
+ {"label": "March 31, 2024", "value": 0.00},
463
+ {"label": "March 31, 2023", "value": 0.00}
464
+ ]
465
+ }
466
+ ]
467
+
468
+ # Ensure each structure element has required fields
469
+ for i, struct_elem in enumerate(fixed_data["structure"]):
470
+ if not isinstance(struct_elem, dict):
471
+ continue
472
+
473
+ if "category" not in struct_elem:
474
+ struct_elem["category"] = f"Category {i}"
475
+
476
+ if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
477
+ struct_elem["subcategories"] = []
478
+
479
+ # For data rows (not header), ensure totals exist
480
+ if i > 0 and struct_elem.get("subcategories"):
481
+ if "total" not in struct_elem:
482
+ struct_elem["total"] = sum(
483
+ sub.get("value", 0.0)
484
+ for sub in struct_elem["subcategories"]
485
+ if isinstance(sub, dict)
486
+ )
487
+
488
+ if "previous_total" not in struct_elem:
489
+ struct_elem["previous_total"] = sum(
490
+ sub.get("previous_value", 0.0)
491
+ for sub in struct_elem["subcategories"]
492
+ if isinstance(sub, dict)
493
+ )
494
+
495
+ # Auto-fix assumptions
496
+ if "assumptions" not in fixed_data:
497
+ fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
498
+ logger.info("Auto-added default assumptions")
499
+
500
+ return fixed_data
501
+
502
+ def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
503
+ """Validate that the JSON matches expected structure"""
504
+ required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
505
+
506
+ # Check required fields
507
+ missing_fields = []
508
+ for field in required_fields:
509
+ if field not in json_data:
510
+ missing_fields.append(field)
511
+
512
+ if missing_fields:
513
+ return False, f"Missing required fields: {', '.join(missing_fields)}"
514
+
515
+ # Check metadata structure
516
+ if not isinstance(json_data.get("metadata"), dict):
517
+ return False, "metadata must be an object"
518
+
519
+ metadata = json_data["metadata"]
520
+ if "note_number" not in metadata:
521
+ return False, "metadata.note_number is required"
522
+
523
+ if str(metadata.get("note_number", "")) != str(note_number):
524
+ return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
525
+
526
+ # Check structure array
527
+ if not isinstance(json_data.get("structure"), list):
528
+ return False, "structure must be an array"
529
+
530
+ if len(json_data["structure"]) == 0:
531
+ return False, "structure array cannot be empty"
532
+
533
+ return True, "Validation passed"
534
+
535
+ def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
536
+ """Generate markdown table from structure array"""
537
+ try:
538
+ title = json_data.get("full_title", json_data.get("title", "Financial Note"))
539
+ structure = json_data.get("structure", [])
540
+
541
+ if not structure:
542
+ return f"# {title}\n\n*No data available*"
543
+
544
+ # Start markdown
545
+ md_lines = [f"# {title}\n"]
546
+
547
+ # Get header row (first element)
548
+ header_elem = structure[0] if len(structure) > 0 else None
549
+ if header_elem and header_elem.get("subcategories"):
550
+ headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
551
+ md_lines.append("| Particulars | " + " | ".join(headers) + " |")
552
+ md_lines.append("|" + "---|" * (len(headers) + 1))
553
+
554
+ # Process data rows
555
+ for i in range(1, len(structure)):
556
+ elem = structure[i]
557
+ category = elem.get("category", "")
558
+ subcategories = elem.get("subcategories", [])
559
+
560
+ # Add category header if exists
561
+ if category:
562
+ md_lines.append(f"\n**{category}**\n")
563
+
564
+ # Add subcategory rows
565
+ for sub in subcategories:
566
+ label = sub.get("label", "")
567
+ value = sub.get("value", 0.00)
568
+ previous_value = sub.get("previous_value", 0.00)
569
+ md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
570
+
571
+ # Add total row if exists
572
+ if "total" in elem:
573
+ total = elem.get("total", 0.00)
574
+ previous_total = elem.get("previous_total", 0.00)
575
+ md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
576
+
577
+ # Add metadata
578
+ metadata = json_data.get("metadata", {})
579
+ md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
580
+
581
+ # Add assumptions if present
582
+ assumptions = json_data.get("assumptions", "")
583
+ if assumptions:
584
+ md_lines.append(f"\n\n**Assumptions:** {assumptions}")
585
+
586
+ return "\n".join(md_lines)
587
+
588
+ except Exception as e:
589
+ logger.error(f"Error generating markdown from structure: {e}")
590
+ return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
591
+
592
  def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
593
+ """Save the generated note to file with robust validation and auto-fixing"""
594
  Path(output_dir).mkdir(parents=True, exist_ok=True)
595
  json_output_path = f"{output_dir}/notes.json"
596
  raw_output_path = f"{output_dir}/notes_raw.txt"
597
  formatted_md_path = f"{output_dir}/notes_formatted.md"
598
 
599
  try:
600
+ # Always save raw response for debugging
601
  with open(raw_output_path, 'w', encoding='utf-8') as f:
602
  f.write(note_data)
603
+
604
+ # Extract and validate JSON
605
  json_data, json_string = self.extract_json_from_markdown(note_data)
606
+
607
  if json_data:
608
+ # Auto-fix missing or incorrect fields
609
+ json_data = self.validate_and_fix_json(json_data, note_number)
610
+
611
+ # Final validation
612
+ is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
613
+ if not is_valid:
614
+ logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
615
+
616
+ # Convert to lakhs if needed
617
  json_data = convert_note_json_to_lakhs(json_data)
618
+
619
+ # Save JSON
620
  with open(json_output_path, 'w', encoding='utf-8') as f:
621
  json.dump(json_data, f, indent=2, ensure_ascii=False)
622
  logger.info(f"JSON saved to {json_output_path}")
623
+
624
+ # Generate and save markdown
625
+ md_content = json_data.get('markdown_content', '')
626
  if not md_content:
627
+ # Generate markdown from structure
628
+ md_content = self._generate_markdown_from_structure(json_data)
629
+ logger.info("Auto-generated markdown from structure array")
630
+
631
  with open(formatted_md_path, 'w', encoding='utf-8') as f:
632
  f.write(md_content)
633
+
634
  return True
635
  else:
636
+ # Create fallback JSON with all required fields
637
+ template = self.note_templates.get(note_number, {})
638
  fallback_json = {
639
+ "title": template.get("title", f"Note {note_number}"),
640
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
641
+ "structure": [
642
+ {
643
+ "category": "In Lakhs",
644
+ "subcategories": [
645
+ {"label": "March 31, 2024", "value": 0.00},
646
+ {"label": "March 31, 2023", "value": 0.00}
647
+ ]
648
+ },
649
+ {
650
+ "category": "Error - No data",
651
+ "subcategories": [
652
+ {"label": "Could not parse response", "value": 0.00, "previous_value": 0.00}
653
+ ],
654
+ "total": 0.00,
655
+ "previous_total": 0.00
656
+ }
657
+ ],
658
+ "metadata": {
659
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
660
+ "generated_on": datetime.now().isoformat()
661
+ },
662
+ "assumptions": "Failed to parse LLM response",
663
  "raw_response": note_data,
664
+ "error": "Could not parse JSON from response"
 
665
  }
666
+
667
  with open(json_output_path, 'w', encoding='utf-8') as f:
668
  json.dump(fallback_json, f, indent=2, ensure_ascii=False)
669
+ logger.warning(f"Fallback JSON with required fields saved to {json_output_path}")
670
  return False
671
+
672
  except Exception as e:
673
  logger.error(f"Error saving files: {e}")
674
+
675
+ # Emergency fallback
676
+ try:
677
+ template = self.note_templates.get(note_number, {})
678
+ emergency_json = {
679
+ "title": template.get("title", f"Note {note_number}"),
680
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
681
+ "structure": [
682
+ {
683
+ "category": "In Lakhs",
684
+ "subcategories": [
685
+ {"label": "March 31, 2024", "value": 0.00},
686
+ {"label": "March 31, 2023", "value": 0.00}
687
+ ]
688
+ }
689
+ ],
690
+ "metadata": {
691
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
692
+ "generated_on": datetime.now().isoformat()
693
+ },
694
+ "assumptions": "Emergency fallback due to processing error",
695
+ "error": str(e)
696
+ }
697
+ with open(json_output_path, 'w', encoding='utf-8') as f:
698
+ json.dump(emergency_json, f, indent=2, ensure_ascii=False)
699
+ logger.info(f"Emergency fallback JSON saved to {json_output_path}")
700
+ except Exception as emergency_error:
701
+ logger.error(f"Emergency fallback also failed: {emergency_error}")
702
+
703
  return False
704
 
705
  def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
 
709
  return False
710
 
711
  logger.info(f"Starting Note {note_number} generation...")
712
+
713
+ # Load complete trial balance
714
  trial_balance = self.load_trial_balance(trial_balance_path)
715
  if not trial_balance:
716
  return False
717
 
718
+ # Build prompt with full trial balance
719
+ prompt = self.build_llm_prompt(note_number, trial_balance)
720
  if not prompt:
721
  logger.error("Failed to build prompt")
722
  return False
723
 
724
+ # Get LLM response
725
  response = self.call_openrouter_api(prompt)
726
  if not response:
727
  logger.error("Failed to get API response")
728
  return False
729
 
730
+ # Save the generated note
731
  success = self.save_generated_note(response, note_number)
732
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
733
  return success
 
737
  logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
738
  results = {}
739
  all_notes = []
740
+
741
+ # Load trial balance once
742
+ trial_balance = self.load_trial_balance(trial_balance_path)
743
+ if not trial_balance:
744
+ logger.error("Failed to load trial balance")
745
+ return {note: False for note in self.note_templates.keys()}
746
+
747
  for note_number in self.note_templates.keys():
748
  logger.info(f"Processing Note {note_number}")
749
+
750
+ # Build prompt for this note
751
+ prompt = self.build_llm_prompt(note_number, trial_balance)
 
 
 
752
  if not prompt:
753
  results[note_number] = False
754
  continue
755
+
756
+ # Get LLM response
757
  response = self.call_openrouter_api(prompt)
758
  if not response:
759
  results[note_number] = False
760
  continue
761
+
762
+ # Parse JSON response
763
  json_data, _ = self.extract_json_from_markdown(response)
764
  if json_data:
765
+ # Auto-fix and validate
766
+ json_data = self.validate_and_fix_json(json_data, note_number)
767
+ is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
768
+
769
+ if is_valid:
770
+ json_data = convert_note_json_to_lakhs(json_data)
771
+ all_notes.append(json_data)
772
+ results[note_number] = True
773
+ logger.info(f"Note {note_number} processed successfully")
774
+ else:
775
+ logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
776
+ # Still include it but mark as failed
777
+ json_data = convert_note_json_to_lakhs(json_data)
778
+ all_notes.append(json_data)
779
+ results[note_number] = False
780
  else:
781
+ logger.error(f"Note {note_number}: Could not parse JSON from response")
782
+ # Create fallback note with new structure
783
+ template = self.note_templates.get(note_number, {})
784
+ fallback_note = {
785
+ "title": template.get("title", f"Note {note_number}"),
786
+ "full_title": template.get("full_title", f"{note_number}. Financial Note"),
787
+ "structure": [
788
+ {
789
+ "category": "In Lakhs",
790
+ "subcategories": [
791
+ {"label": "March 31, 2024", "value": 0.00},
792
+ {"label": "March 31, 2023", "value": 0.00}
793
+ ]
794
+ },
795
+ {
796
+ "category": "Error",
797
+ "subcategories": [
798
+ {"label": "Failed to generate from LLM response", "value": 0.00, "previous_value": 0.00}
799
+ ],
800
+ "total": 0.00,
801
+ "previous_total": 0.00
802
+ }
803
+ ],
804
+ "metadata": {
805
+ "note_number": int(note_number) if note_number.isdigit() else note_number,
806
+ "generated_on": datetime.now().isoformat()
807
+ },
808
+ "assumptions": "LLM response parsing failed",
809
+ "error": "JSON parsing failed"
810
+ }
811
+ all_notes.append(fallback_note)
812
  results[note_number] = False
813
+
814
+ # Brief pause between API calls
815
  import time
816
+ time.sleep(2)
817
+
818
+ # Save all notes in consolidated file
819
  output_dir = settings.output_dir
820
  Path(output_dir).mkdir(parents=True, exist_ok=True)
821
+
822
+ consolidated_output = {
823
+ "notes": all_notes,
824
+ "generation_summary": {
825
+ "total_notes": len(self.note_templates),
826
+ "successful_notes": sum(1 for success in results.values() if success),
827
+ "failed_notes": sum(1 for success in results.values() if not success),
828
+ "generated_on": datetime.now().isoformat(),
829
+ "results": results
830
+ }
831
+ }
832
+
833
  with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
834
+ json.dump(consolidated_output, f, indent=2, ensure_ascii=False)
835
+
836
  successful = sum(1 for success in results.values() if success)
837
  total = len(results)
838
  logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
839
  logger.info(f"All notes saved to {output_dir}/notes.json")
840
+
841
  return results
842
 
843
  def main() -> None:
844
  """Main function to run the flexible note generator"""
845
  try:
846
+ # Initialize generator
847
+ generator = FlexibleFinancialNoteGenerator()
848
+ if not generator.note_templates:
849
+ logger.error("No note templates loaded. Check notes_template.py")
850
+ return
851
+
852
+ logger.info(f"Loaded {len(generator.note_templates)} note templates")
853
+
854
  # Check for command line arguments
855
  if len(sys.argv) > 1:
856
  # Command line mode
 
863
  mode = sys.argv[1].lower()
864
  note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""
865
 
 
 
 
 
 
 
 
866
  if mode == "specific":
867
  if not note_numbers:
868
  logger.error("Note numbers required for specific mode")
 
870
 
871
  note_list = [n.strip() for n in note_numbers.split(",")]
872
  all_notes = []
873
+ successful_notes = []
874
 
875
  for note_number in note_list:
876
  if note_number in generator.note_templates:
877
  success = generator.generate_note(note_number)
878
  if success:
879
  # Load the generated note
880
+ try:
881
+ with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
882
+ note_data = json.load(f)
883
+ all_notes.append(note_data)
884
+ successful_notes.append(note_number)
885
+ logger.info(f"Note {note_number} generated successfully")
886
+ except Exception as e:
887
+ logger.error(f"Failed to load generated note {note_number}: {e}")
888
  else:
889
  logger.error(f"Failed to generate note {note_number}")
890
  else:
891
  logger.error(f"Note {note_number} not found in templates")
892
 
893
+ # Save consolidated notes
894
+ if all_notes:
895
+ output_dir = settings.output_dir
896
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
897
+ consolidated = {
898
+ "notes": all_notes,
899
+ "generation_summary": {
900
+ "requested_notes": note_list,
901
+ "successful_notes": successful_notes,
902
+ "total_successful": len(successful_notes),
903
+ "generated_on": datetime.now().isoformat()
904
+ }
905
+ }
906
+ with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
907
+ json.dump(consolidated, f, indent=2, ensure_ascii=False)
908
+ logger.info(f"Consolidated notes saved to {output_dir}/notes.json")
909
 
910
  elif mode == "all":
911
  results = generator.generate_all_notes()
912
  successful = sum(1 for success in results.values() if success)
913
  total = len(results)
914
  logger.info(f"{successful}/{total} notes generated successfully")
915
+
916
+ # Print detailed results
917
+ for note, success in results.items():
918
+ status = "βœ… SUCCESS" if success else "❌ FAILED"
919
+ logger.info(f" Note {note}: {status}")
920
+
921
  else:
922
  logger.error("Invalid mode. Use 'specific' or 'all'")
923
  sys.exit(1)
924
+
925
  else:
926
+ # Interactive mode
 
 
 
 
 
 
927
  choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
928
 
929
  if choice == "1":
930
  available_notes = list(generator.note_templates.keys())
931
  print(f"Available notes: {', '.join(available_notes)}")
932
  note_number = input("Enter note number: ").strip()
933
+
934
  if note_number in available_notes:
935
  success = generator.generate_note(note_number)
936
  logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
937
  else:
938
  logger.error(f"Note {note_number} not found")
939
+
940
  elif choice == "2":
941
  results = generator.generate_all_notes()
942
  successful = sum(1 for success in results.values() if success)
943
  total = len(results)
944
  logger.info(f"{successful}/{total} notes generated successfully")
945
+
946
+ # Print summary
947
+ print("\n" + "="*50)
948
+ print("GENERATION SUMMARY")
949
+ print("="*50)
950
+ for note, success in results.items():
951
+ status = "βœ… SUCCESS" if success else "❌ FAILED"
952
+ print(f"Note {note}: {status}")
953
+ print("="*50)
954
+
955
  else:
956
  logger.error("Invalid choice. Enter 1 or 2.")
957
+
958
+ except KeyboardInterrupt:
959
+ logger.info("Generation interrupted by user")
960
+ sys.exit(0)
961
  except Exception as e:
962
  logger.error(f"Error: {e}", exc_info=True)
963
  sys.exit(1)
964
 
965
  if __name__ == "__main__":
966
+ main()