Spaces:
Runtime error
Runtime error
Update notes/llm_notes_generator.py
Browse files- notes/llm_notes_generator.py +28 -107
notes/llm_notes_generator.py
CHANGED
|
@@ -1,14 +1,11 @@
|
|
| 1 |
-
# Minimal placeholder for FlexibleFinancialNoteGenerator
|
| 2 |
class FlexibleFinancialNoteGenerator:
    """Minimal stand-in for the note generator.

    Both generation methods ignore their arguments and return fixed
    values; nothing is read, written, or requested.
    """

    def __init__(self):
        """Create the placeholder; there is no state to set up."""

    def generate_note(self, note_number, trial_balance_path=None):
        """Report success unconditionally.

        Args:
            note_number: Accepted but not used.
            trial_balance_path: Accepted but not used.

        Returns:
            Always ``True``.
        """
        # Placeholder logic
        outcome = True
        return outcome

    def generate_all_notes(self, trial_balance_path=None):
        """Return a fixed one-entry result mapping.

        Args:
            trial_balance_path: Accepted but not used.

        Returns:
            A new dict ``{"dummy": True}`` on every call.
        """
        # Placeholder logic
        outcome = {"dummy": True}
        return outcome
|
| 13 |
|
| 14 |
import json
|
|
@@ -27,15 +24,12 @@ from pydantic_settings import BaseSettings
|
|
| 27 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 28 |
from utils.utils import convert_note_json_to_lakhs
|
| 29 |
|
| 30 |
-
# Load environment variables
|
| 31 |
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
|
| 32 |
|
| 33 |
-
# Configure logging
|
| 34 |
logging.basicConfig(level=logging.INFO)
|
| 35 |
logger = logging.getLogger(__name__)
|
| 36 |
|
| 37 |
class Settings(BaseSettings):
|
| 38 |
-
"""Application settings loaded from environment variables or .env file."""
|
| 39 |
openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
|
| 40 |
api_url: str = "https://openrouter.ai/api/v1/chat/completions"
|
| 41 |
output_dir: str = "data/generated_notes"
|
|
@@ -51,7 +45,6 @@ class Account(BaseModel):
|
|
| 51 |
# Data model for a note template. It declares two string fields, `title` and
# `full_title`, and neither has a default value.
# NOTE(review): BaseModel is presumably pydantic's; its import is not visible
# in this view - confirm.
class NoteTemplate(BaseModel):
|
| 52 |
title: str
|
| 53 |
full_title: str
|
| 54 |
-
# Add other fields as needed for your template structure
|
| 55 |
|
| 56 |
class GeneratedNote(BaseModel):
|
| 57 |
note_number: str
|
|
@@ -59,14 +52,19 @@ class GeneratedNote(BaseModel):
|
|
| 59 |
grand_total_lakhs: float
|
| 60 |
generated_on: str
|
| 61 |
assumptions: Optional[str] = None
|
| 62 |
-
# Add other fields as needed
|
| 63 |
|
| 64 |
class FlexibleFinancialNoteGenerator:
|
| 65 |
-
def __init__(self):
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
logger.
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
self.api_url = settings.api_url
|
| 71 |
self.headers = {
|
| 72 |
"Authorization": f"Bearer {self.openrouter_api_key}",
|
|
@@ -75,18 +73,13 @@ class FlexibleFinancialNoteGenerator:
|
|
| 75 |
"X-Title": "Financial Note Generator"
|
| 76 |
}
|
| 77 |
self.note_templates = self.load_note_templates()
|
| 78 |
-
# Updated model list with DeepSeek as first choice
|
| 79 |
self.recommended_models = [
|
| 80 |
-
|
| 81 |
-
"deepseek/deepseek-r1",
|
| 82 |
-
#"deepseek/deepseek-coder",
|
| 83 |
"mistralai/mixtral-8x7b-instruct"
|
| 84 |
]
|
| 85 |
|
| 86 |
def load_note_templates(self) -> Dict[str, Any]:
|
| 87 |
-
"""Load note templates from notes_template.py file."""
|
| 88 |
try:
|
| 89 |
-
# Add parent directory to path for imports when run as script
|
| 90 |
if __name__ == "__main__":
|
| 91 |
sys.path.append(str(Path(__file__).parent.parent))
|
| 92 |
|
|
@@ -100,7 +93,6 @@ class FlexibleFinancialNoteGenerator:
|
|
| 100 |
return {}
|
| 101 |
|
| 102 |
def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
|
| 103 |
-
"""Load the complete trial balance from Excel or JSON."""
|
| 104 |
try:
|
| 105 |
if file_path.endswith('.json'):
|
| 106 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
@@ -130,14 +122,12 @@ class FlexibleFinancialNoteGenerator:
|
|
| 130 |
return None
|
| 131 |
|
| 132 |
def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
|
| 133 |
-
"""Build comprehensive LLM prompt with strict JSON output requirements"""
|
| 134 |
if note_number not in self.note_templates:
|
| 135 |
return None
|
| 136 |
|
| 137 |
template = self.note_templates[note_number]
|
| 138 |
all_accounts = trial_balance_data.get("accounts", [])
|
| 139 |
|
| 140 |
-
# Build context with full trial balance
|
| 141 |
context = {
|
| 142 |
"note_info": {
|
| 143 |
"number": note_number,
|
|
@@ -152,18 +142,17 @@ class FlexibleFinancialNoteGenerator:
|
|
| 152 |
"financial_year": "2023-24"
|
| 153 |
}
|
| 154 |
|
| 155 |
-
# Get note-specific classification guidance
|
| 156 |
classification_guide = self._get_classification_guide(note_number)
|
| 157 |
|
| 158 |
prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
|
| 159 |
|
| 160 |
-
|
| 161 |
1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
|
| 162 |
2. START YOUR RESPONSE WITH {{ and END WITH }}
|
| 163 |
3. DO NOT USE ```json``` CODE BLOCKS
|
| 164 |
4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
|
| 165 |
|
| 166 |
-
|
| 167 |
{{
|
| 168 |
"title": "{template.get('title', '')}",
|
| 169 |
"full_title": "{template.get('full_title', '')}",
|
|
@@ -191,7 +180,7 @@ class FlexibleFinancialNoteGenerator:
|
|
| 191 |
"assumptions": "List any assumptions made during classification"
|
| 192 |
}}
|
| 193 |
|
| 194 |
-
|
| 195 |
- First element: Header row with column labels (March 31, 2024, March 31, 2023)
|
| 196 |
- Subsequent elements: Data categories with subcategories
|
| 197 |
- Each data category must have:
|
|
@@ -200,15 +189,15 @@ class FlexibleFinancialNoteGenerator:
|
|
| 200 |
* "total": Sum of current year values in subcategories
|
| 201 |
* "previous_total": Sum of previous year values in subcategories
|
| 202 |
|
| 203 |
-
|
| 204 |
1. Analyze ALL trial balance accounts provided below
|
| 205 |
2. Identify accounts that belong to "{template['full_title']}"
|
| 206 |
3. Classify into appropriate subcategories per Schedule III
|
| 207 |
-
4. Convert all amounts to lakhs (
|
| 208 |
5. Calculate accurate totals ensuring mathematical consistency
|
| 209 |
6. Structure output in hierarchical "structure" array format
|
| 210 |
|
| 211 |
-
|
| 212 |
- All amounts MUST be in lakhs (divide original by 100,000)
|
| 213 |
- All subtotals MUST equal the grand total exactly
|
| 214 |
- Use 0.00 for March 2023 if data missing
|
|
@@ -216,16 +205,16 @@ class FlexibleFinancialNoteGenerator:
|
|
| 216 |
- Ensure "total" = sum of "value" in subcategories
|
| 217 |
- Ensure "previous_total" = sum of "previous_value" in subcategories
|
| 218 |
|
| 219 |
-
|
| 220 |
{classification_guide}
|
| 221 |
|
| 222 |
-
|
| 223 |
{json.dumps(context, indent=2)}
|
| 224 |
|
| 225 |
-
|
| 226 |
{json.dumps(template, indent=2)}
|
| 227 |
|
| 228 |
-
|
| 229 |
- If no accounts match this note category, use empty categories with 0.00 totals
|
| 230 |
- Ensure "metadata.note_number" exactly matches {note_number}
|
| 231 |
- Document classification logic in "assumptions" field
|
|
@@ -236,7 +225,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 236 |
return prompt
|
| 237 |
|
| 238 |
def _get_classification_guide(self, note_number: str) -> str:
|
| 239 |
-
"""Get note-specific classification guidance"""
|
| 240 |
guides = {
|
| 241 |
"10": """
|
| 242 |
**Note 10 - Long Term Loans and Advances:**
|
|
@@ -258,7 +246,7 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 258 |
""",
|
| 259 |
"13": """
|
| 260 |
**Note 13 - Cash and Cash Equivalents:**
|
| 261 |
-
- Include: Cash on hand, balances with banks (current/savings), short-term deposits (
|
| 262 |
- Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
|
| 263 |
- Show: Balances in current accounts, savings accounts, fixed deposits separately
|
| 264 |
""",
|
|
@@ -279,7 +267,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 279 |
return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
|
| 280 |
|
| 281 |
def call_openrouter_api(self, prompt: str) -> Optional[str]:
|
| 282 |
-
"""Make API call to OpenRouter with model fallback"""
|
| 283 |
for model in self.recommended_models:
|
| 284 |
logger.info(f"Trying model: {model}")
|
| 285 |
payload = {
|
|
@@ -312,6 +299,9 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 312 |
logger.warning(f"Model {model} not found (404), trying next model")
|
| 313 |
elif e.response.status_code == 402:
|
| 314 |
logger.warning(f"Model {model} requires payment (402), trying next model")
|
|
|
|
|
|
|
|
|
|
| 315 |
else:
|
| 316 |
logger.error(f"HTTP error with {model}: {e}")
|
| 317 |
except Exception as e:
|
|
@@ -321,11 +311,8 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
| 324 |
-
"""Extract JSON from response, handling markdown code blocks and cleaning"""
|
| 325 |
response_text = response_text.strip()
|
| 326 |
|
| 327 |
-
# CRITICAL FIX: Handle concatenated/duplicate JSON (e.g., "}{\n{")
|
| 328 |
-
# Find the first complete JSON object
|
| 329 |
json_objects = []
|
| 330 |
brace_count = 0
|
| 331 |
start_idx = -1
|
|
@@ -338,12 +325,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 338 |
elif char == '}':
|
| 339 |
brace_count -= 1
|
| 340 |
if brace_count == 0 and start_idx != -1:
|
| 341 |
-
# Found complete JSON object
|
| 342 |
potential_json = response_text[start_idx:i+1]
|
| 343 |
try:
|
| 344 |
parsed = json.loads(potential_json)
|
| 345 |
json_objects.append((parsed, potential_json))
|
| 346 |
-
# Use the first valid JSON object
|
| 347 |
break
|
| 348 |
except json.JSONDecodeError:
|
| 349 |
continue
|
|
@@ -352,8 +337,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 352 |
logger.info("Successfully extracted first valid JSON object from response")
|
| 353 |
return json_objects[0]
|
| 354 |
|
| 355 |
-
# Fallback: Try original extraction methods
|
| 356 |
-
# Remove any leading/trailing text outside JSON
|
| 357 |
json_patterns = [
|
| 358 |
r'```json\s*(.*?)\s*```',
|
| 359 |
r'```\s*(.*?)\s*```',
|
|
@@ -370,12 +353,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 370 |
except json.JSONDecodeError:
|
| 371 |
continue
|
| 372 |
|
| 373 |
-
# Try parsing the entire response as JSON
|
| 374 |
try:
|
| 375 |
json_data = json.loads(response_text)
|
| 376 |
return json_data, response_text
|
| 377 |
except json.JSONDecodeError:
|
| 378 |
-
# Last attempt: find JSON-like structure
|
| 379 |
try:
|
| 380 |
start = response_text.find('{')
|
| 381 |
end = response_text.rfind('}') + 1
|
|
@@ -389,13 +370,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 389 |
return None, None
|
| 390 |
|
| 391 |
def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
|
| 392 |
-
"""Validate JSON structure and auto-fix missing required fields"""
|
| 393 |
fixed_data = json_data.copy()
|
| 394 |
|
| 395 |
-
# Get template for this note
|
| 396 |
template = self.note_templates.get(note_number, {})
|
| 397 |
|
| 398 |
-
# Auto-fix title fields
|
| 399 |
if "title" not in fixed_data or not fixed_data["title"]:
|
| 400 |
fixed_data["title"] = template.get("title", f"Note {note_number}")
|
| 401 |
logger.info(f"Auto-fixed missing title field")
|
|
@@ -404,18 +382,14 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 404 |
fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
|
| 405 |
logger.info(f"Auto-fixed missing full_title field")
|
| 406 |
|
| 407 |
-
# Auto-fix or create metadata
|
| 408 |
if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
|
| 409 |
fixed_data["metadata"] = {}
|
| 410 |
logger.info("Auto-created metadata object")
|
| 411 |
|
| 412 |
-
# CRITICAL FIX: Ensure note_number is correct integer, not 0.0
|
| 413 |
metadata_note_num = fixed_data["metadata"].get("note_number")
|
| 414 |
try:
|
| 415 |
-
# Convert note_number string to int
|
| 416 |
expected_note_num = int(note_number)
|
| 417 |
|
| 418 |
-
# Check if metadata note_number is wrong (0, 0.0, or mismatch)
|
| 419 |
if (metadata_note_num is None or
|
| 420 |
metadata_note_num == 0 or
|
| 421 |
metadata_note_num == 0.0 or
|
|
@@ -431,7 +405,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 431 |
fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
|
| 432 |
logger.info("Auto-fixed missing metadata.generated_on field")
|
| 433 |
|
| 434 |
-
# Auto-fix or create structure array
|
| 435 |
if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
|
| 436 |
logger.warning("Structure array missing, creating default structure")
|
| 437 |
fixed_data["structure"] = [
|
|
@@ -452,7 +425,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 452 |
}
|
| 453 |
]
|
| 454 |
else:
|
| 455 |
-
# Validate and fix structure elements
|
| 456 |
if len(fixed_data["structure"]) == 0:
|
| 457 |
logger.warning("Empty structure array, adding default elements")
|
| 458 |
fixed_data["structure"] = [
|
|
@@ -465,7 +437,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 465 |
}
|
| 466 |
]
|
| 467 |
|
| 468 |
-
# Ensure each structure element has required fields
|
| 469 |
for i, struct_elem in enumerate(fixed_data["structure"]):
|
| 470 |
if not isinstance(struct_elem, dict):
|
| 471 |
continue
|
|
@@ -476,7 +447,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 476 |
if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
|
| 477 |
struct_elem["subcategories"] = []
|
| 478 |
|
| 479 |
-
# For data rows (not header), ensure totals exist
|
| 480 |
if i > 0 and struct_elem.get("subcategories"):
|
| 481 |
if "total" not in struct_elem:
|
| 482 |
struct_elem["total"] = sum(
|
|
@@ -492,7 +462,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 492 |
if isinstance(sub, dict)
|
| 493 |
)
|
| 494 |
|
| 495 |
-
# Auto-fix assumptions
|
| 496 |
if "assumptions" not in fixed_data:
|
| 497 |
fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
|
| 498 |
logger.info("Auto-added default assumptions")
|
|
@@ -500,10 +469,8 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 500 |
return fixed_data
|
| 501 |
|
| 502 |
def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
|
| 503 |
-
"""Validate that the JSON matches expected structure"""
|
| 504 |
required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
|
| 505 |
|
| 506 |
-
# Check required fields
|
| 507 |
missing_fields = []
|
| 508 |
for field in required_fields:
|
| 509 |
if field not in json_data:
|
|
@@ -512,7 +479,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 512 |
if missing_fields:
|
| 513 |
return False, f"Missing required fields: {', '.join(missing_fields)}"
|
| 514 |
|
| 515 |
-
# Check metadata structure
|
| 516 |
if not isinstance(json_data.get("metadata"), dict):
|
| 517 |
return False, "metadata must be an object"
|
| 518 |
|
|
@@ -523,7 +489,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 523 |
if str(metadata.get("note_number", "")) != str(note_number):
|
| 524 |
return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
|
| 525 |
|
| 526 |
-
# Check structure array
|
| 527 |
if not isinstance(json_data.get("structure"), list):
|
| 528 |
return False, "structure must be an array"
|
| 529 |
|
|
@@ -533,7 +498,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 533 |
return True, "Validation passed"
|
| 534 |
|
| 535 |
def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
|
| 536 |
-
"""Generate markdown table from structure array"""
|
| 537 |
try:
|
| 538 |
title = json_data.get("full_title", json_data.get("title", "Financial Note"))
|
| 539 |
structure = json_data.get("structure", [])
|
|
@@ -541,44 +505,36 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 541 |
if not structure:
|
| 542 |
return f"# {title}\n\n*No data available*"
|
| 543 |
|
| 544 |
-
# Start markdown
|
| 545 |
md_lines = [f"# {title}\n"]
|
| 546 |
|
| 547 |
-
# Get header row (first element)
|
| 548 |
header_elem = structure[0] if len(structure) > 0 else None
|
| 549 |
if header_elem and header_elem.get("subcategories"):
|
| 550 |
headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
|
| 551 |
md_lines.append("| Particulars | " + " | ".join(headers) + " |")
|
| 552 |
md_lines.append("|" + "---|" * (len(headers) + 1))
|
| 553 |
|
| 554 |
-
# Process data rows
|
| 555 |
for i in range(1, len(structure)):
|
| 556 |
elem = structure[i]
|
| 557 |
category = elem.get("category", "")
|
| 558 |
subcategories = elem.get("subcategories", [])
|
| 559 |
|
| 560 |
-
# Add category header if exists
|
| 561 |
if category:
|
| 562 |
md_lines.append(f"\n**{category}**\n")
|
| 563 |
|
| 564 |
-
# Add subcategory rows
|
| 565 |
for sub in subcategories:
|
| 566 |
label = sub.get("label", "")
|
| 567 |
value = sub.get("value", 0.00)
|
| 568 |
previous_value = sub.get("previous_value", 0.00)
|
| 569 |
md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
|
| 570 |
|
| 571 |
-
# Add total row if exists
|
| 572 |
if "total" in elem:
|
| 573 |
total = elem.get("total", 0.00)
|
| 574 |
previous_total = elem.get("previous_total", 0.00)
|
| 575 |
md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
|
| 576 |
|
| 577 |
-
# Add metadata
|
| 578 |
metadata = json_data.get("metadata", {})
|
| 579 |
md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
|
| 580 |
|
| 581 |
-
# Add assumptions if present
|
| 582 |
assumptions = json_data.get("assumptions", "")
|
| 583 |
if assumptions:
|
| 584 |
md_lines.append(f"\n\n**Assumptions:** {assumptions}")
|
|
@@ -590,41 +546,32 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 590 |
return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
|
| 591 |
|
| 592 |
def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
|
| 593 |
-
"""Save the generated note to file with robust validation and auto-fixing"""
|
| 594 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 595 |
json_output_path = f"{output_dir}/notes.json"
|
| 596 |
raw_output_path = f"{output_dir}/notes_raw.txt"
|
| 597 |
formatted_md_path = f"{output_dir}/notes_formatted.md"
|
| 598 |
|
| 599 |
try:
|
| 600 |
-
# Always save raw response for debugging
|
| 601 |
with open(raw_output_path, 'w', encoding='utf-8') as f:
|
| 602 |
f.write(note_data)
|
| 603 |
|
| 604 |
-
# Extract and validate JSON
|
| 605 |
json_data, json_string = self.extract_json_from_markdown(note_data)
|
| 606 |
|
| 607 |
if json_data:
|
| 608 |
-
# Auto-fix missing or incorrect fields
|
| 609 |
json_data = self.validate_and_fix_json(json_data, note_number)
|
| 610 |
|
| 611 |
-
# Final validation
|
| 612 |
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
|
| 613 |
if not is_valid:
|
| 614 |
logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
|
| 615 |
|
| 616 |
-
# Convert to lakhs if needed
|
| 617 |
json_data = convert_note_json_to_lakhs(json_data)
|
| 618 |
|
| 619 |
-
# Save JSON
|
| 620 |
with open(json_output_path, 'w', encoding='utf-8') as f:
|
| 621 |
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
| 622 |
logger.info(f"JSON saved to {json_output_path}")
|
| 623 |
|
| 624 |
-
# Generate and save markdown
|
| 625 |
md_content = json_data.get('markdown_content', '')
|
| 626 |
if not md_content:
|
| 627 |
-
# Generate markdown from structure
|
| 628 |
md_content = self._generate_markdown_from_structure(json_data)
|
| 629 |
logger.info("Auto-generated markdown from structure array")
|
| 630 |
|
|
@@ -633,7 +580,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 633 |
|
| 634 |
return True
|
| 635 |
else:
|
| 636 |
-
# Create fallback JSON with all required fields
|
| 637 |
template = self.note_templates.get(note_number, {})
|
| 638 |
fallback_json = {
|
| 639 |
"title": template.get("title", f"Note {note_number}"),
|
|
@@ -672,7 +618,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 672 |
except Exception as e:
|
| 673 |
logger.error(f"Error saving files: {e}")
|
| 674 |
|
| 675 |
-
# Emergency fallback
|
| 676 |
try:
|
| 677 |
template = self.note_templates.get(note_number, {})
|
| 678 |
emergency_json = {
|
|
@@ -703,42 +648,35 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 703 |
return False
|
| 704 |
|
| 705 |
def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
    """Run the generation pipeline for a single note.

    The stages run in order: template lookup, trial balance loading,
    prompt construction, the LLM call, and saving. The first stage that
    produces a falsy result stops the run.

    Args:
        note_number: Key looked up in ``self.note_templates``.
        trial_balance_path: Path passed to ``load_trial_balance``.

    Returns:
        ``False`` when the template is unknown or any stage before saving
        yields a falsy value; otherwise whatever ``save_generated_note``
        returns.
    """
    known_templates = self.note_templates
    if note_number not in known_templates:
        logger.error(f"Note template {note_number} not found")
        return False

    logger.info(f"Starting Note {note_number} generation...")

    # Stage 1: the complete trial balance.
    ledger = self.load_trial_balance(trial_balance_path)
    if not ledger:
        return False

    # Stage 2: the prompt built from the note number and the trial balance.
    llm_prompt = self.build_llm_prompt(note_number, ledger)
    if not llm_prompt:
        logger.error("Failed to build prompt")
        return False

    # Stage 3: the model response.
    llm_reply = self.call_openrouter_api(llm_prompt)
    if not llm_reply:
        logger.error("Failed to get API response")
        return False

    # Stage 4: persist the response and report how it went.
    saved = self.save_generated_note(llm_reply, note_number)
    if saved:
        outcome = 'generated successfully'
    else:
        outcome = 'generated with issues'
    logger.info(f"Note {note_number} {outcome}")
    return saved
|
| 734 |
|
| 735 |
def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
|
| 736 |
-
"""Generate all available notes and save them in a single notes.json file."""
|
| 737 |
logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
|
| 738 |
results = {}
|
| 739 |
all_notes = []
|
| 740 |
|
| 741 |
-
# Load trial balance once
|
| 742 |
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 743 |
if not trial_balance:
|
| 744 |
logger.error("Failed to load trial balance")
|
|
@@ -747,22 +685,18 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 747 |
for note_number in self.note_templates.keys():
|
| 748 |
logger.info(f"Processing Note {note_number}")
|
| 749 |
|
| 750 |
-
# Build prompt for this note
|
| 751 |
prompt = self.build_llm_prompt(note_number, trial_balance)
|
| 752 |
if not prompt:
|
| 753 |
results[note_number] = False
|
| 754 |
continue
|
| 755 |
|
| 756 |
-
# Get LLM response
|
| 757 |
response = self.call_openrouter_api(prompt)
|
| 758 |
if not response:
|
| 759 |
results[note_number] = False
|
| 760 |
continue
|
| 761 |
|
| 762 |
-
# Parse JSON response
|
| 763 |
json_data, _ = self.extract_json_from_markdown(response)
|
| 764 |
if json_data:
|
| 765 |
-
# Auto-fix and validate
|
| 766 |
json_data = self.validate_and_fix_json(json_data, note_number)
|
| 767 |
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
|
| 768 |
|
|
@@ -773,13 +707,11 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 773 |
logger.info(f"Note {note_number} processed successfully")
|
| 774 |
else:
|
| 775 |
logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
|
| 776 |
-
# Still include it but mark as failed
|
| 777 |
json_data = convert_note_json_to_lakhs(json_data)
|
| 778 |
all_notes.append(json_data)
|
| 779 |
results[note_number] = False
|
| 780 |
else:
|
| 781 |
logger.error(f"Note {note_number}: Could not parse JSON from response")
|
| 782 |
-
# Create fallback note with new structure
|
| 783 |
template = self.note_templates.get(note_number, {})
|
| 784 |
fallback_note = {
|
| 785 |
"title": template.get("title", f"Note {note_number}"),
|
|
@@ -811,11 +743,9 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 811 |
all_notes.append(fallback_note)
|
| 812 |
results[note_number] = False
|
| 813 |
|
| 814 |
-
# Brief pause between API calls
|
| 815 |
import time
|
| 816 |
time.sleep(2)
|
| 817 |
|
| 818 |
-
# Save all notes in consolidated file
|
| 819 |
output_dir = settings.output_dir
|
| 820 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 821 |
|
|
@@ -841,9 +771,7 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
|
|
| 841 |
return results
|
| 842 |
|
| 843 |
def main() -> None:
|
| 844 |
-
"""Main function to run the flexible note generator"""
|
| 845 |
try:
|
| 846 |
-
# Initialize generator
|
| 847 |
generator = FlexibleFinancialNoteGenerator()
|
| 848 |
if not generator.note_templates:
|
| 849 |
logger.error("No note templates loaded. Check notes_template.py")
|
|
@@ -851,9 +779,7 @@ def main() -> None:
|
|
| 851 |
|
| 852 |
logger.info(f"Loaded {len(generator.note_templates)} note templates")
|
| 853 |
|
| 854 |
-
# Check for command line arguments
|
| 855 |
if len(sys.argv) > 1:
|
| 856 |
-
# Command line mode
|
| 857 |
if len(sys.argv) < 3:
|
| 858 |
logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
|
| 859 |
logger.error(" mode: 'specific' or 'all'")
|
|
@@ -876,7 +802,6 @@ def main() -> None:
|
|
| 876 |
if note_number in generator.note_templates:
|
| 877 |
success = generator.generate_note(note_number)
|
| 878 |
if success:
|
| 879 |
-
# Load the generated note
|
| 880 |
try:
|
| 881 |
with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
|
| 882 |
note_data = json.load(f)
|
|
@@ -890,7 +815,6 @@ def main() -> None:
|
|
| 890 |
else:
|
| 891 |
logger.error(f"Note {note_number} not found in templates")
|
| 892 |
|
| 893 |
-
# Save consolidated notes
|
| 894 |
if all_notes:
|
| 895 |
output_dir = settings.output_dir
|
| 896 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
@@ -913,9 +837,8 @@ def main() -> None:
|
|
| 913 |
total = len(results)
|
| 914 |
logger.info(f"{successful}/{total} notes generated successfully")
|
| 915 |
|
| 916 |
-
# Print detailed results
|
| 917 |
for note, success in results.items():
|
| 918 |
-
status = "
|
| 919 |
logger.info(f" Note {note}: {status}")
|
| 920 |
|
| 921 |
else:
|
|
@@ -923,7 +846,6 @@ def main() -> None:
|
|
| 923 |
sys.exit(1)
|
| 924 |
|
| 925 |
else:
|
| 926 |
-
# Interactive mode
|
| 927 |
choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
|
| 928 |
|
| 929 |
if choice == "1":
|
|
@@ -943,12 +865,11 @@ def main() -> None:
|
|
| 943 |
total = len(results)
|
| 944 |
logger.info(f"{successful}/{total} notes generated successfully")
|
| 945 |
|
| 946 |
-
# Print summary
|
| 947 |
print("\n" + "="*50)
|
| 948 |
print("GENERATION SUMMARY")
|
| 949 |
print("="*50)
|
| 950 |
for note, success in results.items():
|
| 951 |
-
status = "
|
| 952 |
print(f"Note {note}: {status}")
|
| 953 |
print("="*50)
|
| 954 |
|
|
|
|
|
|
|
| 1 |
class FlexibleFinancialNoteGenerator:
    """Placeholder note generator whose methods return canned results.

    No argument passed to either generation method influences the result.
    """

    def __init__(self):
        """Nothing to initialise."""

    def generate_note(self, note_number, trial_balance_path=None):
        """Return ``True`` regardless of the arguments supplied.

        Args:
            note_number: Accepted but not used.
            trial_balance_path: Accepted but not used.

        Returns:
            Always ``True``.
        """
        result = True
        return result

    def generate_all_notes(self, trial_balance_path=None):
        """Return a fresh ``{"dummy": True}`` mapping.

        Args:
            trial_balance_path: Accepted but not used.

        Returns:
            A newly built dict with the single entry ``"dummy": True``.
        """
        result = {"dummy": True}
        return result
|
| 10 |
|
| 11 |
import json
|
|
|
|
| 24 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 25 |
from utils.utils import convert_note_json_to_lakhs
|
| 26 |
|
|
|
|
| 27 |
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
|
| 28 |
|
|
|
|
| 29 |
logging.basicConfig(level=logging.INFO)
|
| 30 |
logger = logging.getLogger(__name__)
|
| 31 |
|
| 32 |
class Settings(BaseSettings):
|
|
|
|
| 33 |
openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
|
| 34 |
api_url: str = "https://openrouter.ai/api/v1/chat/completions"
|
| 35 |
output_dir: str = "data/generated_notes"
|
|
|
|
| 45 |
# Data model for a note template. It declares two string fields, `title` and
# `full_title`, and neither has a default value.
# NOTE(review): BaseModel is presumably pydantic's; its import is not visible
# in this view - confirm.
class NoteTemplate(BaseModel):
|
| 46 |
title: str
|
| 47 |
full_title: str
|
|
|
|
| 48 |
|
| 49 |
class GeneratedNote(BaseModel):
|
| 50 |
note_number: str
|
|
|
|
| 52 |
grand_total_lakhs: float
|
| 53 |
generated_on: str
|
| 54 |
assumptions: Optional[str] = None
|
|
|
|
| 55 |
|
| 56 |
class FlexibleFinancialNoteGenerator:
|
| 57 |
+
def __init__(self, user_api_key: Optional[str] = None):
|
| 58 |
+
if user_api_key:
|
| 59 |
+
self.openrouter_api_key = user_api_key
|
| 60 |
+
logger.info("Using user-provided API key")
|
| 61 |
+
else:
|
| 62 |
+
self.openrouter_api_key = settings.openrouter_api_key
|
| 63 |
+
if not self.openrouter_api_key:
|
| 64 |
+
logger.error("OPENROUTER_API_KEY not found in .env file and no user key provided")
|
| 65 |
+
raise ValueError("OPENROUTER_API_KEY not found in .env file and no user key provided")
|
| 66 |
+
logger.info("Using API key from .env file")
|
| 67 |
+
|
| 68 |
self.api_url = settings.api_url
|
| 69 |
self.headers = {
|
| 70 |
"Authorization": f"Bearer {self.openrouter_api_key}",
|
|
|
|
| 73 |
"X-Title": "Financial Note Generator"
|
| 74 |
}
|
| 75 |
self.note_templates = self.load_note_templates()
|
|
|
|
| 76 |
self.recommended_models = [
|
| 77 |
+
"deepseek/deepseek-r1",
|
|
|
|
|
|
|
| 78 |
"mistralai/mixtral-8x7b-instruct"
|
| 79 |
]
|
| 80 |
|
| 81 |
def load_note_templates(self) -> Dict[str, Any]:
|
|
|
|
| 82 |
try:
|
|
|
|
| 83 |
if __name__ == "__main__":
|
| 84 |
sys.path.append(str(Path(__file__).parent.parent))
|
| 85 |
|
|
|
|
| 93 |
return {}
|
| 94 |
|
| 95 |
def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
|
|
|
|
| 96 |
try:
|
| 97 |
if file_path.endswith('.json'):
|
| 98 |
with open(file_path, 'r', encoding='utf-8') as f:
|
|
|
|
| 122 |
return None
|
| 123 |
|
| 124 |
def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
|
|
|
|
| 125 |
if note_number not in self.note_templates:
|
| 126 |
return None
|
| 127 |
|
| 128 |
template = self.note_templates[note_number]
|
| 129 |
all_accounts = trial_balance_data.get("accounts", [])
|
| 130 |
|
|
|
|
| 131 |
context = {
|
| 132 |
"note_info": {
|
| 133 |
"number": note_number,
|
|
|
|
| 142 |
"financial_year": "2023-24"
|
| 143 |
}
|
| 144 |
|
|
|
|
| 145 |
classification_guide = self._get_classification_guide(note_number)
|
| 146 |
|
| 147 |
prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
|
| 148 |
|
| 149 |
+
ðŸ"´ CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
|
| 150 |
1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
|
| 151 |
2. START YOUR RESPONSE WITH {{ and END WITH }}
|
| 152 |
3. DO NOT USE ```json``` CODE BLOCKS
|
| 153 |
4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
|
| 154 |
|
| 155 |
+
ðŸ"´ REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
|
| 156 |
{{
|
| 157 |
"title": "{template.get('title', '')}",
|
| 158 |
"full_title": "{template.get('full_title', '')}",
|
|
|
|
| 180 |
"assumptions": "List any assumptions made during classification"
|
| 181 |
}}
|
| 182 |
|
| 183 |
+
ðŸ"´ STRUCTURE ARRAY EXPLAINED:
|
| 184 |
- First element: Header row with column labels (March 31, 2024, March 31, 2023)
|
| 185 |
- Subsequent elements: Data categories with subcategories
|
| 186 |
- Each data category must have:
|
|
|
|
| 189 |
* "total": Sum of current year values in subcategories
|
| 190 |
* "previous_total": Sum of previous year values in subcategories
|
| 191 |
|
| 192 |
+
ðŸ"´ YOUR TASK:
|
| 193 |
1. Analyze ALL trial balance accounts provided below
|
| 194 |
2. Identify accounts that belong to "{template['full_title']}"
|
| 195 |
3. Classify into appropriate subcategories per Schedule III
|
| 196 |
+
4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
|
| 197 |
5. Calculate accurate totals ensuring mathematical consistency
|
| 198 |
6. Structure output in hierarchical "structure" array format
|
| 199 |
|
| 200 |
+
ðŸ"´ MATHEMATICAL REQUIREMENTS:
|
| 201 |
- All amounts MUST be in lakhs (divide original by 100,000)
|
| 202 |
- All subtotals MUST equal the grand total exactly
|
| 203 |
- Use 0.00 for March 2023 if data missing
|
|
|
|
| 205 |
- Ensure "total" = sum of "value" in subcategories
|
| 206 |
- Ensure "previous_total" = sum of "previous_value" in subcategories
|
| 207 |
|
| 208 |
+
ðŸ"´ CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
|
| 209 |
{classification_guide}
|
| 210 |
|
| 211 |
+
ðŸ"´ COMPLETE TRIAL BALANCE DATA:
|
| 212 |
{json.dumps(context, indent=2)}
|
| 213 |
|
| 214 |
+
ðŸ"´ TEMPLATE STRUCTURE TO FOLLOW:
|
| 215 |
{json.dumps(template, indent=2)}
|
| 216 |
|
| 217 |
+
ðŸ"´ VALIDATION RULES:
|
| 218 |
- If no accounts match this note category, use empty categories with 0.00 totals
|
| 219 |
- Ensure "metadata.note_number" exactly matches {note_number}
|
| 220 |
- Document classification logic in "assumptions" field
|
|
|
|
| 225 |
return prompt
|
| 226 |
|
| 227 |
def _get_classification_guide(self, note_number: str) -> str:
|
|
|
|
| 228 |
guides = {
|
| 229 |
"10": """
|
| 230 |
**Note 10 - Long Term Loans and Advances:**
|
|
|
|
| 246 |
""",
|
| 247 |
"13": """
|
| 248 |
**Note 13 - Cash and Cash Equivalents:**
|
| 249 |
+
- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
|
| 250 |
- Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
|
| 251 |
- Show: Balances in current accounts, savings accounts, fixed deposits separately
|
| 252 |
""",
|
|
|
|
| 267 |
return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
|
| 268 |
|
| 269 |
def call_openrouter_api(self, prompt: str) -> Optional[str]:
|
|
|
|
| 270 |
for model in self.recommended_models:
|
| 271 |
logger.info(f"Trying model: {model}")
|
| 272 |
payload = {
|
|
|
|
| 299 |
logger.warning(f"Model {model} not found (404), trying next model")
|
| 300 |
elif e.response.status_code == 402:
|
| 301 |
logger.warning(f"Model {model} requires payment (402), trying next model")
|
| 302 |
+
elif e.response.status_code == 401:
|
| 303 |
+
logger.error(f"Invalid API key (401)")
|
| 304 |
+
return None
|
| 305 |
else:
|
| 306 |
logger.error(f"HTTP error with {model}: {e}")
|
| 307 |
except Exception as e:
|
|
|
|
| 311 |
return None
|
| 312 |
|
| 313 |
def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
|
|
|
| 314 |
response_text = response_text.strip()
|
| 315 |
|
|
|
|
|
|
|
| 316 |
json_objects = []
|
| 317 |
brace_count = 0
|
| 318 |
start_idx = -1
|
|
|
|
| 325 |
elif char == '}':
|
| 326 |
brace_count -= 1
|
| 327 |
if brace_count == 0 and start_idx != -1:
|
|
|
|
| 328 |
potential_json = response_text[start_idx:i+1]
|
| 329 |
try:
|
| 330 |
parsed = json.loads(potential_json)
|
| 331 |
json_objects.append((parsed, potential_json))
|
|
|
|
| 332 |
break
|
| 333 |
except json.JSONDecodeError:
|
| 334 |
continue
|
|
|
|
| 337 |
logger.info("Successfully extracted first valid JSON object from response")
|
| 338 |
return json_objects[0]
|
| 339 |
|
|
|
|
|
|
|
| 340 |
json_patterns = [
|
| 341 |
r'```json\s*(.*?)\s*```',
|
| 342 |
r'```\s*(.*?)\s*```',
|
|
|
|
| 353 |
except json.JSONDecodeError:
|
| 354 |
continue
|
| 355 |
|
|
|
|
| 356 |
try:
|
| 357 |
json_data = json.loads(response_text)
|
| 358 |
return json_data, response_text
|
| 359 |
except json.JSONDecodeError:
|
|
|
|
| 360 |
try:
|
| 361 |
start = response_text.find('{')
|
| 362 |
end = response_text.rfind('}') + 1
|
|
|
|
| 370 |
return None, None
|
| 371 |
|
| 372 |
def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
|
|
|
|
| 373 |
fixed_data = json_data.copy()
|
| 374 |
|
|
|
|
| 375 |
template = self.note_templates.get(note_number, {})
|
| 376 |
|
|
|
|
| 377 |
if "title" not in fixed_data or not fixed_data["title"]:
|
| 378 |
fixed_data["title"] = template.get("title", f"Note {note_number}")
|
| 379 |
logger.info(f"Auto-fixed missing title field")
|
|
|
|
| 382 |
fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
|
| 383 |
logger.info(f"Auto-fixed missing full_title field")
|
| 384 |
|
|
|
|
| 385 |
if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
|
| 386 |
fixed_data["metadata"] = {}
|
| 387 |
logger.info("Auto-created metadata object")
|
| 388 |
|
|
|
|
| 389 |
metadata_note_num = fixed_data["metadata"].get("note_number")
|
| 390 |
try:
|
|
|
|
| 391 |
expected_note_num = int(note_number)
|
| 392 |
|
|
|
|
| 393 |
if (metadata_note_num is None or
|
| 394 |
metadata_note_num == 0 or
|
| 395 |
metadata_note_num == 0.0 or
|
|
|
|
| 405 |
fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
|
| 406 |
logger.info("Auto-fixed missing metadata.generated_on field")
|
| 407 |
|
|
|
|
| 408 |
if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
|
| 409 |
logger.warning("Structure array missing, creating default structure")
|
| 410 |
fixed_data["structure"] = [
|
|
|
|
| 425 |
}
|
| 426 |
]
|
| 427 |
else:
|
|
|
|
| 428 |
if len(fixed_data["structure"]) == 0:
|
| 429 |
logger.warning("Empty structure array, adding default elements")
|
| 430 |
fixed_data["structure"] = [
|
|
|
|
| 437 |
}
|
| 438 |
]
|
| 439 |
|
|
|
|
| 440 |
for i, struct_elem in enumerate(fixed_data["structure"]):
|
| 441 |
if not isinstance(struct_elem, dict):
|
| 442 |
continue
|
|
|
|
| 447 |
if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
|
| 448 |
struct_elem["subcategories"] = []
|
| 449 |
|
|
|
|
| 450 |
if i > 0 and struct_elem.get("subcategories"):
|
| 451 |
if "total" not in struct_elem:
|
| 452 |
struct_elem["total"] = sum(
|
|
|
|
| 462 |
if isinstance(sub, dict)
|
| 463 |
)
|
| 464 |
|
|
|
|
| 465 |
if "assumptions" not in fixed_data:
|
| 466 |
fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
|
| 467 |
logger.info("Auto-added default assumptions")
|
|
|
|
| 469 |
return fixed_data
|
| 470 |
|
| 471 |
def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
|
|
|
|
| 472 |
required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
|
| 473 |
|
|
|
|
| 474 |
missing_fields = []
|
| 475 |
for field in required_fields:
|
| 476 |
if field not in json_data:
|
|
|
|
| 479 |
if missing_fields:
|
| 480 |
return False, f"Missing required fields: {', '.join(missing_fields)}"
|
| 481 |
|
|
|
|
| 482 |
if not isinstance(json_data.get("metadata"), dict):
|
| 483 |
return False, "metadata must be an object"
|
| 484 |
|
|
|
|
| 489 |
if str(metadata.get("note_number", "")) != str(note_number):
|
| 490 |
return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
|
| 491 |
|
|
|
|
| 492 |
if not isinstance(json_data.get("structure"), list):
|
| 493 |
return False, "structure must be an array"
|
| 494 |
|
|
|
|
| 498 |
return True, "Validation passed"
|
| 499 |
|
| 500 |
def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
|
|
|
|
| 501 |
try:
|
| 502 |
title = json_data.get("full_title", json_data.get("title", "Financial Note"))
|
| 503 |
structure = json_data.get("structure", [])
|
|
|
|
| 505 |
if not structure:
|
| 506 |
return f"# {title}\n\n*No data available*"
|
| 507 |
|
|
|
|
| 508 |
md_lines = [f"# {title}\n"]
|
| 509 |
|
|
|
|
| 510 |
header_elem = structure[0] if len(structure) > 0 else None
|
| 511 |
if header_elem and header_elem.get("subcategories"):
|
| 512 |
headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
|
| 513 |
md_lines.append("| Particulars | " + " | ".join(headers) + " |")
|
| 514 |
md_lines.append("|" + "---|" * (len(headers) + 1))
|
| 515 |
|
|
|
|
| 516 |
for i in range(1, len(structure)):
|
| 517 |
elem = structure[i]
|
| 518 |
category = elem.get("category", "")
|
| 519 |
subcategories = elem.get("subcategories", [])
|
| 520 |
|
|
|
|
| 521 |
if category:
|
| 522 |
md_lines.append(f"\n**{category}**\n")
|
| 523 |
|
|
|
|
| 524 |
for sub in subcategories:
|
| 525 |
label = sub.get("label", "")
|
| 526 |
value = sub.get("value", 0.00)
|
| 527 |
previous_value = sub.get("previous_value", 0.00)
|
| 528 |
md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
|
| 529 |
|
|
|
|
| 530 |
if "total" in elem:
|
| 531 |
total = elem.get("total", 0.00)
|
| 532 |
previous_total = elem.get("previous_total", 0.00)
|
| 533 |
md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
|
| 534 |
|
|
|
|
| 535 |
metadata = json_data.get("metadata", {})
|
| 536 |
md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
|
| 537 |
|
|
|
|
| 538 |
assumptions = json_data.get("assumptions", "")
|
| 539 |
if assumptions:
|
| 540 |
md_lines.append(f"\n\n**Assumptions:** {assumptions}")
|
|
|
|
| 546 |
return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
|
| 547 |
|
| 548 |
def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
|
|
|
|
| 549 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 550 |
json_output_path = f"{output_dir}/notes.json"
|
| 551 |
raw_output_path = f"{output_dir}/notes_raw.txt"
|
| 552 |
formatted_md_path = f"{output_dir}/notes_formatted.md"
|
| 553 |
|
| 554 |
try:
|
|
|
|
| 555 |
with open(raw_output_path, 'w', encoding='utf-8') as f:
|
| 556 |
f.write(note_data)
|
| 557 |
|
|
|
|
| 558 |
json_data, json_string = self.extract_json_from_markdown(note_data)
|
| 559 |
|
| 560 |
if json_data:
|
|
|
|
| 561 |
json_data = self.validate_and_fix_json(json_data, note_number)
|
| 562 |
|
|
|
|
| 563 |
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
|
| 564 |
if not is_valid:
|
| 565 |
logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
|
| 566 |
|
|
|
|
| 567 |
json_data = convert_note_json_to_lakhs(json_data)
|
| 568 |
|
|
|
|
| 569 |
with open(json_output_path, 'w', encoding='utf-8') as f:
|
| 570 |
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
| 571 |
logger.info(f"JSON saved to {json_output_path}")
|
| 572 |
|
|
|
|
| 573 |
md_content = json_data.get('markdown_content', '')
|
| 574 |
if not md_content:
|
|
|
|
| 575 |
md_content = self._generate_markdown_from_structure(json_data)
|
| 576 |
logger.info("Auto-generated markdown from structure array")
|
| 577 |
|
|
|
|
| 580 |
|
| 581 |
return True
|
| 582 |
else:
|
|
|
|
| 583 |
template = self.note_templates.get(note_number, {})
|
| 584 |
fallback_json = {
|
| 585 |
"title": template.get("title", f"Note {note_number}"),
|
|
|
|
| 618 |
except Exception as e:
|
| 619 |
logger.error(f"Error saving files: {e}")
|
| 620 |
|
|
|
|
| 621 |
try:
|
| 622 |
template = self.note_templates.get(note_number, {})
|
| 623 |
emergency_json = {
|
|
|
|
| 648 |
return False
|
| 649 |
|
| 650 |
def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
|
|
|
|
| 651 |
if note_number not in self.note_templates:
|
| 652 |
logger.error(f"Note template {note_number} not found")
|
| 653 |
return False
|
| 654 |
|
| 655 |
logger.info(f"Starting Note {note_number} generation...")
|
| 656 |
|
|
|
|
| 657 |
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 658 |
if not trial_balance:
|
| 659 |
return False
|
| 660 |
|
|
|
|
| 661 |
prompt = self.build_llm_prompt(note_number, trial_balance)
|
| 662 |
if not prompt:
|
| 663 |
logger.error("Failed to build prompt")
|
| 664 |
return False
|
| 665 |
|
|
|
|
| 666 |
response = self.call_openrouter_api(prompt)
|
| 667 |
if not response:
|
| 668 |
logger.error("Failed to get API response")
|
| 669 |
return False
|
| 670 |
|
|
|
|
| 671 |
success = self.save_generated_note(response, note_number)
|
| 672 |
logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
|
| 673 |
return success
|
| 674 |
|
| 675 |
def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
|
|
|
|
| 676 |
logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
|
| 677 |
results = {}
|
| 678 |
all_notes = []
|
| 679 |
|
|
|
|
| 680 |
trial_balance = self.load_trial_balance(trial_balance_path)
|
| 681 |
if not trial_balance:
|
| 682 |
logger.error("Failed to load trial balance")
|
|
|
|
| 685 |
for note_number in self.note_templates.keys():
|
| 686 |
logger.info(f"Processing Note {note_number}")
|
| 687 |
|
|
|
|
| 688 |
prompt = self.build_llm_prompt(note_number, trial_balance)
|
| 689 |
if not prompt:
|
| 690 |
results[note_number] = False
|
| 691 |
continue
|
| 692 |
|
|
|
|
| 693 |
response = self.call_openrouter_api(prompt)
|
| 694 |
if not response:
|
| 695 |
results[note_number] = False
|
| 696 |
continue
|
| 697 |
|
|
|
|
| 698 |
json_data, _ = self.extract_json_from_markdown(response)
|
| 699 |
if json_data:
|
|
|
|
| 700 |
json_data = self.validate_and_fix_json(json_data, note_number)
|
| 701 |
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
|
| 702 |
|
|
|
|
| 707 |
logger.info(f"Note {note_number} processed successfully")
|
| 708 |
else:
|
| 709 |
logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
|
|
|
|
| 710 |
json_data = convert_note_json_to_lakhs(json_data)
|
| 711 |
all_notes.append(json_data)
|
| 712 |
results[note_number] = False
|
| 713 |
else:
|
| 714 |
logger.error(f"Note {note_number}: Could not parse JSON from response")
|
|
|
|
| 715 |
template = self.note_templates.get(note_number, {})
|
| 716 |
fallback_note = {
|
| 717 |
"title": template.get("title", f"Note {note_number}"),
|
|
|
|
| 743 |
all_notes.append(fallback_note)
|
| 744 |
results[note_number] = False
|
| 745 |
|
|
|
|
| 746 |
import time
|
| 747 |
time.sleep(2)
|
| 748 |
|
|
|
|
| 749 |
output_dir = settings.output_dir
|
| 750 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
| 751 |
|
|
|
|
| 771 |
return results
|
| 772 |
|
| 773 |
def main() -> None:
|
|
|
|
| 774 |
try:
|
|
|
|
| 775 |
generator = FlexibleFinancialNoteGenerator()
|
| 776 |
if not generator.note_templates:
|
| 777 |
logger.error("No note templates loaded. Check notes_template.py")
|
|
|
|
| 779 |
|
| 780 |
logger.info(f"Loaded {len(generator.note_templates)} note templates")
|
| 781 |
|
|
|
|
| 782 |
if len(sys.argv) > 1:
|
|
|
|
| 783 |
if len(sys.argv) < 3:
|
| 784 |
logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
|
| 785 |
logger.error(" mode: 'specific' or 'all'")
|
|
|
|
| 802 |
if note_number in generator.note_templates:
|
| 803 |
success = generator.generate_note(note_number)
|
| 804 |
if success:
|
|
|
|
| 805 |
try:
|
| 806 |
with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
|
| 807 |
note_data = json.load(f)
|
|
|
|
| 815 |
else:
|
| 816 |
logger.error(f"Note {note_number} not found in templates")
|
| 817 |
|
|
|
|
| 818 |
if all_notes:
|
| 819 |
output_dir = settings.output_dir
|
| 820 |
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
| 837 |
total = len(results)
|
| 838 |
logger.info(f"{successful}/{total} notes generated successfully")
|
| 839 |
|
|
|
|
| 840 |
for note, success in results.items():
|
| 841 |
+
status = "✅ SUCCESS" if success else "❌ FAILED"
|
| 842 |
logger.info(f" Note {note}: {status}")
|
| 843 |
|
| 844 |
else:
|
|
|
|
| 846 |
sys.exit(1)
|
| 847 |
|
| 848 |
else:
|
|
|
|
| 849 |
choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
|
| 850 |
|
| 851 |
if choice == "1":
|
|
|
|
| 865 |
total = len(results)
|
| 866 |
logger.info(f"{successful}/{total} notes generated successfully")
|
| 867 |
|
|
|
|
| 868 |
print("\n" + "="*50)
|
| 869 |
print("GENERATION SUMMARY")
|
| 870 |
print("="*50)
|
| 871 |
for note, success in results.items():
|
| 872 |
+
status = "✅ SUCCESS" if success else "❌ FAILED"
|
| 873 |
print(f"Note {note}: {status}")
|
| 874 |
print("="*50)
|
| 875 |
|