Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import logging | |
| import requests | |
| from datetime import datetime | |
| from pathlib import Path | |
| from dotenv import load_dotenv | |
| import re | |
| import sys | |
| from typing import Dict, List, Any, Optional, Tuple | |
| import pandas as pd | |
| from pydantic import BaseModel, Field, ValidationError | |
| from pydantic_settings import BaseSettings | |
# Make the project root importable so sibling packages (`utils`, `notes_template`)
# resolve when this file is executed directly as a script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.utils import convert_note_json_to_lakhs
# Load environment variables (e.g. OPENROUTER_API_KEY) from the project-root .env file.
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Runtime configuration for the note generator.

    Values can be overridden via environment variables (pydantic-settings);
    unknown environment variables are ignored rather than rejected.
    """
    # OpenRouter chat-completions endpoint.
    api_url: str = "https://openrouter.ai/api/v1/chat/completions"
    # Directory where generated notes (JSON / markdown) are written.
    output_dir: str = "data/generated_notes"
    # Default path to the parsed trial balance JSON input.
    trial_balance_json: str = "data/output1/parsed_trial_balance.json"
    model_config = {
        "extra": "ignore"
    }
settings = Settings()
class Account(BaseModel):
    """A single trial-balance account entry."""
    account_name: str
    amount: float
    # Optional grouping/classification label carried over from the source data.
    group: Optional[str] = None
class NoteTemplate(BaseModel):
    """Minimal shape of one entry in the note_templates mapping."""
    title: str
    full_title: str
class GeneratedNote(BaseModel):
    """Schema describing one generated financial note.

    NOTE(review): this model is declared but not used for validation anywhere
    in this file — it appears to document the intended output shape.
    """
    note_number: str
    markdown_content: str
    grand_total_lakhs: float
    generated_on: str
    assumptions: Optional[str] = None
| class FlexibleFinancialNoteGenerator: | |
    def __init__(self, user_api_key: Optional[str] = None):
        """Initialise the generator with an OpenRouter API key.

        Args:
            user_api_key: OpenRouter API key. Mandatory in practice despite
                the ``None`` default — a missing key raises immediately.

        Raises:
            ValueError: if no API key is supplied.
        """
        if not user_api_key:
            logger.error("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
            raise ValueError("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
        self.openrouter_api_key = user_api_key
        logger.info("Using provided OpenRouter API key")
        self.api_url = settings.api_url
        # Standard OpenRouter headers; HTTP-Referer / X-Title identify the app.
        self.headers = {
            "Authorization": f"Bearer {self.openrouter_api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://localhost:3000",
            "X-Title": "Financial Note Generator"
        }
        self.note_templates = self.load_note_templates()
        # Models tried in order by call_openrouter_api until one succeeds.
        self.recommended_models = [
            "mistralai/mixtral-8x7b-instruct"
        ]
| def load_note_templates(self) -> Dict[str, Any]: | |
| try: | |
| if __name__ == "__main__": | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| from notes_template import note_templates | |
| return note_templates | |
| except ImportError as e: | |
| logger.error(f"Error importing note_templates from notes_template: {e}") | |
| return {} | |
| except Exception as e: | |
| logger.error(f"Unexpected error loading note_templates: {e}") | |
| return {} | |
    def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
        """Load trial balance accounts from a JSON or Excel (.xlsx) file.

        Args:
            file_path: path to the input file; defaults to the configured
                trial balance JSON (evaluated once at import time).

        Returns:
            ``{"accounts": [...]}`` on success, or None on any failure.
        """
        try:
            if file_path.endswith('.json'):
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # Accept either a bare list of accounts or an object wrapping
                # them under an "accounts" key.
                if isinstance(data, list):
                    accounts = data
                elif isinstance(data, dict):
                    accounts = data.get('accounts', [])
                else:
                    logger.error(f"Unexpected trial balance format: {type(data)}")
                    return None
                logger.info(f"Loaded trial balance with {len(accounts)} accounts")
                return {"accounts": accounts}
            elif file_path.endswith('.xlsx'):
                # Imported lazily so the Excel extraction dependency is only
                # required when an .xlsx input is actually used.
                from notes.data_extraction import extract_trial_balance_data
                accounts = extract_trial_balance_data(file_path)
                logger.info(f"Extracted trial balance with {len(accounts)} accounts from Excel")
                return {"accounts": accounts}
            else:
                logger.error(f"Unsupported file type: {file_path}")
                return None
        except FileNotFoundError:
            logger.error(f"Trial balance file not found: {file_path}")
            return None
        except Exception as e:
            logger.error(f"Error loading trial balance: {e}")
            return None
    def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
        """Build the LLM prompt for a single note.

        Args:
            note_number: key into ``self.note_templates``.
            trial_balance_data: ``{"accounts": [...]}`` as returned by
                ``load_trial_balance``.

        Returns:
            The complete prompt string, or None when no template exists for
            ``note_number``.
        """
        if note_number not in self.note_templates:
            return None
        template = self.note_templates[note_number]
        all_accounts = trial_balance_data.get("accounts", [])
        # Context blob serialised into the prompt so the model sees every
        # account plus the note metadata in one place.
        context = {
            "note_info": {
                "number": note_number,
                "title": template.get("title", ""),
                "full_title": template.get("full_title", "")
            },
            "trial_balance": {
                "total_accounts": len(all_accounts),
                "accounts": all_accounts
            },
            "current_date": datetime.now().strftime("%Y-%m-%d"),
            # NOTE(review): financial year is hard-coded — confirm whether it
            # should be derived from the input data instead.
            "financial_year": "2023-24"
        }
        classification_guide = self._get_classification_guide(note_number)
        # Doubled braces {{ }} below are literal braces in the f-string; the
        # prompt demands raw JSON output matching the structure shown.
        prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
ðŸ"´ CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
2. START YOUR RESPONSE WITH {{ and END WITH }}
3. DO NOT USE ```json``` CODE BLOCKS
4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
ðŸ"´ REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
{{
  "title": "{template.get('title', '')}",
  "full_title": "{template.get('full_title', '')}",
  "structure": [
    {{
      "category": "In Lakhs",
      "subcategories": [
        {{ "label": "March 31, 2024", "value": 0.00 }},
        {{ "label": "March 31, 2023", "value": 0.00 }}
      ]
    }},
    {{
      "category": "Category Name",
      "subcategories": [
        {{ "label": "Subcategory Item", "value": 0.00, "previous_value": 0.00 }}
      ],
      "total": 0.00,
      "previous_total": 0.00
    }}
  ],
  "metadata": {{
    "note_number": {note_number},
    "generated_on": "{datetime.now().isoformat()}"
  }},
  "assumptions": "List any assumptions made during classification"
}}
ðŸ"´ STRUCTURE ARRAY EXPLAINED:
- First element: Header row with column labels (March 31, 2024, March 31, 2023)
- Subsequent elements: Data categories with subcategories
- Each data category must have:
  * "category": Main category name
  * "subcategories": Array of line items with "label", "value", "previous_value"
  * "total": Sum of current year values in subcategories
  * "previous_total": Sum of previous year values in subcategories
ðŸ"´ YOUR TASK:
1. Analyze ALL trial balance accounts provided below
2. Identify accounts that belong to "{template['full_title']}"
3. Classify into appropriate subcategories per Schedule III
4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
5. Calculate accurate totals ensuring mathematical consistency
6. Structure output in hierarchical "structure" array format
ðŸ"´ MATHEMATICAL REQUIREMENTS:
- All amounts MUST be in lakhs (divide original by 100,000)
- All subtotals MUST equal the grand total exactly
- Use 0.00 for March 2023 if data missing
- Round to 2 decimal places consistently
- Ensure "total" = sum of "value" in subcategories
- Ensure "previous_total" = sum of "previous_value" in subcategories
ðŸ"´ CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
{classification_guide}
ðŸ"´ COMPLETE TRIAL BALANCE DATA:
{json.dumps(context, indent=2)}
ðŸ"´ TEMPLATE STRUCTURE TO FOLLOW:
{json.dumps(template, indent=2)}
ðŸ"´ VALIDATION RULES:
- If no accounts match this note category, use empty categories with 0.00 totals
- Ensure "metadata.note_number" exactly matches {note_number}
- Document classification logic in "assumptions" field
- Structure must have at least 2 elements (header + data)
GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
        return prompt
    def _get_classification_guide(self, note_number: str) -> str:
        """Return Schedule III classification guidance text for one note.

        Notes "10"–"15" have hand-written guides; any other note number gets
        a generic fallback instruction.
        """
        guides = {
            "10": """
**Note 10 - Long Term Loans and Advances:**
- Include: Security deposits, long-term advances to suppliers/employees, deposits with utilities
- Categories: Unsecured considered good, Unsecured considered doubtful, Doubtful (provision)
- Exclude: Short-term advances, trade receivables, prepaid expenses under 1 year
""",
            "11": """
**Note 11 - Inventories:**
- Include: Raw materials, work-in-progress, finished goods, stores and spares, consumables
- Value at lower of cost or net realizable value
- Exclude: Advances for inventory purchases (classify as advances)
""",
            "12": """
**Note 12 - Trade Receivables:**
- Include: Amounts due from customers for goods/services, bills receivable
- Categories: Unsecured considered good, Unsecured considered doubtful, Provision for doubtful debts
- Exclude: Advances, deposits, other receivables
""",
            "13": """
**Note 13 - Cash and Cash Equivalents:**
- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
- Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
- Show: Balances in current accounts, savings accounts, fixed deposits separately
""",
            "14": """
**Note 14 - Short Term Loans and Advances:**
- Include: Prepaid expenses, advances to suppliers, employee advances, advance tax, TDS receivable
- Categories:
  * Unsecured, considered good: Prepaid expenses, Other advances
  * Other loans and advances: Advance tax, Balances with statutory/govt authorities
- Exclude: Long-term advances, trade receivables
""",
            "15": """
**Note 15 - Other Current Assets:**
- Include: Interest accrued, export incentives receivable, insurance claims, other miscellaneous current assets
- Exclude: Items that fit into specific categories like trade receivables, advances, cash
"""
        }
        return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
    def call_openrouter_api(self, prompt: str) -> Optional[str]:
        """Send the prompt to OpenRouter, trying each recommended model in turn.

        Args:
            prompt: fully built user prompt from ``build_llm_prompt``.

        Returns:
            The first successful model's message content string, or None when
            every model fails or the API key is rejected (401).
        """
        for model in self.recommended_models:
            logger.info(f"Trying model: {model}")
            payload = {
                "model": model,
                "messages": [
                    {
                        "role": "system",
                        "content": "You are an expert chartered accountant specializing in Indian accounting standards. You MUST respond with ONLY valid JSON, never with markdown code blocks or explanations. Start with { and end with }."
                    },
                    {"role": "user", "content": prompt}
                ],
                # Low temperature to favour deterministic, structured output.
                "max_tokens": 12000,
                "temperature": 0.1,
                "top_p": 0.9
            }
            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json=payload,
                    timeout=180
                )
                response.raise_for_status()
                result = response.json()
                content = result['choices'][0]['message']['content']
                logger.info(f"Successful response from {model}")
                return content
            except requests.exceptions.Timeout:
                logger.warning(f"Request to {model} timed out after 180s")
                continue
            except requests.exceptions.HTTPError as e:
                # 404/402 are per-model problems -> fall through to the next
                # model; 401 means the key itself is invalid -> abort.
                if e.response.status_code == 404:
                    logger.warning(f"Model {model} not found (404), trying next model")
                elif e.response.status_code == 402:
                    logger.warning(f"Model {model} requires payment (402), trying next model")
                elif e.response.status_code == 401:
                    logger.error(f"Invalid API key (401)")
                    return None
                else:
                    logger.error(f"HTTP error with {model}: {e}")
            except Exception as e:
                logger.error(f"Failed with {model}: {e}")
                continue
        logger.error("All models failed")
        return None
    def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
        """Extract the first parseable JSON object from a raw LLM response.

        Strategies, in order: (1) brace-balanced scan for the first substring
        that parses; (2) regex over ```json fences, generic fences, then a
        non-greedy {...} match; (3) the whole string; (4) the span from the
        first '{' to the last '}'.

        Returns:
            ``(parsed_dict, raw_json_substring)`` or ``(None, None)``.
        """
        response_text = response_text.strip()
        json_objects = []
        brace_count = 0
        start_idx = -1
        # Pass 1: track brace depth; each time depth returns to zero we have a
        # balanced candidate substring — try to parse it.
        for i, char in enumerate(response_text):
            if char == '{':
                if brace_count == 0:
                    start_idx = i
                brace_count += 1
            elif char == '}':
                brace_count -= 1
                if brace_count == 0 and start_idx != -1:
                    potential_json = response_text[start_idx:i+1]
                    try:
                        parsed = json.loads(potential_json)
                        json_objects.append((parsed, potential_json))
                        break
                    except json.JSONDecodeError:
                        continue
        if json_objects:
            logger.info("Successfully extracted first valid JSON object from response")
            return json_objects[0]
        # Pass 2: markdown code fences, then any fenced block, then the first
        # (non-greedy, so possibly truncated) {...} match.
        json_patterns = [
            r'```json\s*(.*?)\s*```',
            r'```\s*(.*?)\s*```',
            r'(\{.*?\})'
        ]
        for pattern in json_patterns:
            match = re.search(pattern, response_text, re.DOTALL)
            if match:
                try:
                    json_content = match.group(1).strip()
                    json_data = json.loads(json_content)
                    return json_data, json_content
                except json.JSONDecodeError:
                    continue
        # Pass 3: the entire response verbatim.
        try:
            json_data = json.loads(response_text)
            return json_data, response_text
        except json.JSONDecodeError:
            # Pass 4: widest possible span between first '{' and last '}'.
            try:
                start = response_text.find('{')
                end = response_text.rfind('}') + 1
                if start != -1 and end > start:
                    json_part = response_text[start:end]
                    json_data = json.loads(json_part)
                    return json_data, json_part
            except json.JSONDecodeError:
                pass
        return None, None
| def safe_numeric_value(self, value, default=0.0): | |
| """Convert any value to a safe numeric float, defaulting to 0.0 if conversion fails.""" | |
| if value is None or value == '' or value == '-': | |
| return default | |
| try: | |
| # If already a number, return it as float | |
| if isinstance(value, (int, float)): | |
| return float(value) | |
| # Handle string numbers | |
| if isinstance(value, str): | |
| # Remove common non-numeric characters | |
| cleaned = value.replace(',', '').replace('₹', '').replace('Rs', '').replace('Rs.', '').strip() | |
| # Handle parentheses for negative numbers (accounting format) | |
| if cleaned.startswith('(') and cleaned.endswith(')'): | |
| cleaned = '-' + cleaned[1:-1] | |
| # Handle negative numbers | |
| is_negative = cleaned.startswith('-') | |
| if is_negative: | |
| cleaned = cleaned[1:] | |
| # Remove any remaining non-numeric characters except decimal point | |
| cleaned = ''.join(c for c in cleaned if c.isdigit() or c == '.') | |
| if not cleaned or cleaned == '.': | |
| return default | |
| # Convert to number | |
| result = float(cleaned) | |
| return -result if is_negative else result | |
| # Try direct conversion as last resort | |
| return float(value) | |
| except (ValueError, TypeError, AttributeError): | |
| logger.warning(f"Could not convert value '{value}' to numeric, using default {default}") | |
| return default | |
| def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]: | |
| fixed_data = json_data.copy() | |
| template = self.note_templates.get(note_number, {}) | |
| if "title" not in fixed_data or not fixed_data["title"]: | |
| fixed_data["title"] = template.get("title", f"Note {note_number}") | |
| logger.info(f"Auto-fixed missing title field") | |
| if "full_title" not in fixed_data or not fixed_data["full_title"]: | |
| fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}") | |
| logger.info(f"Auto-fixed missing full_title field") | |
| if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict): | |
| fixed_data["metadata"] = {} | |
| logger.info("Auto-created metadata object") | |
| metadata_note_num = fixed_data["metadata"].get("note_number") | |
| try: | |
| expected_note_num = int(note_number); | |
| if (metadata_note_num is None or | |
| metadata_note_num == 0 or | |
| metadata_note_num == 0.0 or | |
| int(metadata_note_num) != expected_note_num): | |
| fixed_data["metadata"]["note_number"] = expected_note_num | |
| logger.info(f"Auto-corrected metadata.note_number from {metadata_note_num} to {expected_note_num}") | |
| except ValueError: | |
| fixed_data["metadata"]["note_number"] = note_number | |
| logger.info(f"Auto-set metadata.note_number to string: {note_number}") | |
| if "generated_on" not in fixed_data["metadata"]: | |
| fixed_data["metadata"]["generated_on"] = datetime.now().isoformat() | |
| logger.info("Auto-fixed missing metadata.generated_on field") | |
| if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list): | |
| logger.warning("Structure array missing, creating default structure") | |
| fixed_data["structure"] = [ | |
| { | |
| "category": "In Lakhs", | |
| "subcategories": [ | |
| {"label": "March 31, 2024", "value": 0.00}, | |
| {"label": "March 31, 2023", "value": 0.00} | |
| ] | |
| }, | |
| { | |
| "category": "No data available", | |
| "subcategories": [ | |
| {"label": "Items", "value": 0.00, "previous_value": 0.00} | |
| ], | |
| "total": 0.00, | |
| "previous_total": 0.00 | |
| } | |
| ] | |
| else: | |
| if len(fixed_data["structure"]) == 0: | |
| logger.warning("Empty structure array, adding default elements") | |
| fixed_data["structure"] = [ | |
| { | |
| "category": "In Lakhs", | |
| "subcategories": [ | |
| {"label": "March 31, 2024", "value": 0.00}, | |
| {"label": "March 31, 2023", "value": 0.00} | |
| ] | |
| } | |
| ] | |
| for i, struct_elem in enumerate(fixed_data["structure"]): | |
| if not isinstance(struct_elem, dict): | |
| continue | |
| if "category" not in struct_elem: | |
| struct_elem["category"] = f"Category {i}" | |
| if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list): | |
| struct_elem["subcategories"] = [] | |
| # FIX: Ensure all subcategory values are numeric before summing | |
| for sub in struct_elem.get("subcategories", []): | |
| if isinstance(sub, dict): | |
| if "value" in sub: | |
| sub["value"] = self.safe_numeric_value(sub["value"]) | |
| if "previous_value" in sub: | |
| sub["previous_value"] = self.safe_numeric_value(sub["previous_value"]) | |
| # Now safely calculate totals | |
| if i > 0 and struct_elem.get("subcategories"): | |
| if "total" not in struct_elem: | |
| struct_elem["total"] = sum( | |
| self.safe_numeric_value(sub.get("value", 0.0)) | |
| for sub in struct_elem["subcategories"] | |
| if isinstance(sub, dict) | |
| ) | |
| if "previous_total" not in struct_elem: | |
| struct_elem["previous_total"] = sum( | |
| self.safe_numeric_value(sub.get("previous_value", 0.0)) | |
| for sub in struct_elem["subcategories"] | |
| if isinstance(sub, dict) | |
| ) | |
| if "assumptions" not in fixed_data: | |
| fixed_data["assumptions"] = "Classification based on account names and standard accounting practices" | |
| logger.info("Auto-added default assumptions") | |
| return fixed_data | |
| def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]: | |
| required_fields = ["title", "full_title", "structure", "metadata", "assumptions"] | |
| missing_fields = [] | |
| for field in required_fields: | |
| if field not in json_data: | |
| missing_fields.append(field) | |
| if missing_fields: | |
| return False, f"Missing required fields: {', '.join(missing_fields)}" | |
| if not isinstance(json_data.get("metadata"), dict): | |
| return False, "metadata must be an object" | |
| metadata = json_data["metadata"] | |
| if "note_number" not in metadata: | |
| return False, "metadata.note_number is required" | |
| if str(metadata.get("note_number", "")) != str(note_number): | |
| return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}" | |
| if not isinstance(json_data.get("structure"), list): | |
| return False, "structure must be an array" | |
| if len(json_data["structure"]) == 0: | |
| return False, "structure array cannot be empty" | |
| return True, "Validation passed" | |
| def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str: | |
| try: | |
| title = json_data.get("full_title", json_data.get("title", "Financial Note")) | |
| structure = json_data.get("structure", []) | |
| if not structure: | |
| return f"# {title}\n\n*No data available*" | |
| md_lines = [f"# {title}\n"] | |
| header_elem = structure[0] if len(structure) > 0 else None | |
| if header_elem and header_elem.get("subcategories"): | |
| headers = [sub.get("label", "") for sub in header_elem["subcategories"]] | |
| md_lines.append("| Particulars | " + " | ".join(headers) + " |") | |
| md_lines.append("|" + "---|" * (len(headers) + 1)) | |
| for i in range(1, len(structure)): | |
| elem = structure[i] | |
| category = elem.get("category", "") | |
| subcategories = elem.get("subcategories", []) | |
| if category: | |
| md_lines.append(f"\n**{category}**\n") | |
| for sub in subcategories: | |
| label = sub.get("label", "") | |
| value = sub.get("value", 0.00) | |
| previous_value = sub.get("previous_value", 0.00) | |
| md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |") | |
| if "total" in elem: | |
| total = elem.get("total", 0.00) | |
| previous_total = elem.get("previous_total", 0.00) | |
| md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |") | |
| metadata = json_data.get("metadata", {}) | |
| md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*") | |
| assumptions = json_data.get("assumptions", "") | |
| if assumptions: | |
| md_lines.append(f"\n\n**Assumptions:** {assumptions}") | |
| return "\n".join(md_lines) | |
| except Exception as e: | |
| logger.error(f"Error generating markdown from structure: {e}") | |
| return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*" | |
    def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
        """Persist one raw LLM response as raw text, validated JSON and markdown.

        Writes three files under ``output_dir``: notes_raw.txt (verbatim
        response), notes.json (parsed/repaired note) and notes_formatted.md.
        When the response cannot be parsed, a schema-complete fallback
        notes.json is written instead.

        Returns:
            True only when the response parsed and all files were saved.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        json_output_path = f"{output_dir}/notes.json"
        raw_output_path = f"{output_dir}/notes_raw.txt"
        formatted_md_path = f"{output_dir}/notes_formatted.md"
        try:
            # Always keep the raw response for debugging, before any parsing.
            with open(raw_output_path, 'w', encoding='utf-8') as f:
                f.write(note_data)
            json_data, json_string = self.extract_json_from_markdown(note_data)
            if json_data:
                json_data = self.validate_and_fix_json(json_data, note_number)
                is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                if not is_valid:
                    # Non-fatal: still save whatever the auto-fix produced.
                    logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
                json_data = convert_note_json_to_lakhs(json_data)
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(json_data, f, indent=2, ensure_ascii=False)
                logger.info(f"JSON saved to {json_output_path}")
                # Prefer LLM-provided markdown; otherwise render it ourselves.
                md_content = json_data.get('markdown_content', '')
                if not md_content:
                    md_content = self._generate_markdown_from_structure(json_data)
                    logger.info("Auto-generated markdown from structure array")
                with open(formatted_md_path, 'w', encoding='utf-8') as f:
                    f.write(md_content)
                return True
            else:
                # Parsing failed: write a schema-complete fallback note so
                # downstream consumers always find a valid notes.json.
                template = self.note_templates.get(note_number, {})
                fallback_json = {
                    "title": template.get("title", f"Note {note_number}"),
                    "full_title": template.get("full_title", f"{note_number}. Financial Note"),
                    "structure": [
                        {
                            "category": "In Lakhs",
                            "subcategories": [
                                {"label": "March 31, 2024", "value": 0.00},
                                {"label": "March 31, 2023", "value": 0.00}
                            ]
                        },
                        {
                            "category": "Error - No data",
                            "subcategories": [
                                {"label": "Could not parse response", "value": 0.00, "previous_value": 0.00}
                            ],
                            "total": 0.00,
                            "previous_total": 0.00
                        }
                    ],
                    "metadata": {
                        "note_number": int(note_number) if note_number.isdigit() else note_number,
                        "generated_on": datetime.now().isoformat()
                    },
                    "assumptions": "Failed to parse LLM response",
                    "raw_response": note_data,
                    "error": "Could not parse JSON from response"
                }
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(fallback_json, f, indent=2, ensure_ascii=False)
                logger.warning(f"Fallback JSON with required fields saved to {json_output_path}")
                return False
        except Exception as e:
            logger.error(f"Error saving files: {e}")
            # Last-ditch effort: write a minimal emergency stub so the
            # pipeline can continue even after an unexpected failure above.
            try:
                template = self.note_templates.get(note_number, {})
                emergency_json = {
                    "title": template.get("title", f"Note {note_number}"),
                    "full_title": template.get("full_title", f"{note_number}. Financial Note"),
                    "structure": [
                        {
                            "category": "In Lakhs",
                            "subcategories": [
                                {"label": "March 31, 2024", "value": 0.00},
                                {"label": "March 31, 2023", "value": 0.00}
                            ]
                        }
                    ],
                    "metadata": {
                        "note_number": int(note_number) if note_number.isdigit() else note_number,
                        "generated_on": datetime.now().isoformat()
                    },
                    "assumptions": "Emergency fallback due to processing error",
                    "error": str(e)
                }
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(emergency_json, f, indent=2, ensure_ascii=False)
                logger.info(f"Emergency fallback JSON saved to {json_output_path}")
            except Exception as emergency_error:
                logger.error(f"Emergency fallback also failed: {emergency_error}")
            return False
| def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool: | |
| if note_number not in self.note_templates: | |
| logger.error(f"Note template {note_number} not found") | |
| return False | |
| logger.info(f"Starting Note {note_number} generation...") | |
| trial_balance = self.load_trial_balance(trial_balance_path) | |
| if not trial_balance: | |
| return False | |
| prompt = self.build_llm_prompt(note_number, trial_balance) | |
| if not prompt: | |
| logger.error("Failed to build prompt") | |
| return False | |
| response = self.call_openrouter_api(prompt) | |
| if not response: | |
| logger.error("Failed to get API response") | |
| return False | |
| success = self.save_generated_note(response, note_number) | |
| logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}") | |
| return success | |
    def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
        """Generate every templated note and write one consolidated notes.json.

        Each note gets an entry in the consolidated output even on failure
        (a repaired-but-invalid note or a fallback stub), so the file is
        always schema-complete.

        Returns:
            Mapping of note number to success flag.
        """
        logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
        results = {}
        all_notes = []
        trial_balance = self.load_trial_balance(trial_balance_path)
        if not trial_balance:
            logger.error("Failed to load trial balance")
            return {note: False for note in self.note_templates.keys()}
        for note_number in self.note_templates.keys():
            logger.info(f"Processing Note {note_number}")
            prompt = self.build_llm_prompt(note_number, trial_balance)
            if not prompt:
                results[note_number] = False
                continue
            response = self.call_openrouter_api(prompt)
            if not response:
                results[note_number] = False
                continue
            json_data, _ = self.extract_json_from_markdown(response)
            if json_data:
                json_data = self.validate_and_fix_json(json_data, note_number)
                is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                if is_valid:
                    json_data = convert_note_json_to_lakhs(json_data)
                    all_notes.append(json_data)
                    results[note_number] = True
                    logger.info(f"Note {note_number} processed successfully")
                else:
                    # Keep the imperfect note in the output but record failure.
                    logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
                    json_data = convert_note_json_to_lakhs(json_data)
                    all_notes.append(json_data)
                    results[note_number] = False
            else:
                logger.error(f"Note {note_number}: Could not parse JSON from response")
                # Unparseable response: append a schema-complete error stub.
                template = self.note_templates.get(note_number, {})
                fallback_note = {
                    "title": template.get("title", f"Note {note_number}"),
                    "full_title": template.get("full_title", f"{note_number}. Financial Note"),
                    "structure": [
                        {
                            "category": "In Lakhs",
                            "subcategories": [
                                {"label": "March 31, 2024", "value": 0.00},
                                {"label": "March 31, 2023", "value": 0.00}
                            ]
                        },
                        {
                            "category": "Error",
                            "subcategories": [
                                {"label": "Failed to generate from LLM response", "value": 0.00, "previous_value": 0.00}
                            ],
                            "total": 0.00,
                            "previous_total": 0.00
                        }
                    ],
                    "metadata": {
                        "note_number": int(note_number) if note_number.isdigit() else note_number,
                        "generated_on": datetime.now().isoformat()
                    },
                    "assumptions": "LLM response parsing failed",
                    "error": "JSON parsing failed"
                }
                all_notes.append(fallback_note)
                results[note_number] = False
            # Small delay between notes to avoid hammering the API.
            import time
            time.sleep(2)
        output_dir = settings.output_dir
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        consolidated_output = {
            "notes": all_notes,
            "generation_summary": {
                "total_notes": len(self.note_templates),
                "successful_notes": sum(1 for success in results.values() if success),
                "failed_notes": sum(1 for success in results.values() if not success),
                "generated_on": datetime.now().isoformat(),
                "results": results
            }
        }
        with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
            json.dump(consolidated_output, f, indent=2, ensure_ascii=False)
        successful = sum(1 for success in results.values() if success)
        total = len(results)
        logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
        logger.info(f"All notes saved to {output_dir}/notes.json")
        return results
def main() -> None:
    """CLI entry point.

    With arguments: ``<mode> [note_numbers]`` where mode is 'specific'
    (requires a comma-separated list of note numbers) or 'all'.
    Without arguments: interactive prompt.
    """
    try:
        # Get API key from environment
        user_api_key = os.environ.get("OPENROUTER_API_KEY")
        if not user_api_key:
            logger.error("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
            sys.exit(1)
        generator = FlexibleFinancialNoteGenerator(user_api_key=user_api_key)
        if not generator.note_templates:
            logger.error("No note templates loaded. Check notes_template.py")
            return
        logger.info(f"Loaded {len(generator.note_templates)} note templates")
        if len(sys.argv) > 1:
            mode = sys.argv[1].lower()
            note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""
            if mode == "specific":
                # BUGFIX: the old code required a second positional argument
                # for EVERY mode, which made `<script> all` unusable. The
                # note_numbers argument is only mandatory for 'specific'.
                if not note_numbers:
                    logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                    logger.error("  mode: 'specific' or 'all'")
                    logger.error("  note_numbers: comma-separated note numbers (for specific mode)")
                    sys.exit(1)
                note_list = [n.strip() for n in note_numbers.split(",")]
                all_notes = []
                successful_notes = []
                for note_number in note_list:
                    if note_number in generator.note_templates:
                        success = generator.generate_note(note_number)
                        if success:
                            try:
                                # generate_note just wrote this file; reload it
                                # so it can be merged into the consolidated output.
                                with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
                                    note_data = json.load(f)
                                all_notes.append(note_data)
                                successful_notes.append(note_number)
                                logger.info(f"Note {note_number} generated successfully")
                            except Exception as e:
                                logger.error(f"Failed to load generated note {note_number}: {e}")
                        else:
                            logger.error(f"Failed to generate note {note_number}")
                    else:
                        logger.error(f"Note {note_number} not found in templates")
                if all_notes:
                    output_dir = settings.output_dir
                    Path(output_dir).mkdir(parents=True, exist_ok=True)
                    consolidated = {
                        "notes": all_notes,
                        "generation_summary": {
                            "requested_notes": note_list,
                            "successful_notes": successful_notes,
                            "total_successful": len(successful_notes),
                            "generated_on": datetime.now().isoformat()
                        }
                    }
                    with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
                        json.dump(consolidated, f, indent=2, ensure_ascii=False)
                    logger.info(f"Consolidated notes saved to {output_dir}/notes.json")
            elif mode == "all":
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")
                for note, success in results.items():
                    status = "✅ SUCCESS" if success else "⌠FAILED"
                    logger.info(f" Note {note}: {status}")
            else:
                logger.error("Invalid mode. Use 'specific' or 'all'")
                sys.exit(1)
        else:
            # Interactive fallback when no CLI arguments are given.
            choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
            if choice == "1":
                available_notes = list(generator.note_templates.keys())
                print(f"Available notes: {', '.join(available_notes)}")
                note_number = input("Enter note number: ").strip()
                if note_number in available_notes:
                    success = generator.generate_note(note_number)
                    logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
                else:
                    logger.error(f"Note {note_number} not found")
            elif choice == "2":
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")
                print("\n" + "="*50)
                print("GENERATION SUMMARY")
                print("="*50)
                for note, success in results.items():
                    status = "✅ SUCCESS" if success else "⌠FAILED"
                    print(f"Note {note}: {status}")
                print("="*50)
            else:
                logger.error("Invalid choice. Enter 1 or 2.")
    except KeyboardInterrupt:
        logger.info("Generation interrupted by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Error: {e}", exc_info=True)
        sys.exit(1)
if __name__ == "__main__":
    main()