Spaces:

point9
/

finryver-dev

Runtime error

App Files Files Community

dipan004 committed on Nov 11, 2025

Commit

2623d94

verified ·

1 Parent(s): b261ad9

Update notes/llm_notes_generator.py

Browse files

Files changed (1) hide show

notes/llm_notes_generator.py +28 -107

notes/llm_notes_generator.py CHANGED Viewed

@@ -1,14 +1,11 @@
-# Minimal placeholder for FlexibleFinancialNoteGenerator
 class FlexibleFinancialNoteGenerator:
 	def __init__(self):
 		pass
 	def generate_note(self, note_number, trial_balance_path=None):
-		# Placeholder logic
 		return True
 	def generate_all_notes(self, trial_balance_path=None):
-		# Placeholder logic
 		return {"dummy": True}
 import json
@@ -27,15 +24,12 @@ from pydantic_settings import BaseSettings
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from utils.utils import convert_note_json_to_lakhs
-# Load environment variables
 load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
-# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class Settings(BaseSettings):
-	"""Application settings loaded from environment variables or .env file."""
 	openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
 	api_url: str = "https://openrouter.ai/api/v1/chat/completions"
 	output_dir: str = "data/generated_notes"
@@ -51,7 +45,6 @@ class Account(BaseModel):
 class NoteTemplate(BaseModel):
     title: str
     full_title: str
-    # Add other fields as needed for your template structure
 class GeneratedNote(BaseModel):
     note_number: str
@@ -59,14 +52,19 @@ class GeneratedNote(BaseModel):
     grand_total_lakhs: float
     generated_on: str
     assumptions: Optional[str] = None
-    # Add other fields as needed
 class FlexibleFinancialNoteGenerator:
-    def __init__(self):
-        self.openrouter_api_key = settings.openrouter_api_key
-        if not self.openrouter_api_key:
-            logger.error("OPENROUTER_API_KEY not found in .env file")
-            raise ValueError("OPENROUTER_API_KEY not found in .env file")
         self.api_url = settings.api_url
         self.headers = {
             "Authorization": f"Bearer {self.openrouter_api_key}",
@@ -75,18 +73,13 @@ class FlexibleFinancialNoteGenerator:
             "X-Title": "Financial Note Generator"
         }
         self.note_templates = self.load_note_templates()
-        # Updated model list with DeepSeek as first choice
         self.recommended_models = [
-           "deepseek/deepseek-r1",
-            #"deepseek/deepseek-coder",
             "mistralai/mixtral-8x7b-instruct"
         ]
     def load_note_templates(self) -> Dict[str, Any]:
-        """Load note templates from notes_template.py file."""
         try:
-            # Add parent directory to path for imports when run as script
             if __name__ == "__main__":
                 sys.path.append(str(Path(__file__).parent.parent))
@@ -100,7 +93,6 @@ class FlexibleFinancialNoteGenerator:
             return {}
     def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
-        """Load the complete trial balance from Excel or JSON."""
         try:
             if file_path.endswith('.json'):
                 with open(file_path, 'r', encoding='utf-8') as f:
@@ -130,14 +122,12 @@ class FlexibleFinancialNoteGenerator:
             return None
     def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
-        """Build comprehensive LLM prompt with strict JSON output requirements"""
         if note_number not in self.note_templates:
             return None
         template = self.note_templates[note_number]
         all_accounts = trial_balance_data.get("accounts", [])
-        # Build context with full trial balance
         context = {
             "note_info": {
                 "number": note_number,
@@ -152,18 +142,17 @@ class FlexibleFinancialNoteGenerator:
             "financial_year": "2023-24"
         }
-        # Get note-specific classification guidance
         classification_guide = self._get_classification_guide(note_number)
         prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
-🔴 CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
 1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
 2. START YOUR RESPONSE WITH {{ and END WITH }}
 3. DO NOT USE ```json``` CODE BLOCKS
 4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
-🔴 REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
 {{
   "title": "{template.get('title', '')}",
   "full_title": "{template.get('full_title', '')}",
@@ -191,7 +180,7 @@ class FlexibleFinancialNoteGenerator:
   "assumptions": "List any assumptions made during classification"
 }}
-🔴 STRUCTURE ARRAY EXPLAINED:
 - First element: Header row with column labels (March 31, 2024, March 31, 2023)
 - Subsequent elements: Data categories with subcategories
 - Each data category must have:
@@ -200,15 +189,15 @@ class FlexibleFinancialNoteGenerator:
   * "total": Sum of current year values in subcategories
   * "previous_total": Sum of previous year values in subcategories
-🔴 YOUR TASK:
 1. Analyze ALL trial balance accounts provided below
 2. Identify accounts that belong to "{template['full_title']}"
 3. Classify into appropriate subcategories per Schedule III
-4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
 5. Calculate accurate totals ensuring mathematical consistency
 6. Structure output in hierarchical "structure" array format
-🔴 MATHEMATICAL REQUIREMENTS:
 - All amounts MUST be in lakhs (divide original by 100,000)
 - All subtotals MUST equal the grand total exactly
 - Use 0.00 for March 2023 if data missing
@@ -216,16 +205,16 @@ class FlexibleFinancialNoteGenerator:
 - Ensure "total" = sum of "value" in subcategories
 - Ensure "previous_total" = sum of "previous_value" in subcategories
-🔴 CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
 {classification_guide}
-🔴 COMPLETE TRIAL BALANCE DATA:
 {json.dumps(context, indent=2)}
-🔴 TEMPLATE STRUCTURE TO FOLLOW:
 {json.dumps(template, indent=2)}
-🔴 VALIDATION RULES:
 - If no accounts match this note category, use empty categories with 0.00 totals
 - Ensure "metadata.note_number" exactly matches {note_number}
 - Document classification logic in "assumptions" field
@@ -236,7 +225,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return prompt
     def _get_classification_guide(self, note_number: str) -> str:
-        """Get note-specific classification guidance"""
         guides = {
             "10": """
 **Note 10 - Long Term Loans and Advances:**
@@ -258,7 +246,7 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             """,
             "13": """
 **Note 13 - Cash and Cash Equivalents:**
-- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
 - Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
 - Show: Balances in current accounts, savings accounts, fixed deposits separately
             """,
@@ -279,7 +267,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
     def call_openrouter_api(self, prompt: str) -> Optional[str]:
-        """Make API call to OpenRouter with model fallback"""
         for model in self.recommended_models:
             logger.info(f"Trying model: {model}")
             payload = {
@@ -312,6 +299,9 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                     logger.warning(f"Model {model} not found (404), trying next model")
                 elif e.response.status_code == 402:
                     logger.warning(f"Model {model} requires payment (402), trying next model")
                 else:
                     logger.error(f"HTTP error with {model}: {e}")
             except Exception as e:
@@ -321,11 +311,8 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return None
     def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
-        """Extract JSON from response, handling markdown code blocks and cleaning"""
         response_text = response_text.strip()
-        # CRITICAL FIX: Handle concatenated/duplicate JSON (e.g., "}{\n{")
-        # Find the first complete JSON object
         json_objects = []
         brace_count = 0
         start_idx = -1
@@ -338,12 +325,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             elif char == '}':
                 brace_count -= 1
                 if brace_count == 0 and start_idx != -1:
-                    # Found complete JSON object
                     potential_json = response_text[start_idx:i+1]
                     try:
                         parsed = json.loads(potential_json)
                         json_objects.append((parsed, potential_json))
-                        # Use the first valid JSON object
                         break
                     except json.JSONDecodeError:
                         continue
@@ -352,8 +337,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             logger.info("Successfully extracted first valid JSON object from response")
             return json_objects[0]
-        # Fallback: Try original extraction methods
-        # Remove any leading/trailing text outside JSON
         json_patterns = [
             r'```json\s*(.*?)\s*```',
             r'```\s*(.*?)\s*```',
@@ -370,12 +353,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                 except json.JSONDecodeError:
                     continue
-        # Try parsing the entire response as JSON
         try:
             json_data = json.loads(response_text)
             return json_data, response_text
         except json.JSONDecodeError:
-            # Last attempt: find JSON-like structure
             try:
                 start = response_text.find('{')
                 end = response_text.rfind('}') + 1
@@ -389,13 +370,10 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             return None, None
     def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
-        """Validate JSON structure and auto-fix missing required fields"""
         fixed_data = json_data.copy()
-        # Get template for this note
         template = self.note_templates.get(note_number, {})
-        # Auto-fix title fields
         if "title" not in fixed_data or not fixed_data["title"]:
             fixed_data["title"] = template.get("title", f"Note {note_number}")
             logger.info(f"Auto-fixed missing title field")
@@ -404,18 +382,14 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
             logger.info(f"Auto-fixed missing full_title field")
-        # Auto-fix or create metadata
         if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
             fixed_data["metadata"] = {}
             logger.info("Auto-created metadata object")
-        # CRITICAL FIX: Ensure note_number is correct integer, not 0.0
         metadata_note_num = fixed_data["metadata"].get("note_number")
         try:
-            # Convert note_number string to int
             expected_note_num = int(note_number)
-            # Check if metadata note_number is wrong (0, 0.0, or mismatch)
             if (metadata_note_num is None or
                 metadata_note_num == 0 or
                 metadata_note_num == 0.0 or
@@ -431,7 +405,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
             logger.info("Auto-fixed missing metadata.generated_on field")
-        # Auto-fix or create structure array
         if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
             logger.warning("Structure array missing, creating default structure")
             fixed_data["structure"] = [
@@ -452,7 +425,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                 }
             ]
         else:
-            # Validate and fix structure elements
             if len(fixed_data["structure"]) == 0:
                 logger.warning("Empty structure array, adding default elements")
                 fixed_data["structure"] = [
@@ -465,7 +437,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                     }
                 ]
-            # Ensure each structure element has required fields
             for i, struct_elem in enumerate(fixed_data["structure"]):
                 if not isinstance(struct_elem, dict):
                     continue
@@ -476,7 +447,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                 if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
                     struct_elem["subcategories"] = []
-                # For data rows (not header), ensure totals exist
                 if i > 0 and struct_elem.get("subcategories"):
                     if "total" not in struct_elem:
                         struct_elem["total"] = sum(
@@ -492,7 +462,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                             if isinstance(sub, dict)
                         )
-        # Auto-fix assumptions
         if "assumptions" not in fixed_data:
             fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
             logger.info("Auto-added default assumptions")
@@ -500,10 +469,8 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return fixed_data
     def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
-        """Validate that the JSON matches expected structure"""
         required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
-        # Check required fields
         missing_fields = []
         for field in required_fields:
             if field not in json_data:
@@ -512,7 +479,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         if missing_fields:
             return False, f"Missing required fields: {', '.join(missing_fields)}"
-        # Check metadata structure
         if not isinstance(json_data.get("metadata"), dict):
             return False, "metadata must be an object"
@@ -523,7 +489,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         if str(metadata.get("note_number", "")) != str(note_number):
             return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
-        # Check structure array
         if not isinstance(json_data.get("structure"), list):
             return False, "structure must be an array"
@@ -533,7 +498,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return True, "Validation passed"
     def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
-        """Generate markdown table from structure array"""
         try:
             title = json_data.get("full_title", json_data.get("title", "Financial Note"))
             structure = json_data.get("structure", [])
@@ -541,44 +505,36 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             if not structure:
                 return f"# {title}\n\n*No data available*"
-            # Start markdown
             md_lines = [f"# {title}\n"]
-            # Get header row (first element)
             header_elem = structure[0] if len(structure) > 0 else None
             if header_elem and header_elem.get("subcategories"):
                 headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
                 md_lines.append("| Particulars | " + " | ".join(headers) + " |")
                 md_lines.append("|" + "---|" * (len(headers) + 1))
-            # Process data rows
             for i in range(1, len(structure)):
                 elem = structure[i]
                 category = elem.get("category", "")
                 subcategories = elem.get("subcategories", [])
-                # Add category header if exists
                 if category:
                     md_lines.append(f"\n**{category}**\n")
-                # Add subcategory rows
                 for sub in subcategories:
                     label = sub.get("label", "")
                     value = sub.get("value", 0.00)
                     previous_value = sub.get("previous_value", 0.00)
                     md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
-                # Add total row if exists
                 if "total" in elem:
                     total = elem.get("total", 0.00)
                     previous_total = elem.get("previous_total", 0.00)
                     md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
-            # Add metadata
             metadata = json_data.get("metadata", {})
             md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
-            # Add assumptions if present
             assumptions = json_data.get("assumptions", "")
             if assumptions:
                 md_lines.append(f"\n\n**Assumptions:** {assumptions}")
@@ -590,41 +546,32 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
     def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
-        """Save the generated note to file with robust validation and auto-fixing"""
         Path(output_dir).mkdir(parents=True, exist_ok=True)
         json_output_path = f"{output_dir}/notes.json"
         raw_output_path = f"{output_dir}/notes_raw.txt"
         formatted_md_path = f"{output_dir}/notes_formatted.md"
         try:
-            # Always save raw response for debugging
             with open(raw_output_path, 'w', encoding='utf-8') as f:
                 f.write(note_data)
-            # Extract and validate JSON
             json_data, json_string = self.extract_json_from_markdown(note_data)
             if json_data:
-                # Auto-fix missing or incorrect fields
                 json_data = self.validate_and_fix_json(json_data, note_number)
-                # Final validation
                 is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                 if not is_valid:
                     logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
-                # Convert to lakhs if needed
                 json_data = convert_note_json_to_lakhs(json_data)
-                # Save JSON
                 with open(json_output_path, 'w', encoding='utf-8') as f:
                     json.dump(json_data, f, indent=2, ensure_ascii=False)
                 logger.info(f"JSON saved to {json_output_path}")
-                # Generate and save markdown
                 md_content = json_data.get('markdown_content', '')
                 if not md_content:
-                    # Generate markdown from structure
                     md_content = self._generate_markdown_from_structure(json_data)
                     logger.info("Auto-generated markdown from structure array")
@@ -633,7 +580,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                 return True
             else:
-                # Create fallback JSON with all required fields
                 template = self.note_templates.get(note_number, {})
                 fallback_json = {
                     "title": template.get("title", f"Note {note_number}"),
@@ -672,7 +618,6 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         except Exception as e:
             logger.error(f"Error saving files: {e}")
-            # Emergency fallback
             try:
                 template = self.note_templates.get(note_number, {})
                 emergency_json = {
@@ -703,42 +648,35 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
             return False
     def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
-        """Generate a specific note based on note number"""
         if note_number not in self.note_templates:
             logger.error(f"Note template {note_number} not found")
             return False
         logger.info(f"Starting Note {note_number} generation...")
-        # Load complete trial balance
         trial_balance = self.load_trial_balance(trial_balance_path)
         if not trial_balance:
             return False
-        # Build prompt with full trial balance
         prompt = self.build_llm_prompt(note_number, trial_balance)
         if not prompt:
             logger.error("Failed to build prompt")
             return False
-        # Get LLM response
         response = self.call_openrouter_api(prompt)
         if not response:
             logger.error("Failed to get API response")
             return False
-        # Save the generated note
         success = self.save_generated_note(response, note_number)
         logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
         return success
     def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
-        """Generate all available notes and save them in a single notes.json file."""
         logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
         results = {}
         all_notes = []
-        # Load trial balance once
         trial_balance = self.load_trial_balance(trial_balance_path)
         if not trial_balance:
             logger.error("Failed to load trial balance")
@@ -747,22 +685,18 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         for note_number in self.note_templates.keys():
             logger.info(f"Processing Note {note_number}")
-            # Build prompt for this note
             prompt = self.build_llm_prompt(note_number, trial_balance)
             if not prompt:
                 results[note_number] = False
                 continue
-            # Get LLM response
             response = self.call_openrouter_api(prompt)
             if not response:
                 results[note_number] = False
                 continue
-            # Parse JSON response
             json_data, _ = self.extract_json_from_markdown(response)
             if json_data:
-                # Auto-fix and validate
                 json_data = self.validate_and_fix_json(json_data, note_number)
                 is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
@@ -773,13 +707,11 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                     logger.info(f"Note {note_number} processed successfully")
                 else:
                     logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
-                    # Still include it but mark as failed
                     json_data = convert_note_json_to_lakhs(json_data)
                     all_notes.append(json_data)
                     results[note_number] = False
             else:
                 logger.error(f"Note {note_number}: Could not parse JSON from response")
-                # Create fallback note with new structure
                 template = self.note_templates.get(note_number, {})
                 fallback_note = {
                     "title": template.get("title", f"Note {note_number}"),
@@ -811,11 +743,9 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
                 all_notes.append(fallback_note)
                 results[note_number] = False
-            # Brief pause between API calls
             import time
             time.sleep(2)
-        # Save all notes in consolidated file
         output_dir = settings.output_dir
         Path(output_dir).mkdir(parents=True, exist_ok=True)
@@ -841,9 +771,7 @@ GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
         return results
 def main() -> None:
-    """Main function to run the flexible note generator"""
     try:
-        # Initialize generator
         generator = FlexibleFinancialNoteGenerator()
         if not generator.note_templates:
             logger.error("No note templates loaded. Check notes_template.py")
@@ -851,9 +779,7 @@ def main() -> None:
         logger.info(f"Loaded {len(generator.note_templates)} note templates")
-        # Check for command line arguments
         if len(sys.argv) > 1:
-            # Command line mode
             if len(sys.argv) < 3:
                 logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                 logger.error("  mode: 'specific' or 'all'")
@@ -876,7 +802,6 @@ def main() -> None:
                     if note_number in generator.note_templates:
                         success = generator.generate_note(note_number)
                         if success:
-                            # Load the generated note
                             try:
                                 with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
                                     note_data = json.load(f)
@@ -890,7 +815,6 @@ def main() -> None:
                     else:
                         logger.error(f"Note {note_number} not found in templates")
-                # Save consolidated notes
                 if all_notes:
                     output_dir = settings.output_dir
                     Path(output_dir).mkdir(parents=True, exist_ok=True)
@@ -913,9 +837,8 @@ def main() -> None:
                 total = len(results)
                 logger.info(f"{successful}/{total} notes generated successfully")
-                # Print detailed results
                 for note, success in results.items():
-                    status = "✅ SUCCESS" if success else "❌ FAILED"
                     logger.info(f"  Note {note}: {status}")
             else:
@@ -923,7 +846,6 @@ def main() -> None:
                 sys.exit(1)
         else:
-            # Interactive mode
             choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
             if choice == "1":
@@ -943,12 +865,11 @@ def main() -> None:
                 total = len(results)
                 logger.info(f"{successful}/{total} notes generated successfully")
-                # Print summary
                 print("\n" + "="*50)
                 print("GENERATION SUMMARY")
                 print("="*50)
                 for note, success in results.items():
-                    status = "✅ SUCCESS" if success else "❌ FAILED"
                     print(f"Note {note}: {status}")
                 print("="*50)

 class FlexibleFinancialNoteGenerator:
 	"""Stub generator whose methods return fixed values without doing any work.

 	NOTE(review): a class with the same name is defined again further down
 	in this module; that later definition rebinds the name, so this stub
 	looks like dead code -- confirm before relying on or removing it.
 	"""
 	def __init__(self):
 		# Nothing to initialise; the stub holds no state.
 		pass
 	def generate_note(self, note_number, trial_balance_path=None):
 		"""Ignore both arguments and always report success (``True``)."""
 		return True
 	def generate_all_notes(self, trial_balance_path=None):
 		"""Ignore the argument and return the fixed mapping ``{"dummy": True}``."""
 		return {"dummy": True}
 import json
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from utils.utils import convert_note_json_to_lakhs
 load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class Settings(BaseSettings):
 	openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
 	api_url: str = "https://openrouter.ai/api/v1/chat/completions"
 	output_dir: str = "data/generated_notes"
 class NoteTemplate(BaseModel):
     """Model describing a note template by its two required string titles."""
     # Short title of the note.
     title: str
     # Full title of the note (presumably includes the note number; verify against the templates).
     full_title: str
 class GeneratedNote(BaseModel):
     note_number: str
     grand_total_lakhs: float
     generated_on: str
     assumptions: Optional[str] = None
 class FlexibleFinancialNoteGenerator:
+    def __init__(self, user_api_key: Optional[str] = None):
+        if user_api_key:
+            self.openrouter_api_key = user_api_key
+            logger.info("Using user-provided API key")
+        else:
+            self.openrouter_api_key = settings.openrouter_api_key
+            if not self.openrouter_api_key:
+                logger.error("OPENROUTER_API_KEY not found in .env file and no user key provided")
+                raise ValueError("OPENROUTER_API_KEY not found in .env file and no user key provided")
+            logger.info("Using API key from .env file")
         self.api_url = settings.api_url
         self.headers = {
             "Authorization": f"Bearer {self.openrouter_api_key}",
             "X-Title": "Financial Note Generator"
         }
         self.note_templates = self.load_note_templates()
         self.recommended_models = [
+            "deepseek/deepseek-r1",
             "mistralai/mixtral-8x7b-instruct"
         ]
     def load_note_templates(self) -> Dict[str, Any]:
         try:
             if __name__ == "__main__":
                 sys.path.append(str(Path(__file__).parent.parent))
             return {}
     def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
         try:
             if file_path.endswith('.json'):
                 with open(file_path, 'r', encoding='utf-8') as f:
             return None
     def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
         if note_number not in self.note_templates:
             return None
         template = self.note_templates[note_number]
         all_accounts = trial_balance_data.get("accounts", [])
         context = {
             "note_info": {
                 "number": note_number,
             "financial_year": "2023-24"
         }
         classification_guide = self._get_classification_guide(note_number)
         prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
+🔴 CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
 1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
 2. START YOUR RESPONSE WITH {{ and END WITH }}
 3. DO NOT USE ```json``` CODE BLOCKS
 4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
+🔴 REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
 {{
   "title": "{template.get('title', '')}",
   "full_title": "{template.get('full_title', '')}",
   "assumptions": "List any assumptions made during classification"
 }}
+🔴 STRUCTURE ARRAY EXPLAINED:
 - First element: Header row with column labels (March 31, 2024, March 31, 2023)
 - Subsequent elements: Data categories with subcategories
 - Each data category must have:
   * "total": Sum of current year values in subcategories
   * "previous_total": Sum of previous year values in subcategories
+🔴 YOUR TASK:
 1. Analyze ALL trial balance accounts provided below
 2. Identify accounts that belong to "{template['full_title']}"
 3. Classify into appropriate subcategories per Schedule III
+4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
 5. Calculate accurate totals ensuring mathematical consistency
 6. Structure output in hierarchical "structure" array format
+🔴 MATHEMATICAL REQUIREMENTS:
 - All amounts MUST be in lakhs (divide original by 100,000)
 - All subtotals MUST equal the grand total exactly
 - Use 0.00 for March 2023 if data missing
 - Ensure "total" = sum of "value" in subcategories
 - Ensure "previous_total" = sum of "previous_value" in subcategories
+🔴 CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
 {classification_guide}
+🔴 COMPLETE TRIAL BALANCE DATA:
 {json.dumps(context, indent=2)}
+🔴 TEMPLATE STRUCTURE TO FOLLOW:
 {json.dumps(template, indent=2)}
+🔴 VALIDATION RULES:
 - If no accounts match this note category, use empty categories with 0.00 totals
 - Ensure "metadata.note_number" exactly matches {note_number}
 - Document classification logic in "assumptions" field
         return prompt
     def _get_classification_guide(self, note_number: str) -> str:
         guides = {
             "10": """
 **Note 10 - Long Term Loans and Advances:**
             """,
             "13": """
 **Note 13 - Cash and Cash Equivalents:**
+- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
 - Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
 - Show: Balances in current accounts, savings accounts, fixed deposits separately
             """,
         return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
     def call_openrouter_api(self, prompt: str) -> Optional[str]:
         for model in self.recommended_models:
             logger.info(f"Trying model: {model}")
             payload = {
                     logger.warning(f"Model {model} not found (404), trying next model")
                 elif e.response.status_code == 402:
                     logger.warning(f"Model {model} requires payment (402), trying next model")
+                elif e.response.status_code == 401:
+                    logger.error(f"Invalid API key (401)")
+                    return None
                 else:
                     logger.error(f"HTTP error with {model}: {e}")
             except Exception as e:
         return None
     def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
         response_text = response_text.strip()
         json_objects = []
         brace_count = 0
         start_idx = -1
             elif char == '}':
                 brace_count -= 1
                 if brace_count == 0 and start_idx != -1:
                     potential_json = response_text[start_idx:i+1]
                     try:
                         parsed = json.loads(potential_json)
                         json_objects.append((parsed, potential_json))
                         break
                     except json.JSONDecodeError:
                         continue
             logger.info("Successfully extracted first valid JSON object from response")
             return json_objects[0]
         json_patterns = [
             r'```json\s*(.*?)\s*```',
             r'```\s*(.*?)\s*```',
                 except json.JSONDecodeError:
                     continue
         try:
             json_data = json.loads(response_text)
             return json_data, response_text
         except json.JSONDecodeError:
             try:
                 start = response_text.find('{')
                 end = response_text.rfind('}') + 1
             return None, None
     def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
         fixed_data = json_data.copy()
         template = self.note_templates.get(note_number, {})
         if "title" not in fixed_data or not fixed_data["title"]:
             fixed_data["title"] = template.get("title", f"Note {note_number}")
             logger.info(f"Auto-fixed missing title field")
             fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
             logger.info(f"Auto-fixed missing full_title field")
         if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
             fixed_data["metadata"] = {}
             logger.info("Auto-created metadata object")
         metadata_note_num = fixed_data["metadata"].get("note_number")
         try:
             expected_note_num = int(note_number)
             if (metadata_note_num is None or
                 metadata_note_num == 0 or
                 metadata_note_num == 0.0 or
             fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
             logger.info("Auto-fixed missing metadata.generated_on field")
         if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
             logger.warning("Structure array missing, creating default structure")
             fixed_data["structure"] = [
                 }
             ]
         else:
             if len(fixed_data["structure"]) == 0:
                 logger.warning("Empty structure array, adding default elements")
                 fixed_data["structure"] = [
                     }
                 ]
             for i, struct_elem in enumerate(fixed_data["structure"]):
                 if not isinstance(struct_elem, dict):
                     continue
                 if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
                     struct_elem["subcategories"] = []
                 if i > 0 and struct_elem.get("subcategories"):
                     if "total" not in struct_elem:
                         struct_elem["total"] = sum(
                             if isinstance(sub, dict)
                         )
         if "assumptions" not in fixed_data:
             fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
             logger.info("Auto-added default assumptions")
         return fixed_data
     def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
         required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
         missing_fields = []
         for field in required_fields:
             if field not in json_data:
         if missing_fields:
             return False, f"Missing required fields: {', '.join(missing_fields)}"
         if not isinstance(json_data.get("metadata"), dict):
             return False, "metadata must be an object"
         if str(metadata.get("note_number", "")) != str(note_number):
             return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
         if not isinstance(json_data.get("structure"), list):
             return False, "structure must be an array"
         return True, "Validation passed"
     def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
         try:
             title = json_data.get("full_title", json_data.get("title", "Financial Note"))
             structure = json_data.get("structure", [])
             if not structure:
                 return f"# {title}\n\n*No data available*"
             md_lines = [f"# {title}\n"]
             header_elem = structure[0] if len(structure) > 0 else None
             if header_elem and header_elem.get("subcategories"):
                 headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
                 md_lines.append("| Particulars | " + " | ".join(headers) + " |")
                 md_lines.append("|" + "---|" * (len(headers) + 1))
             for i in range(1, len(structure)):
                 elem = structure[i]
                 category = elem.get("category", "")
                 subcategories = elem.get("subcategories", [])
                 if category:
                     md_lines.append(f"\n**{category}**\n")
                 for sub in subcategories:
                     label = sub.get("label", "")
                     value = sub.get("value", 0.00)
                     previous_value = sub.get("previous_value", 0.00)
                     md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
                 if "total" in elem:
                     total = elem.get("total", 0.00)
                     previous_total = elem.get("previous_total", 0.00)
                     md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
             metadata = json_data.get("metadata", {})
             md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
             assumptions = json_data.get("assumptions", "")
             if assumptions:
                 md_lines.append(f"\n\n**Assumptions:** {assumptions}")
             return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
     def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
         Path(output_dir).mkdir(parents=True, exist_ok=True)
         json_output_path = f"{output_dir}/notes.json"
         raw_output_path = f"{output_dir}/notes_raw.txt"
         formatted_md_path = f"{output_dir}/notes_formatted.md"
         try:
             with open(raw_output_path, 'w', encoding='utf-8') as f:
                 f.write(note_data)
             json_data, json_string = self.extract_json_from_markdown(note_data)
             if json_data:
                 json_data = self.validate_and_fix_json(json_data, note_number)
                 is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                 if not is_valid:
                     logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
                 json_data = convert_note_json_to_lakhs(json_data)
                 with open(json_output_path, 'w', encoding='utf-8') as f:
                     json.dump(json_data, f, indent=2, ensure_ascii=False)
                 logger.info(f"JSON saved to {json_output_path}")
                 md_content = json_data.get('markdown_content', '')
                 if not md_content:
                     md_content = self._generate_markdown_from_structure(json_data)
                     logger.info("Auto-generated markdown from structure array")
                 return True
             else:
                 template = self.note_templates.get(note_number, {})
                 fallback_json = {
                     "title": template.get("title", f"Note {note_number}"),
         except Exception as e:
             logger.error(f"Error saving files: {e}")
             try:
                 template = self.note_templates.get(note_number, {})
                 emergency_json = {
             return False
     def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
         """Generate one financial note end to end and persist it.

         Steps, in order: check that a template exists for ``note_number``,
         load the trial balance, build the LLM prompt, call the OpenRouter
         API, then hand the raw response to ``save_generated_note``.

         Args:
             note_number: Key into ``self.note_templates`` identifying the note.
             trial_balance_path: Path passed to ``load_trial_balance``; the
                 default is read from ``settings`` once, when the method is
                 defined.

         Returns:
             ``False`` as soon as any step yields nothing (unknown template,
             trial balance not loaded, prompt not built, no API response);
             otherwise whatever ``save_generated_note`` returns.
         """
         if note_number not in self.note_templates:
             logger.error(f"Note template {note_number} not found")
             return False
         logger.info(f"Starting Note {note_number} generation...")
         trial_balance = self.load_trial_balance(trial_balance_path)
         if not trial_balance:
             return False
         prompt = self.build_llm_prompt(note_number, trial_balance)
         if not prompt:
             logger.error("Failed to build prompt")
             return False
         response = self.call_openrouter_api(prompt)
         if not response:
             logger.error("Failed to get API response")
             return False
         # output_dir is not passed, so save_generated_note uses its own default.
         success = self.save_generated_note(response, note_number)
         logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
         return success
     def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
         logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
         results = {}
         all_notes = []
         trial_balance = self.load_trial_balance(trial_balance_path)
         if not trial_balance:
             logger.error("Failed to load trial balance")
         for note_number in self.note_templates.keys():
             logger.info(f"Processing Note {note_number}")
             prompt = self.build_llm_prompt(note_number, trial_balance)
             if not prompt:
                 results[note_number] = False
                 continue
             response = self.call_openrouter_api(prompt)
             if not response:
                 results[note_number] = False
                 continue
             json_data, _ = self.extract_json_from_markdown(response)
             if json_data:
                 json_data = self.validate_and_fix_json(json_data, note_number)
                 is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                     logger.info(f"Note {note_number} processed successfully")
                 else:
                     logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
                     json_data = convert_note_json_to_lakhs(json_data)
                     all_notes.append(json_data)
                     results[note_number] = False
             else:
                 logger.error(f"Note {note_number}: Could not parse JSON from response")
                 template = self.note_templates.get(note_number, {})
                 fallback_note = {
                     "title": template.get("title", f"Note {note_number}"),
                 all_notes.append(fallback_note)
                 results[note_number] = False
             import time
             time.sleep(2)
         output_dir = settings.output_dir
         Path(output_dir).mkdir(parents=True, exist_ok=True)
         return results
 def main() -> None:
     try:
         generator = FlexibleFinancialNoteGenerator()
         if not generator.note_templates:
             logger.error("No note templates loaded. Check notes_template.py")
         logger.info(f"Loaded {len(generator.note_templates)} note templates")
         if len(sys.argv) > 1:
             if len(sys.argv) < 3:
                 logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                 logger.error("  mode: 'specific' or 'all'")
                     if note_number in generator.note_templates:
                         success = generator.generate_note(note_number)
                         if success:
                             try:
                                 with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
                                     note_data = json.load(f)
                     else:
                         logger.error(f"Note {note_number} not found in templates")
                 if all_notes:
                     output_dir = settings.output_dir
                     Path(output_dir).mkdir(parents=True, exist_ok=True)
                 total = len(results)
                 logger.info(f"{successful}/{total} notes generated successfully")
                 for note, success in results.items():
+                    status = "✅ SUCCESS" if success else "❌ FAILED"
                     logger.info(f"  Note {note}: {status}")
             else:
                 sys.exit(1)
         else:
             choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
             if choice == "1":
                 total = len(results)
                 logger.info(f"{successful}/{total} notes generated successfully")
                 print("\n" + "="*50)
                 print("GENERATION SUMMARY")
                 print("="*50)
                 for note, success in results.items():
+                    status = "✅ SUCCESS" if success else "❌ FAILED"
                     print(f"Note {note}: {status}")
                 print("="*50)