# finryver-dev / notes / llm_notes_generator.py
# (Hugging Face page header removed — upload note: "Update notes/llm_notes_generator.py", commit b9befe1 verified)
import json
import os
import logging
import requests
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
import re
import sys
from typing import Dict, List, Any, Optional, Tuple
import pandas as pd
from pydantic import BaseModel, Field, ValidationError
from pydantic_settings import BaseSettings
# Make the project root importable so sibling packages (e.g. utils/) resolve
# when this file is run directly as a script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.utils import convert_note_json_to_lakhs
# Load environment variables (e.g. OPENROUTER_API_KEY) from the project .env.
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Runtime configuration, overridable via environment variables."""
    # OpenRouter chat-completions endpoint.
    api_url: str = "https://openrouter.ai/api/v1/chat/completions"
    # Directory where generated notes are written.
    output_dir: str = "data/generated_notes"
    # Default trial-balance input file.
    trial_balance_json: str = "data/output1/parsed_trial_balance.json"
    model_config = {
        "extra": "ignore"  # tolerate unrelated env vars (e.g. OPENROUTER_API_KEY)
    }
settings = Settings()  # module-level singleton used as default throughout
class Account(BaseModel):
    """A single trial-balance account line."""
    account_name: str
    amount: float
    group: Optional[str] = None  # optional grouping/category label
class NoteTemplate(BaseModel):
    """Template metadata for one Schedule III note (short and full headings)."""
    title: str
    full_title: str
class GeneratedNote(BaseModel):
    """A finished note as produced by the generator."""
    note_number: str
    markdown_content: str
    grand_total_lakhs: float  # grand total in lakhs (₹ / 100,000)
    generated_on: str  # ISO-8601 timestamp
    assumptions: Optional[str] = None  # classification assumptions, if any
class FlexibleFinancialNoteGenerator:
    """Generates Schedule III financial-statement notes from a trial balance
    by prompting an LLM through the OpenRouter chat-completions API."""
    def __init__(self, user_api_key: Optional[str] = None):
        """Initialise the generator.

        Args:
            user_api_key: OpenRouter API key; mandatory.

        Raises:
            ValueError: if no API key is provided.
        """
        if not user_api_key:
            logger.error("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
            raise ValueError("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
        self.openrouter_api_key = user_api_key
        logger.info("Using provided OpenRouter API key")
        self.api_url = settings.api_url
        # OpenRouter requires Referer/X-Title headers for app attribution.
        self.headers = {
            "Authorization": f"Bearer {self.openrouter_api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://localhost:3000",
            "X-Title": "Financial Note Generator"
        }
        self.note_templates = self.load_note_templates()
        # Models tried in order by call_openrouter_api until one succeeds.
        self.recommended_models = [
            "mistralai/mixtral-8x7b-instruct"
        ]
def load_note_templates(self) -> Dict[str, Any]:
try:
if __name__ == "__main__":
sys.path.append(str(Path(__file__).parent.parent))
from notes_template import note_templates
return note_templates
except ImportError as e:
logger.error(f"Error importing note_templates from notes_template: {e}")
return {}
except Exception as e:
logger.error(f"Unexpected error loading note_templates: {e}")
return {}
def load_trial_balance(self, file_path: str = settings.trial_balance_json) -> Optional[Dict[str, Any]]:
try:
if file_path.endswith('.json'):
with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
if isinstance(data, list):
accounts = data
elif isinstance(data, dict):
accounts = data.get('accounts', [])
else:
logger.error(f"Unexpected trial balance format: {type(data)}")
return None
logger.info(f"Loaded trial balance with {len(accounts)} accounts")
return {"accounts": accounts}
elif file_path.endswith('.xlsx'):
from notes.data_extraction import extract_trial_balance_data
accounts = extract_trial_balance_data(file_path)
logger.info(f"Extracted trial balance with {len(accounts)} accounts from Excel")
return {"accounts": accounts}
else:
logger.error(f"Unsupported file type: {file_path}")
return None
except FileNotFoundError:
logger.error(f"Trial balance file not found: {file_path}")
return None
except Exception as e:
logger.error(f"Error loading trial balance: {e}")
return None
    def build_llm_prompt(self, note_number: str, trial_balance_data: Dict[str, Any]) -> Optional[str]:
        """Build the JSON-only instruction prompt for one note.

        Embeds the full trial balance, the note's template, and a per-note
        classification guide. Returns None when no template exists for
        note_number.
        """
        if note_number not in self.note_templates:
            return None
        template = self.note_templates[note_number]
        all_accounts = trial_balance_data.get("accounts", [])
        # Context blob serialized into the prompt so the model sees every account.
        context = {
            "note_info": {
                "number": note_number,
                "title": template.get("title", ""),
                "full_title": template.get("full_title", "")
            },
            "trial_balance": {
                "total_accounts": len(all_accounts),
                "accounts": all_accounts
            },
            "current_date": datetime.now().strftime("%Y-%m-%d"),
            # NOTE(review): financial year is hard-coded — confirm before reuse
            # beyond FY 2023-24.
            "financial_year": "2023-24"
        }
        classification_guide = self._get_classification_guide(note_number)
        # NOTE(review): the "ðŸ"´" sequences below look like mojibake of an emoji
        # in the original prompt; left byte-for-byte intact because the prompt is
        # runtime data sent to the model. template['full_title'] below assumes
        # every template defines full_title — TODO confirm.
        prompt = f"""You are a senior financial analyst and chartered accountant with expertise in Indian accounting standards and Schedule III of the Companies Act 2013.
ðŸ"´ CRITICAL INSTRUCTIONS - MUST FOLLOW EXACTLY:
1. OUTPUT ONLY VALID JSON - NO MARKDOWN, NO EXPLANATIONS, NO TEXT OUTSIDE JSON
2. START YOUR RESPONSE WITH {{ and END WITH }}
3. DO NOT USE ```json``` CODE BLOCKS
4. DO NOT ADD ANY COMMENTARY OR EXPLANATIONS
ðŸ"´ REQUIRED JSON STRUCTURE - ALL FIELDS MANDATORY:
{{
"title": "{template.get('title', '')}",
"full_title": "{template.get('full_title', '')}",
"structure": [
{{
"category": "In Lakhs",
"subcategories": [
{{ "label": "March 31, 2024", "value": 0.00 }},
{{ "label": "March 31, 2023", "value": 0.00 }}
]
}},
{{
"category": "Category Name",
"subcategories": [
{{ "label": "Subcategory Item", "value": 0.00, "previous_value": 0.00 }}
],
"total": 0.00,
"previous_total": 0.00
}}
],
"metadata": {{
"note_number": {note_number},
"generated_on": "{datetime.now().isoformat()}"
}},
"assumptions": "List any assumptions made during classification"
}}
ðŸ"´ STRUCTURE ARRAY EXPLAINED:
- First element: Header row with column labels (March 31, 2024, March 31, 2023)
- Subsequent elements: Data categories with subcategories
- Each data category must have:
* "category": Main category name
* "subcategories": Array of line items with "label", "value", "previous_value"
* "total": Sum of current year values in subcategories
* "previous_total": Sum of previous year values in subcategories
ðŸ"´ YOUR TASK:
1. Analyze ALL trial balance accounts provided below
2. Identify accounts that belong to "{template['full_title']}"
3. Classify into appropriate subcategories per Schedule III
4. Convert all amounts to lakhs (₹ ÷ 100,000) with 2 decimal places
5. Calculate accurate totals ensuring mathematical consistency
6. Structure output in hierarchical "structure" array format
ðŸ"´ MATHEMATICAL REQUIREMENTS:
- All amounts MUST be in lakhs (divide original by 100,000)
- All subtotals MUST equal the grand total exactly
- Use 0.00 for March 2023 if data missing
- Round to 2 decimal places consistently
- Ensure "total" = sum of "value" in subcategories
- Ensure "previous_total" = sum of "previous_value" in subcategories
ðŸ"´ CLASSIFICATION GUIDANCE FOR NOTE {note_number}:
{classification_guide}
ðŸ"´ COMPLETE TRIAL BALANCE DATA:
{json.dumps(context, indent=2)}
ðŸ"´ TEMPLATE STRUCTURE TO FOLLOW:
{json.dumps(template, indent=2)}
ðŸ"´ VALIDATION RULES:
- If no accounts match this note category, use empty categories with 0.00 totals
- Ensure "metadata.note_number" exactly matches {note_number}
- Document classification logic in "assumptions" field
- Structure must have at least 2 elements (header + data)
GENERATE VALID JSON NOW (NO ADDITIONAL TEXT):"""
        return prompt
    def _get_classification_guide(self, note_number: str) -> str:
        """Return Schedule III classification guidance text for a note.

        Known guides exist for notes 10-15; any other note number falls back
        to a generic instruction.
        """
        # Guide bodies are runtime prompt text — keep wording as-is.
        guides = {
            "10": """
**Note 10 - Long Term Loans and Advances:**
- Include: Security deposits, long-term advances to suppliers/employees, deposits with utilities
- Categories: Unsecured considered good, Unsecured considered doubtful, Doubtful (provision)
- Exclude: Short-term advances, trade receivables, prepaid expenses under 1 year
""",
            "11": """
**Note 11 - Inventories:**
- Include: Raw materials, work-in-progress, finished goods, stores and spares, consumables
- Value at lower of cost or net realizable value
- Exclude: Advances for inventory purchases (classify as advances)
""",
            "12": """
**Note 12 - Trade Receivables:**
- Include: Amounts due from customers for goods/services, bills receivable
- Categories: Unsecured considered good, Unsecured considered doubtful, Provision for doubtful debts
- Exclude: Advances, deposits, other receivables
""",
            "13": """
**Note 13 - Cash and Cash Equivalents:**
- Include: Cash on hand, balances with banks (current/savings), short-term deposits (≤3 months)
- Separate: Cash and cash equivalents vs Other bank balances (FDs >3 months)
- Show: Balances in current accounts, savings accounts, fixed deposits separately
""",
            "14": """
**Note 14 - Short Term Loans and Advances:**
- Include: Prepaid expenses, advances to suppliers, employee advances, advance tax, TDS receivable
- Categories:
* Unsecured, considered good: Prepaid expenses, Other advances
* Other loans and advances: Advance tax, Balances with statutory/govt authorities
- Exclude: Long-term advances, trade receivables
""",
            "15": """
**Note 15 - Other Current Assets:**
- Include: Interest accrued, export incentives receivable, insurance claims, other miscellaneous current assets
- Exclude: Items that fit into specific categories like trade receivables, advances, cash
"""
        }
        return guides.get(note_number, f"**Note {note_number}:** Classify accounts logically based on their nature and the note title.")
def call_openrouter_api(self, prompt: str) -> Optional[str]:
for model in self.recommended_models:
logger.info(f"Trying model: {model}")
payload = {
"model": model,
"messages": [
{
"role": "system",
"content": "You are an expert chartered accountant specializing in Indian accounting standards. You MUST respond with ONLY valid JSON, never with markdown code blocks or explanations. Start with { and end with }."
},
{"role": "user", "content": prompt}
],
"max_tokens": 12000,
"temperature": 0.1,
"top_p": 0.9
}
try:
response = requests.post(
self.api_url,
headers=self.headers,
json=payload,
timeout=180
)
response.raise_for_status()
result = response.json()
content = result['choices'][0]['message']['content']
logger.info(f"Successful response from {model}")
return content
except requests.exceptions.Timeout:
logger.warning(f"Request to {model} timed out after 180s")
continue
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
logger.warning(f"Model {model} not found (404), trying next model")
elif e.response.status_code == 402:
logger.warning(f"Model {model} requires payment (402), trying next model")
elif e.response.status_code == 401:
logger.error(f"Invalid API key (401)")
return None
else:
logger.error(f"HTTP error with {model}: {e}")
except Exception as e:
logger.error(f"Failed with {model}: {e}")
continue
logger.error("All models failed")
return None
    def extract_json_from_markdown(self, response_text: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
        """Extract the first parseable JSON object from an LLM response.

        Tries, in order: (1) a brace-matching scan for the first balanced
        object, (2) regex extraction from fenced code blocks / any non-greedy
        {...}, (3) parsing the entire text, (4) the widest span between the
        first '{' and the last '}'. Returns (parsed_dict, raw_json_string),
        or (None, None) if nothing parses.
        Known limitation: the brace scan does not account for braces inside
        JSON string literals.
        """
        response_text = response_text.strip()
        json_objects = []
        brace_count = 0
        start_idx = -1
        # Pass 1: scan for the first balanced {...} span that json.loads accepts.
        for i, char in enumerate(response_text):
            if char == '{':
                if brace_count == 0:
                    start_idx = i
                brace_count += 1
            elif char == '}':
                brace_count -= 1
                if brace_count == 0 and start_idx != -1:
                    potential_json = response_text[start_idx:i+1]
                    try:
                        parsed = json.loads(potential_json)
                        json_objects.append((parsed, potential_json))
                        break
                    except json.JSONDecodeError:
                        # Balanced but invalid; keep scanning for the next span.
                        continue
        if json_objects:
            logger.info("Successfully extracted first valid JSON object from response")
            return json_objects[0]
        # Pass 2: fenced ```json / ``` blocks, then the smallest {...} match.
        json_patterns = [
            r'```json\s*(.*?)\s*```',
            r'```\s*(.*?)\s*```',
            r'(\{.*?\})'
        ]
        for pattern in json_patterns:
            match = re.search(pattern, response_text, re.DOTALL)
            if match:
                try:
                    json_content = match.group(1).strip()
                    json_data = json.loads(json_content)
                    return json_data, json_content
                except json.JSONDecodeError:
                    continue
        # Pass 3: maybe the whole response is already valid JSON.
        try:
            json_data = json.loads(response_text)
            return json_data, response_text
        except json.JSONDecodeError:
            # Pass 4: last resort — widest span from first '{' to last '}'.
            try:
                start = response_text.find('{')
                end = response_text.rfind('}') + 1
                if start != -1 and end > start:
                    json_part = response_text[start:end]
                    json_data = json.loads(json_part)
                    return json_data, json_part
            except json.JSONDecodeError:
                pass
        return None, None
def safe_numeric_value(self, value, default=0.0):
"""Convert any value to a safe numeric float, defaulting to 0.0 if conversion fails."""
if value is None or value == '' or value == '-':
return default
try:
# If already a number, return it as float
if isinstance(value, (int, float)):
return float(value)
# Handle string numbers
if isinstance(value, str):
# Remove common non-numeric characters
cleaned = value.replace(',', '').replace('₹', '').replace('Rs', '').replace('Rs.', '').strip()
# Handle parentheses for negative numbers (accounting format)
if cleaned.startswith('(') and cleaned.endswith(')'):
cleaned = '-' + cleaned[1:-1]
# Handle negative numbers
is_negative = cleaned.startswith('-')
if is_negative:
cleaned = cleaned[1:]
# Remove any remaining non-numeric characters except decimal point
cleaned = ''.join(c for c in cleaned if c.isdigit() or c == '.')
if not cleaned or cleaned == '.':
return default
# Convert to number
result = float(cleaned)
return -result if is_negative else result
# Try direct conversion as last resort
return float(value)
except (ValueError, TypeError, AttributeError):
logger.warning(f"Could not convert value '{value}' to numeric, using default {default}")
return default
def validate_and_fix_json(self, json_data: Dict[str, Any], note_number: str) -> Dict[str, Any]:
fixed_data = json_data.copy()
template = self.note_templates.get(note_number, {})
if "title" not in fixed_data or not fixed_data["title"]:
fixed_data["title"] = template.get("title", f"Note {note_number}")
logger.info(f"Auto-fixed missing title field")
if "full_title" not in fixed_data or not fixed_data["full_title"]:
fixed_data["full_title"] = template.get("full_title", f"{note_number}. {fixed_data.get('title', 'Financial Note')}")
logger.info(f"Auto-fixed missing full_title field")
if "metadata" not in fixed_data or not isinstance(fixed_data["metadata"], dict):
fixed_data["metadata"] = {}
logger.info("Auto-created metadata object")
metadata_note_num = fixed_data["metadata"].get("note_number")
try:
expected_note_num = int(note_number);
if (metadata_note_num is None or
metadata_note_num == 0 or
metadata_note_num == 0.0 or
int(metadata_note_num) != expected_note_num):
fixed_data["metadata"]["note_number"] = expected_note_num
logger.info(f"Auto-corrected metadata.note_number from {metadata_note_num} to {expected_note_num}")
except ValueError:
fixed_data["metadata"]["note_number"] = note_number
logger.info(f"Auto-set metadata.note_number to string: {note_number}")
if "generated_on" not in fixed_data["metadata"]:
fixed_data["metadata"]["generated_on"] = datetime.now().isoformat()
logger.info("Auto-fixed missing metadata.generated_on field")
if "structure" not in fixed_data or not isinstance(fixed_data["structure"], list):
logger.warning("Structure array missing, creating default structure")
fixed_data["structure"] = [
{
"category": "In Lakhs",
"subcategories": [
{"label": "March 31, 2024", "value": 0.00},
{"label": "March 31, 2023", "value": 0.00}
]
},
{
"category": "No data available",
"subcategories": [
{"label": "Items", "value": 0.00, "previous_value": 0.00}
],
"total": 0.00,
"previous_total": 0.00
}
]
else:
if len(fixed_data["structure"]) == 0:
logger.warning("Empty structure array, adding default elements")
fixed_data["structure"] = [
{
"category": "In Lakhs",
"subcategories": [
{"label": "March 31, 2024", "value": 0.00},
{"label": "March 31, 2023", "value": 0.00}
]
}
]
for i, struct_elem in enumerate(fixed_data["structure"]):
if not isinstance(struct_elem, dict):
continue
if "category" not in struct_elem:
struct_elem["category"] = f"Category {i}"
if "subcategories" not in struct_elem or not isinstance(struct_elem["subcategories"], list):
struct_elem["subcategories"] = []
# FIX: Ensure all subcategory values are numeric before summing
for sub in struct_elem.get("subcategories", []):
if isinstance(sub, dict):
if "value" in sub:
sub["value"] = self.safe_numeric_value(sub["value"])
if "previous_value" in sub:
sub["previous_value"] = self.safe_numeric_value(sub["previous_value"])
# Now safely calculate totals
if i > 0 and struct_elem.get("subcategories"):
if "total" not in struct_elem:
struct_elem["total"] = sum(
self.safe_numeric_value(sub.get("value", 0.0))
for sub in struct_elem["subcategories"]
if isinstance(sub, dict)
)
if "previous_total" not in struct_elem:
struct_elem["previous_total"] = sum(
self.safe_numeric_value(sub.get("previous_value", 0.0))
for sub in struct_elem["subcategories"]
if isinstance(sub, dict)
)
if "assumptions" not in fixed_data:
fixed_data["assumptions"] = "Classification based on account names and standard accounting practices"
logger.info("Auto-added default assumptions")
return fixed_data
def validate_json_structure(self, json_data: Dict[str, Any], note_number: str) -> Tuple[bool, str]:
required_fields = ["title", "full_title", "structure", "metadata", "assumptions"]
missing_fields = []
for field in required_fields:
if field not in json_data:
missing_fields.append(field)
if missing_fields:
return False, f"Missing required fields: {', '.join(missing_fields)}"
if not isinstance(json_data.get("metadata"), dict):
return False, "metadata must be an object"
metadata = json_data["metadata"]
if "note_number" not in metadata:
return False, "metadata.note_number is required"
if str(metadata.get("note_number", "")) != str(note_number):
return False, f"Note number mismatch: expected {note_number}, got {metadata.get('note_number')}"
if not isinstance(json_data.get("structure"), list):
return False, "structure must be an array"
if len(json_data["structure"]) == 0:
return False, "structure array cannot be empty"
return True, "Validation passed"
def _generate_markdown_from_structure(self, json_data: Dict[str, Any]) -> str:
try:
title = json_data.get("full_title", json_data.get("title", "Financial Note"))
structure = json_data.get("structure", [])
if not structure:
return f"# {title}\n\n*No data available*"
md_lines = [f"# {title}\n"]
header_elem = structure[0] if len(structure) > 0 else None
if header_elem and header_elem.get("subcategories"):
headers = [sub.get("label", "") for sub in header_elem["subcategories"]]
md_lines.append("| Particulars | " + " | ".join(headers) + " |")
md_lines.append("|" + "---|" * (len(headers) + 1))
for i in range(1, len(structure)):
elem = structure[i]
category = elem.get("category", "")
subcategories = elem.get("subcategories", [])
if category:
md_lines.append(f"\n**{category}**\n")
for sub in subcategories:
label = sub.get("label", "")
value = sub.get("value", 0.00)
previous_value = sub.get("previous_value", 0.00)
md_lines.append(f"| {label} | {value:.2f} | {previous_value:.2f} |")
if "total" in elem:
total = elem.get("total", 0.00)
previous_total = elem.get("previous_total", 0.00)
md_lines.append(f"| **Total {category}** | **{total:.2f}** | **{previous_total:.2f}** |")
metadata = json_data.get("metadata", {})
md_lines.append(f"\n\n*Generated on: {metadata.get('generated_on', 'Unknown')}*")
assumptions = json_data.get("assumptions", "")
if assumptions:
md_lines.append(f"\n\n**Assumptions:** {assumptions}")
return "\n".join(md_lines)
except Exception as e:
logger.error(f"Error generating markdown from structure: {e}")
return f"# {json_data.get('full_title', 'Financial Note')}\n\n*Error generating markdown table*"
    def save_generated_note(self, note_data: str, note_number: str, output_dir: str = settings.output_dir) -> bool:
        """Persist one note's raw LLM response as raw text, JSON and markdown.

        Writes three files into output_dir: notes_raw.txt (verbatim response),
        notes.json (parsed/repaired JSON with amounts converted to lakhs) and
        notes_formatted.md (rendered table). If the response cannot be parsed,
        a schema-complete fallback JSON embedding the raw response is written
        instead. Returns True only when parsing succeeded.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        json_output_path = f"{output_dir}/notes.json"
        raw_output_path = f"{output_dir}/notes_raw.txt"
        formatted_md_path = f"{output_dir}/notes_formatted.md"
        try:
            # Always keep the verbatim model output for debugging.
            with open(raw_output_path, 'w', encoding='utf-8') as f:
                f.write(note_data)
            json_data, json_string = self.extract_json_from_markdown(note_data)
            if json_data:
                # Repair, best-effort validate, then convert amounts to lakhs.
                json_data = self.validate_and_fix_json(json_data, note_number)
                is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
                if not is_valid:
                    # Non-fatal: the repaired JSON is still written out.
                    logger.warning(f"JSON validation warning after auto-fix: {validation_msg}")
                json_data = convert_note_json_to_lakhs(json_data)
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(json_data, f, indent=2, ensure_ascii=False)
                logger.info(f"JSON saved to {json_output_path}")
                md_content = json_data.get('markdown_content', '')
                if not md_content:
                    # The model is asked for structured JSON only, so markdown
                    # is normally rendered from the structure array here.
                    md_content = self._generate_markdown_from_structure(json_data)
                    logger.info("Auto-generated markdown from structure array")
                with open(formatted_md_path, 'w', encoding='utf-8') as f:
                    f.write(md_content)
                return True
            else:
                # Parsing failed: write a placeholder that still satisfies the
                # downstream schema and embeds the raw response.
                template = self.note_templates.get(note_number, {})
                fallback_json = {
                    "title": template.get("title", f"Note {note_number}"),
                    "full_title": template.get("full_title", f"{note_number}. Financial Note"),
                    "structure": [
                        {
                            "category": "In Lakhs",
                            "subcategories": [
                                {"label": "March 31, 2024", "value": 0.00},
                                {"label": "March 31, 2023", "value": 0.00}
                            ]
                        },
                        {
                            "category": "Error - No data",
                            "subcategories": [
                                {"label": "Could not parse response", "value": 0.00, "previous_value": 0.00}
                            ],
                            "total": 0.00,
                            "previous_total": 0.00
                        }
                    ],
                    "metadata": {
                        "note_number": int(note_number) if note_number.isdigit() else note_number,
                        "generated_on": datetime.now().isoformat()
                    },
                    "assumptions": "Failed to parse LLM response",
                    "raw_response": note_data,
                    "error": "Could not parse JSON from response"
                }
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(fallback_json, f, indent=2, ensure_ascii=False)
                logger.warning(f"Fallback JSON with required fields saved to {json_output_path}")
                return False
        except Exception as e:
            logger.error(f"Error saving files: {e}")
            # Second-level fallback: even if writing the parsed note failed
            # (disk/encoding issues), try to leave a minimal valid JSON behind.
            try:
                template = self.note_templates.get(note_number, {})
                emergency_json = {
                    "title": template.get("title", f"Note {note_number}"),
                    "full_title": template.get("full_title", f"{note_number}. Financial Note"),
                    "structure": [
                        {
                            "category": "In Lakhs",
                            "subcategories": [
                                {"label": "March 31, 2024", "value": 0.00},
                                {"label": "March 31, 2023", "value": 0.00}
                            ]
                        }
                    ],
                    "metadata": {
                        "note_number": int(note_number) if note_number.isdigit() else note_number,
                        "generated_on": datetime.now().isoformat()
                    },
                    "assumptions": "Emergency fallback due to processing error",
                    "error": str(e)
                }
                with open(json_output_path, 'w', encoding='utf-8') as f:
                    json.dump(emergency_json, f, indent=2, ensure_ascii=False)
                logger.info(f"Emergency fallback JSON saved to {json_output_path}")
            except Exception as emergency_error:
                logger.error(f"Emergency fallback also failed: {emergency_error}")
            return False
def generate_note(self, note_number: str, trial_balance_path: str = settings.trial_balance_json) -> bool:
if note_number not in self.note_templates:
logger.error(f"Note template {note_number} not found")
return False
logger.info(f"Starting Note {note_number} generation...")
trial_balance = self.load_trial_balance(trial_balance_path)
if not trial_balance:
return False
prompt = self.build_llm_prompt(note_number, trial_balance)
if not prompt:
logger.error("Failed to build prompt")
return False
response = self.call_openrouter_api(prompt)
if not response:
logger.error("Failed to get API response")
return False
success = self.save_generated_note(response, note_number)
logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
return success
def generate_all_notes(self, trial_balance_path: str = settings.trial_balance_json) -> Dict[str, bool]:
logger.info(f"Starting generation of all {len(self.note_templates)} notes...")
results = {}
all_notes = []
trial_balance = self.load_trial_balance(trial_balance_path)
if not trial_balance:
logger.error("Failed to load trial balance")
return {note: False for note in self.note_templates.keys()}
for note_number in self.note_templates.keys():
logger.info(f"Processing Note {note_number}")
prompt = self.build_llm_prompt(note_number, trial_balance)
if not prompt:
results[note_number] = False
continue
response = self.call_openrouter_api(prompt)
if not response:
results[note_number] = False
continue
json_data, _ = self.extract_json_from_markdown(response)
if json_data:
json_data = self.validate_and_fix_json(json_data, note_number)
is_valid, validation_msg = self.validate_json_structure(json_data, note_number)
if is_valid:
json_data = convert_note_json_to_lakhs(json_data)
all_notes.append(json_data)
results[note_number] = True
logger.info(f"Note {note_number} processed successfully")
else:
logger.warning(f"Note {note_number} validation failed even after auto-fix: {validation_msg}")
json_data = convert_note_json_to_lakhs(json_data)
all_notes.append(json_data)
results[note_number] = False
else:
logger.error(f"Note {note_number}: Could not parse JSON from response")
template = self.note_templates.get(note_number, {})
fallback_note = {
"title": template.get("title", f"Note {note_number}"),
"full_title": template.get("full_title", f"{note_number}. Financial Note"),
"structure": [
{
"category": "In Lakhs",
"subcategories": [
{"label": "March 31, 2024", "value": 0.00},
{"label": "March 31, 2023", "value": 0.00}
]
},
{
"category": "Error",
"subcategories": [
{"label": "Failed to generate from LLM response", "value": 0.00, "previous_value": 0.00}
],
"total": 0.00,
"previous_total": 0.00
}
],
"metadata": {
"note_number": int(note_number) if note_number.isdigit() else note_number,
"generated_on": datetime.now().isoformat()
},
"assumptions": "LLM response parsing failed",
"error": "JSON parsing failed"
}
all_notes.append(fallback_note)
results[note_number] = False
import time
time.sleep(2)
output_dir = settings.output_dir
Path(output_dir).mkdir(parents=True, exist_ok=True)
consolidated_output = {
"notes": all_notes,
"generation_summary": {
"total_notes": len(self.note_templates),
"successful_notes": sum(1 for success in results.values() if success),
"failed_notes": sum(1 for success in results.values() if not success),
"generated_on": datetime.now().isoformat(),
"results": results
}
}
with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
json.dump(consolidated_output, f, indent=2, ensure_ascii=False)
successful = sum(1 for success in results.values() if success)
total = len(results)
logger.info(f"GENERATION SUMMARY: {successful}/{total} notes generated successfully")
logger.info(f"All notes saved to {output_dir}/notes.json")
return results
def main() -> None:
    """CLI entry point.

    Usage:
        python llm_notes_generator.py specific 10,11,12
        python llm_notes_generator.py all
        python llm_notes_generator.py            (interactive prompt)

    Requires OPENROUTER_API_KEY in the environment (or .env).
    """
    try:
        # Get API key from environment
        user_api_key = os.environ.get("OPENROUTER_API_KEY")
        if not user_api_key:
            logger.error("OpenRouter API key is required. Please provide OPENROUTER_API_KEY.")
            sys.exit(1)
        generator = FlexibleFinancialNoteGenerator(user_api_key=user_api_key)
        if not generator.note_templates:
            logger.error("No note templates loaded. Check notes_template.py")
            return
        logger.info(f"Loaded {len(generator.note_templates)} note templates")
        if len(sys.argv) > 1:
            mode = sys.argv[1].lower()
            note_numbers = sys.argv[2] if len(sys.argv) > 2 else ""
            if mode == "specific":
                # FIX: the note-number argument is only required for 'specific'
                # mode; previously `... all` with no note list also exited with
                # a usage error.
                if not note_numbers:
                    logger.error("Usage: python llm_notes_generator.py <mode> <note_numbers>")
                    logger.error(" mode: 'specific' or 'all'")
                    logger.error(" note_numbers: comma-separated note numbers (for specific mode)")
                    sys.exit(1)
                note_list = [n.strip() for n in note_numbers.split(",")]
                all_notes = []
                successful_notes = []
                for note_number in note_list:
                    if note_number not in generator.note_templates:
                        logger.error(f"Note {note_number} not found in templates")
                        continue
                    success = generator.generate_note(note_number)
                    if not success:
                        logger.error(f"Failed to generate note {note_number}")
                        continue
                    try:
                        # Read back the note just written by generate_note.
                        # FIX: use settings.output_dir instead of a hard-coded
                        # path so this stays consistent with save_generated_note.
                        with open(f"{settings.output_dir}/notes.json", "r", encoding="utf-8") as f:
                            note_data = json.load(f)
                        all_notes.append(note_data)
                        successful_notes.append(note_number)
                        logger.info(f"Note {note_number} generated successfully")
                    except Exception as e:
                        logger.error(f"Failed to load generated note {note_number}: {e}")
                if all_notes:
                    output_dir = settings.output_dir
                    Path(output_dir).mkdir(parents=True, exist_ok=True)
                    consolidated = {
                        "notes": all_notes,
                        "generation_summary": {
                            "requested_notes": note_list,
                            "successful_notes": successful_notes,
                            "total_successful": len(successful_notes),
                            "generated_on": datetime.now().isoformat()
                        }
                    }
                    with open(f"{output_dir}/notes.json", "w", encoding="utf-8") as f:
                        json.dump(consolidated, f, indent=2, ensure_ascii=False)
                    logger.info(f"Consolidated notes saved to {output_dir}/notes.json")
            elif mode == "all":
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")
                for note, success in results.items():
                    # FIX: repaired mojibake in the failure marker ("⌠" -> "❌").
                    status = "✅ SUCCESS" if success else "❌ FAILED"
                    logger.info(f" Note {note}: {status}")
            else:
                logger.error("Invalid mode. Use 'specific' or 'all'")
                sys.exit(1)
        else:
            # Interactive fallback when no CLI arguments are given.
            choice = input("\nGenerate (1) specific note or (2) all notes? Enter 1 or 2: ").strip()
            if choice == "1":
                available_notes = list(generator.note_templates.keys())
                print(f"Available notes: {', '.join(available_notes)}")
                note_number = input("Enter note number: ").strip()
                if note_number in available_notes:
                    success = generator.generate_note(note_number)
                    logger.info(f"Note {note_number} {'generated successfully' if success else 'generated with issues'}")
                else:
                    logger.error(f"Note {note_number} not found")
            elif choice == "2":
                results = generator.generate_all_notes()
                successful = sum(1 for success in results.values() if success)
                total = len(results)
                logger.info(f"{successful}/{total} notes generated successfully")
                print("\n" + "="*50)
                print("GENERATION SUMMARY")
                print("="*50)
                for note, success in results.items():
                    status = "✅ SUCCESS" if success else "❌ FAILED"
                    print(f"Note {note}: {status}")
                print("="*50)
            else:
                logger.error("Invalid choice. Enter 1 or 2.")
    except KeyboardInterrupt:
        logger.info("Generation interrupted by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"Error: {e}", exc_info=True)
        sys.exit(1)
if __name__ == "__main__":
    main()