finryver-dev / notes /llm_notes_data_processor.py
Sahil Garg
/notes-llm added, system.md file added
7e453aa
#!/usr/bin/env python3
"""
LLM Notes Data Processor - Extract and analyze trial balance data for LLM notes generation
"""
import sys
import os
import json
import logging
from pathlib import Path
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from notes.data_extraction import extract_trial_balance_data
# Configure the root logger to emit INFO and above, then create the
# module-level logger used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def save_trial_balance_data(data, output_path):
    """Save trial balance data to a JSON file.

    Each record is converted to a plain value before serialization:
    objects exposing ``model_dump()`` are dumped with it, objects exposing
    ``dict()`` are dumped with that, and anything else is stored unchanged.

    The written document has the shape::

        {
          "trial_balance": [...records...],
          "metadata": {"total_records": N, "source": "llm_notes_data_processor"}
        }

    Args:
        data: Iterable of records (model objects or already-plain values).
        output_path: Destination file path. A missing parent directory is
            created.

    Raises:
        TypeError, ValueError: If a converted record cannot be serialized
            to JSON. The destination file is not touched in that case.
        OSError: If the directory or file cannot be created or written.
    """
    records = []
    for record in data:
        if hasattr(record, 'model_dump'):
            records.append(record.model_dump())
        elif hasattr(record, 'dict'):
            records.append(record.dict())
        else:
            # Already a plain value (e.g. a dict); store it as-is.
            records.append(record)

    output_data = {
        "trial_balance": records,
        "metadata": {
            "total_records": len(records),
            "source": "llm_notes_data_processor",
        },
    }

    # Serialize BEFORE opening the file: opening with 'w' truncates it, so a
    # serialization error raised while streaming would otherwise leave a
    # half-written, invalid JSON file (and wipe any previous good output).
    serialized = json.dumps(output_data, indent=2, ensure_ascii=False)

    parent_dir = os.path.dirname(output_path)
    if parent_dir:  # empty when output_path is a bare file name
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

    logger.info("Saved %s records to %s", len(records), output_path)
def main():
    """Process one trial balance file for LLM notes generation.

    Reads the input path from ``sys.argv[1]``, passes it to
    ``extract_trial_balance_data`` and hands the result to
    ``save_trial_balance_data``, which writes
    ``data/output1/parsed_trial_balance.json`` relative to the current
    working directory.

    Returns:
        0 on success, 1 if extraction or saving raised an exception.

    Raises:
        SystemExit: With code 1 when the argument count is wrong or the
            input path does not exist.
    """
    if len(sys.argv) != 2:
        logger.error("Usage: python llm_notes_data_processor.py <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]
    if not os.path.exists(input_file):
        logger.error("Input file not found: %s", input_file)
        sys.exit(1)

    try:
        logger.info("Processing trial balance data from: %s", input_file)

        # Extract trial balance data
        structured_data = extract_trial_balance_data(input_file)
        logger.info("Data extraction completed")

        # Save the data; the directory is derived from the path so the two
        # cannot drift apart.
        output_json = "data/output1/parsed_trial_balance.json"
        os.makedirs(os.path.dirname(output_json), exist_ok=True)
        save_trial_balance_data(structured_data, output_json)

        logger.info("Data analysis completed. Results saved to: %s", output_json)
        logger.info("LLM notes data processing completed successfully")
        return 0
    except Exception as e:
        # Top-level boundary: keep the same message but include the
        # traceback, otherwise the failing call site is lost.
        logger.exception("Error processing data: %s", e)
        return 1
if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    raise SystemExit(main())