#!/usr/bin/env python3
"""
LLM Notes Data Processor - Extract and analyze trial balance data for LLM notes generation
"""

import sys
import os
import json
import logging
from pathlib import Path

# Add parent directory to path for imports.
# This must run before the `notes` import below, which is why that import
# cannot be grouped with the standard-library imports above.
sys.path.append(str(Path(__file__).parent.parent))

from notes.data_extraction import extract_trial_balance_data

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _record_to_dict(record):
    """Return a JSON-friendly form of a single trial balance record.

    Objects exposing ``model_dump()`` are converted with it; otherwise
    ``dict()`` is tried; anything else is passed through unchanged.
    NOTE(review): presumably these are pydantic v2 / v1 models respectively --
    confirm against ``notes.data_extraction``.
    """
    if hasattr(record, 'model_dump'):
        return record.model_dump()
    if hasattr(record, 'dict'):
        return record.dict()
    return record


def save_trial_balance_data(data, output_path):
    """Save trial balance data to a JSON file.

    Args:
        data: Iterable of trial balance records. Each record is converted via
            ``model_dump()`` or ``dict()`` when available, and otherwise
            written as-is (so it must already be JSON-serializable).
        output_path: Destination file path. The parent directory must exist.

    Raises:
        OSError: If the output file cannot be opened for writing.
        TypeError: If a record cannot be serialized by ``json.dump``.
    """
    records = [_record_to_dict(record) for record in data]

    output_data = {
        "trial_balance": records,
        "metadata": {
            "total_records": len(records),
            "source": "llm_notes_data_processor",
        },
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    logger.info("Saved %d records to %s", len(records), output_path)


def main():
    """Process trial balance data for LLM notes generation.

    Expects exactly one command-line argument: the path of the input file
    handed to ``extract_trial_balance_data``.

    Returns:
        0 on success, 1 if extraction or saving raised an exception.

    Raises:
        SystemExit: With status 1 when the argument count is wrong or the
            input file does not exist.
    """
    if len(sys.argv) != 2:
        logger.error("Usage: python llm_notes_data_processor.py <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]

    if not os.path.exists(input_file):
        logger.error("Input file not found: %s", input_file)
        sys.exit(1)

    try:
        logger.info("Processing trial balance data from: %s", input_file)

        # Extract trial balance data
        structured_data = extract_trial_balance_data(input_file)
        logger.info("Data extraction completed")

        # Save the data. The directory is derived from the output path so the
        # two can never drift apart.
        output_json = "data/output1/parsed_trial_balance.json"
        os.makedirs(os.path.dirname(output_json), exist_ok=True)
        save_trial_balance_data(structured_data, output_json)

        logger.info("Data analysis completed. Results saved to: %s", output_json)
        logger.info("LLM notes data processing completed successfully")
        return 0

    except Exception as e:
        # Top-level boundary: log with traceback so failures are diagnosable,
        # then report failure through the exit code.
        logger.exception("Error processing data: %s", e)
        return 1


if __name__ == "__main__":
    sys.exit(main())