Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| LLM Notes Data Processor - Extract and analyze trial balance data for LLM notes generation | |
| """ | |
| import sys | |
| import os | |
| import json | |
| import logging | |
| from pathlib import Path | |
| # Add parent directory to path for imports | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| from notes.data_extraction import extract_trial_balance_data | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
def _record_to_dict(record):
    """Return *record* as a plain dict when it offers a conversion method."""
    if hasattr(record, 'model_dump'):
        return record.model_dump()
    if hasattr(record, 'dict'):
        return record.dict()
    # No known conversion method: hand the value to json unchanged.
    return record


def save_trial_balance_data(data, output_path):
    """Save trial balance records to a JSON file.

    Each record is converted with ``model_dump()`` when it has that method,
    otherwise with ``dict()``; records offering neither are written as-is.
    The file contains the records under ``"trial_balance"`` and a
    ``"metadata"`` object holding the record count and a source tag.

    Args:
        data: Iterable of records to serialize.
        output_path: Destination file path (``str`` or path-like). Missing
            parent directories are created so the function can be called
            without any prior setup.

    Raises:
        TypeError: If a record holds a value ``json`` cannot serialize.
        OSError: If the directory or file cannot be created or written.
    """
    records = [_record_to_dict(record) for record in data]
    output_data = {
        "trial_balance": records,
        "metadata": {
            "total_records": len(records),
            "source": "llm_notes_data_processor",
        },
    }

    # Do not rely on the caller having created the target directory.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)
    logger.info("Saved %d records to %s", len(records), output_path)
def main():
    """Process the trial balance file named on the command line.

    Reads the single ``<input_file>`` argument from ``sys.argv``, passes it
    to ``extract_trial_balance_data`` and writes the result to
    ``data/output1/parsed_trial_balance.json``, relative to the current
    working directory.

    Returns:
        int: ``0`` on success; ``1`` on a usage error, a missing input file,
        or any exception raised while extracting or saving. Every failure
        is reported through the return value so the ``sys.exit(main())``
        entry point decides the process exit status in one place.
    """
    if len(sys.argv) != 2:
        logger.error("Usage: python llm_notes_data_processor.py <input_file>")
        return 1

    input_file = sys.argv[1]
    if not os.path.exists(input_file):
        logger.error(f"Input file not found: {input_file}")
        return 1

    try:
        logger.info(f"Processing trial balance data from: {input_file}")

        # Extract trial balance data
        structured_data = extract_trial_balance_data(input_file)
        logger.info("Data extraction completed")

        # Save the data; the directory is derived from the output path so
        # the two can never drift apart.
        output_json = "data/output1/parsed_trial_balance.json"
        os.makedirs(os.path.dirname(output_json), exist_ok=True)
        save_trial_balance_data(structured_data, output_json)

        logger.info(f"Data analysis completed. Results saved to: {output_json}")
        logger.info("LLM notes data processing completed successfully")
        return 0
    except Exception as e:
        # Top-level boundary: keep the traceback in the log, not just the
        # exception message, so failures can be diagnosed.
        logger.exception("Error processing data: %s", e)
        return 1
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())