File size: 2,357 Bytes
7e453aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
"""
LLM Notes Data Processor - Extract and analyze trial balance data for LLM notes generation
"""
import sys
import os
import json
import logging
from pathlib import Path

# Make the parent of this script's own directory importable. This has to run
# before the `notes` import below; presumably the `notes` package lives in
# that directory -- verify against the repository layout.
sys.path.append(str(Path(__file__).parent.parent))

from notes.data_extraction import extract_trial_balance_data

# Configure the root logger at INFO level (basicConfig is a no-op if the root
# logger already has handlers) and create this module's own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def _record_to_dict(record):
    """Return the JSON-ready form of a single trial balance record.

    Objects exposing ``model_dump()`` are converted with it; otherwise objects
    exposing ``dict()`` are converted with that; anything else is passed
    through unchanged (e.g. records that are already plain dictionaries).
    """
    # model_dump is checked first, so it wins when an object offers both.
    if hasattr(record, 'model_dump'):
        return record.model_dump()
    if hasattr(record, 'dict'):
        return record.dict()
    return record


def save_trial_balance_data(data, output_path):
    """Save trial balance data to a JSON file.

    The file contains a ``trial_balance`` list with one entry per record and
    a ``metadata`` object holding the record count and the producer name.

    Args:
        data: Iterable of records. Each record may be an object with a
            ``model_dump()`` or ``dict()`` method, or any value that the
            ``json`` module can serialize directly.
        output_path: Destination file path. Missing parent directories are
            created.

    Raises:
        TypeError: If a converted record is not JSON serializable. This is
            raised before the destination is opened, so an existing file at
            ``output_path`` is left untouched.
        ValueError: If the data contains circular references.
        OSError: If the directory or file cannot be created or written.
    """
    records = [_record_to_dict(record) for record in data]

    output_data = {
        "trial_balance": records,
        "metadata": {
            "total_records": len(records),
            "source": "llm_notes_data_processor"
        }
    }

    # Serialize fully in memory first: opening with 'w' truncates the file,
    # so a serialization error raised mid-dump would otherwise leave a
    # partial, unparseable file behind (and destroy any previous output).
    serialized = json.dumps(output_data, indent=2, ensure_ascii=False)

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(serialized)

    logger.info("Saved %d records to %s", len(records), output_path)

def main():
    """Process trial balance data for LLM notes generation.

    Reads the input file path from ``sys.argv[1]``, extracts the trial
    balance records with ``extract_trial_balance_data`` and writes them to
    ``data/output1/parsed_trial_balance.json`` (relative to the current
    working directory) via ``save_trial_balance_data``.

    Returns:
        0 on success, 1 if extraction or saving raised an exception.

    Raises:
        SystemExit: With code 1 when the argument count is wrong or the
            input path does not exist.
    """
    if len(sys.argv) != 2:
        logger.error("Usage: python llm_notes_data_processor.py <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]

    if not os.path.exists(input_file):
        logger.error(f"Input file not found: {input_file}")
        sys.exit(1)

    try:
        logger.info(f"Processing trial balance data from: {input_file}")

        # Extract trial balance data
        structured_data = extract_trial_balance_data(input_file)
        logger.info("Data extraction completed")

        # Save the data; the directory is derived from the output path so the
        # two can never drift apart.
        output_json = "data/output1/parsed_trial_balance.json"
        os.makedirs(os.path.dirname(output_json), exist_ok=True)
        save_trial_balance_data(structured_data, output_json)
        logger.info(f"Data analysis completed. Results saved to: {output_json}")

        logger.info("LLM notes data processing completed successfully")
        return 0

    except Exception as e:
        # Top-level boundary: turn any failure into exit code 1, but keep the
        # traceback in the log so the failure can actually be diagnosed.
        logger.exception("Error processing data: %s", e)
        return 1

# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())