Spaces:
Sleeping
Sleeping
File size: 1,804 Bytes
f39814a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
import json
import logging
import pandas as pd
from typing import Any
from pydantic import BaseModel, ValidationError
from pydantic_settings import BaseSettings
from utils.utils import clean_value
# Configure logging. Note that basicConfig() is called at import time with
# level INFO, so importing this module has a logging side effect.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Application settings for the trial balance loader.

    NOTE(review): pydantic-settings fills fields from environment variables
    by default. No ``env_file`` is configured on this class, so a ``.env``
    file is presumably NOT read unless that is configured elsewhere -- confirm.
    """
    # Path of the parsed trial balance JSON file read by load_trial_balance().
    trial_balance_json: str = "data/output1/parsed_trial_balance.json"


# Shared settings instance, created once when the module is imported.
settings = Settings()
class TrialBalanceRecord(BaseModel):
    """Schema used to validate a single trial balance record."""
    # Name of the account; validated as a string.
    account_name: str
    # Amount for the account; validated as a float.
    amount: float
    # Grouping label -- presumably the account's category; confirm against
    # the data extraction step that produces the JSON.
    group: str
def load_trial_balance() -> pd.DataFrame:
    """Load, validate and clean the trial balance stored as JSON.

    The file path is read from ``settings.trial_balance_json``. The payload
    may be a list of records, an object wrapping that list under a
    ``"trial_balance"`` key, or a single record object. Each record is
    validated against ``TrialBalanceRecord``; a record that fails validation
    is kept unchanged (best effort) and a warning is logged.

    Returns:
        A DataFrame with one row per record, with the ``amount`` column
        passed through ``clean_value``. An empty payload yields an empty
        DataFrame that still carries the ``TrialBalanceRecord`` columns.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        ValueError: If the payload is neither a list nor an object.
    """
    json_file = settings.trial_balance_json

    # Open directly rather than checking existence first: one system call
    # and no window between the check and the open.
    try:
        with open(json_file, "r", encoding="utf-8") as f:
            parsed_data = json.load(f)
    except FileNotFoundError as err:
        message = f"{json_file} not found! Please run the data extraction step first."
        logger.error(message)
        raise FileNotFoundError(message) from err

    # Determine the structure of the payload.
    if isinstance(parsed_data, dict):
        records = parsed_data.get("trial_balance", parsed_data)
    else:
        records = parsed_data

    if isinstance(records, dict):
        # A bare object is a single record. Iterating it directly would
        # yield its keys (strings) and break the ``**record`` expansion.
        records = [records]
    elif not isinstance(records, list):
        raise ValueError(
            f"Unexpected trial balance structure in {json_file}: "
            f"expected a list or an object, got {type(records).__name__}"
        )

    validated_records = []
    for record in records:
        if not isinstance(record, dict):
            # Cannot be expanded into model fields; skip instead of crashing.
            logger.warning("Skipping non-object trial balance entry: %r", record)
            continue
        try:
            validated = TrialBalanceRecord(**record)
        except ValidationError as ve:
            logger.warning("Validation error for record: %s", ve)
            validated_records.append(record)  # fallback to raw dict
        else:
            # model_dump() is the pydantic v2 replacement for the
            # deprecated .dict().
            validated_records.append(validated.model_dump())

    if validated_records:
        tb_df = pd.DataFrame(validated_records)
    else:
        # Keep the expected columns so callers can index them on empty data.
        tb_df = pd.DataFrame(columns=list(TrialBalanceRecord.model_fields))

    if "amount" in tb_df.columns:
        tb_df["amount"] = tb_df["amount"].apply(clean_value)
    else:
        # Only reachable when every record fell back to a raw dict that
        # has no "amount" key.
        logger.warning("No 'amount' column in trial balance records; skipping amount cleaning.")

    logger.info("Loaded trial balance with %d records.", len(tb_df))
    return tb_df
|