"""FastAPI service that turns uploaded trial-balance workbooks into financial
notes, balance sheets, P&L statements, and cash-flow statements.

Routes:
    POST /new            -- LLM-generated notes, returned as an Excel workbook.
    POST /hardcoded      -- deterministic (non-LLM) notes pipeline, Excel output.
    POST /bs_from_notes  -- balance sheet built from an uploaded notes workbook.
    POST /pnl_from_notes -- P&L statement built from an uploaded notes workbook.
    POST /cf_from_notes  -- cash-flow statement built from an uploaded notes workbook.
"""

from fastapi import FastAPI, APIRouter, UploadFile, File, Form, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse, FileResponse
from typing import Optional, Dict, Any
import pandas as pd
import os
import shutil
import json
import subprocess
import sys
import logging

# Import utilities and logic from modular files
from utils.utils import clean_value
from notes.data_extraction import extract_trial_balance_data, analyze_and_save_results
from notes.llm_notes_generator import FlexibleFinancialNoteGenerator
from notes.notes_generator import process_json
from notes.json_to_excel import json_to_xlsx
from utils.utils_normalize import normalize_llm_note_json, normalize_llm_notes_json

# Configure logging for the application
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("financial_notes_api")

app = FastAPI(
    title="Financial Notes Generator API",
    description="API for generating financial notes, balance sheets, cash flow statements, and P&L reports.",
    version="1.0.0",
)

XLSX_MEDIA_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


@app.on_event("startup")
async def startup_event():
    logger.info("Financial Notes Generator API has started.")


@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Financial Notes Generator API is shutting down.")


router = APIRouter()


def _save_upload(file: UploadFile, dest_dir: str = "data/input") -> str:
    """Persist an uploaded file under *dest_dir* and return the saved path.

    Creates *dest_dir* if necessary. The file is streamed to disk so large
    uploads are not buffered entirely in memory.
    """
    os.makedirs(dest_dir, exist_ok=True)
    file_location = os.path.join(dest_dir, file.filename)
    with open(file_location, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    return file_location


def process_uploaded_file(file: UploadFile) -> pd.DataFrame:
    """Save and parse an uploaded trial-balance file into a cleaned DataFrame.

    The parsed result is also written to data/output1/parsed_trial_balance.json
    as a side effect. The 'amount' column is normalized via clean_value.
    """
    file_location = _save_upload(file)
    structured_data = extract_trial_balance_data(file_location)
    output_file = "data/output1/parsed_trial_balance.json"
    # Fix: ensure the output directory exists before analyze_and_save_results
    # writes into it (previously only /hardcoded created data/output1).
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    analyze_and_save_results(structured_data, output_file)
    with open(output_file, "r", encoding="utf-8") as f:
        parsed_data = json.load(f)
    # The parser may emit either a bare list of rows or a dict wrapping them
    # under "trial_balance".
    tb_df = pd.DataFrame(
        parsed_data if isinstance(parsed_data, list)
        else parsed_data.get("trial_balance", parsed_data)
    )
    tb_df["amount"] = tb_df["amount"].apply(clean_value)
    return tb_df


@router.post("/new")
async def llm_generate_and_excel(
    file: UploadFile = File(...),
    note_number: Optional[str] = Form(None),
):
    """Generate financial notes with the LLM and return them as an .xlsx file.

    If *note_number* is given (comma-separated list), only those notes are
    generated; otherwise all notes are generated in one pass.

    Raises:
        HTTPException(500): if the generator cannot be initialised, or no
            notes could be generated at all.
    """
    file_location = _save_upload(file)
    structured_data = extract_trial_balance_data(file_location)
    output_json = "data/output1/parsed_trial_balance.json"
    # Fix: create data/output1 before writing into it.
    os.makedirs(os.path.dirname(output_json), exist_ok=True)
    analyze_and_save_results(structured_data, output_json)

    try:
        generator = FlexibleFinancialNoteGenerator()
    except Exception as e:
        logger.error(f"Generator init failed: {e}")
        raise HTTPException(status_code=500, detail=f"Generator init failed: {e}")

    os.makedirs("data/generated_notes_excel", exist_ok=True)
    # Fix: the generator and the code below read/write files in this
    # directory; make sure it exists on a fresh deployment.
    os.makedirs("data/generated_notes", exist_ok=True)
    wrapped_json_path = "data/generated_notes/notes_wrapped.json"
    excel_path = "data/generated_notes_excel/notes.xlsx"

    if note_number:
        note_numbers = [n.strip() for n in note_number.split(",")]
        all_notes = []
        for n in note_numbers:
            # generate_note writes its result to data/generated_notes/notes.json;
            # collect each result before the next call overwrites the file.
            success = generator.generate_note(n, trial_balance_path=output_json)
            if success:
                with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
                    note_json = json.load(f)
                all_notes.append(note_json)
        with open("data/generated_notes/notes.json", "w", encoding="utf-8") as f:
            json.dump({"notes": all_notes}, f, indent=2, ensure_ascii=False)
        wrapped = normalize_llm_notes_json({"notes": all_notes})
    else:
        results = generator.generate_all_notes(trial_balance_path=output_json)
        if not any(results.values()):
            logger.error("Failed to generate any notes. LLM API may be down or unreachable.")
            raise HTTPException(
                status_code=500,
                detail="Failed to generate any notes. LLM API may be down or unreachable.",
            )
        with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
            notes_json = json.load(f)
        wrapped = normalize_llm_notes_json(notes_json)

    with open(wrapped_json_path, "w", encoding="utf-8") as f2:
        json.dump(wrapped, f2, ensure_ascii=False, indent=2)
    json_to_xlsx(wrapped_json_path, excel_path)

    return FileResponse(
        excel_path,
        filename=os.path.basename(excel_path),
        media_type=XLSX_MEDIA_TYPE,
    )


@router.post("/hardcoded")
async def run_full_pipeline(
    file: UploadFile = File(...),
    note_number: Optional[str] = Form(None),
):
    """Run the deterministic (non-LLM) notes pipeline and return an .xlsx file.

    Optionally filters the generated notes to the comma-separated
    *note_number* list before conversion to Excel.

    Raises:
        HTTPException(500): if the notes generator or the Excel converter
            is missing or fails.
    """
    file_location = _save_upload(file)

    os.makedirs("data/output1", exist_ok=True)
    structured_data = extract_trial_balance_data(file_location)
    output1_json = "data/output1/parsed_trial_balance.json"
    analyze_and_save_results(structured_data, output1_json)

    os.makedirs("data/output2", exist_ok=True)
    try:
        process_json(output1_json)
    except ImportError:
        logger.error("main16_23.process_json not found. Please ensure 'app/main16_23.py' exists and is named correctly.")
        raise HTTPException(
            status_code=500,
            detail="main16_23.process_json not found. Please ensure 'app/main16_23.py' exists and is named correctly.",
        )
    except Exception as e:
        logger.error(f"main16_23.process_json failed: {e}")
        raise HTTPException(status_code=500, detail=f"main16_23.process_json failed: {e}")

    notes_json = "data/output2/notes_output.json"
    with open(notes_json, "r", encoding="utf-8") as f:
        notes_data = json.load(f)
    # Unwrap a dict payload: prefer "notes", then "trial_balance".
    if isinstance(notes_data, dict):
        for key in ["notes", "trial_balance"]:
            if key in notes_data:
                notes_data = notes_data[key]
                break

    def wrap_notes(notes):
        """Wrap a notes list in the envelope json_to_xlsx expects."""
        return {"notes": notes}

    if note_number:
        numbers = [n.strip() for n in note_number.split(",")]
        notes_data = [
            note for note in notes_data
            if str(note.get('note_number', '')).strip() in numbers
        ]
        filtered_json = "data/output2/notes_output_filtered.json"
        with open(filtered_json, "w", encoding="utf-8") as f2:
            json.dump(wrap_notes(notes_data), f2, ensure_ascii=False, indent=2)
        json_input_for_excel = filtered_json
    else:
        temp_json = "data/output2/notes_output_wrapped.json"
        with open(temp_json, "w", encoding="utf-8") as f2:
            json.dump(wrap_notes(notes_data), f2, ensure_ascii=False, indent=2)
        json_input_for_excel = temp_json

    os.makedirs("data/output3", exist_ok=True)
    try:
        output3_xlsx = "data/output3/final_output.xlsx"
        json_to_xlsx(json_input_for_excel, output3_xlsx)
    except ImportError:
        logger.error("json_xlsx.json_to_xlsx not found")
        raise HTTPException(status_code=500, detail="json_xlsx.json_to_xlsx not found")
    except Exception as e:
        logger.error(f"json_xlsx.json_to_xlsx failed: {e}")
        raise HTTPException(status_code=500, detail=f"json_xlsx.json_to_xlsx failed: {e}")

    return FileResponse(
        output3_xlsx,
        filename=os.path.basename(output3_xlsx),
        media_type=XLSX_MEDIA_TYPE,
    )


def run_subprocess(
    script_path: str,
    args: list,
    env: Dict[str, str],
    cwd: str,
) -> subprocess.CompletedProcess:
    """Run a helper Python script as a subprocess and return the result.

    Args:
        script_path: path of the script, relative to *cwd*.
        args: extra command-line arguments for the script.
        env: environment for the child process.
        cwd: working directory for the child process.

    Raises:
        HTTPException(500): if the script exits non-zero; stdout/stderr are
            included in the detail for debugging.
    """
    try:
        logger.info(f"Running {script_path} with args {args} in {cwd}")
        # Fix: use sys.executable instead of bare "python" so the child runs
        # under the same interpreter/venv as this server.
        result = subprocess.run(
            [sys.executable, script_path] + args,
            capture_output=True,
            text=True,
            check=True,
            env=env,
            cwd=cwd,
        )
        logger.debug(f"{script_path} STDOUT:\n{result.stdout}")
        logger.debug(f"{script_path} STDERR:\n{result.stderr}")
        return result
    except subprocess.CalledProcessError as e:
        logger.error(f"{script_path} failed: {e}")
        logger.error(f"STDOUT: {e.stdout}")
        logger.error(f"STDERR: {e.stderr}")
        raise HTTPException(
            status_code=500,
            detail=f"{script_path} failed: {e}\nSTDOUT:\n{e.stdout}\nSTDERR:\n{e.stderr}",
        )


def extract_output_file(stdout: str, keyword: str = "Output file:") -> Optional[str]:
    """Scan subprocess stdout for a line containing *keyword* and return the
    path that follows it, or None if no such line exists."""
    for line in stdout.splitlines():
        if keyword in line:
            return line.split(keyword)[-1].strip()
    return None


@router.post("/bs_from_notes")
async def bs_from_notes(file: UploadFile = File(...)):
    """Build a balance sheet from an uploaded notes workbook.

    Pipeline: sircodebs.py (Excel -> CSVs) -> csv_json_bs.py (CSVs -> JSON)
    -> bl_llm.py (JSON -> balance-sheet .xlsx). The output path is parsed
    from bl_llm.py's stdout.

    Raises:
        HTTPException(500): if any stage fails or the output file cannot be
            located.
    """
    input_excel_path = _save_upload(file)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")

    env = os.environ.copy()
    if os.getenv("OPENROUTER_API_KEY"):
        env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
    env["INPUT_FILE"] = "data/clean_financial_data_bs.json"
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())

    # Run sircodebs.py
    run_subprocess("bs/sircodebs.py", [input_excel_path], env, cwd)
    logger.info(f"Files in data/csv_notes_bs/: {os.listdir('data/csv_notes_bs') if os.path.exists('data/csv_notes_bs') else 'data/csv_notes_bs does not exist'}")

    # Run csv_json_bs.py
    run_subprocess("bs/csv_json_bs.py", [], env, cwd)
    logger.info(f"data/clean_financial_data_bs.json exists: {os.path.exists('data/clean_financial_data_bs.json')}")

    # Run bl_llm.py
    result = run_subprocess("bs/bl_llm.py", [], env, cwd)
    output_file = extract_output_file(result.stdout)
    if output_file and not os.path.isabs(output_file):
        output_file_path = os.path.join(cwd, output_file)
    else:
        output_file_path = output_file
    # NOTE: short-circuit order matters — output_file_path may be None here.
    if not output_file or not os.path.exists(output_file_path):
        debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        logger.error(f"Could not determine output file from bl_llm.py output.{debug_msg}")
        raise HTTPException(status_code=500, detail=f"Could not determine output file from bl_llm.py output.{debug_msg}")

    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type=XLSX_MEDIA_TYPE,
    )


@router.post("/pnl_from_notes")
async def pnl_from_notes(file: UploadFile = File(...)):
    """Build a P&L statement from an uploaded notes workbook.

    Pipeline: sircodepnl.py -> csv_json_pnl.py -> pnl_note.py, which writes
    a fixed output path (data/pnl_statement.xlsx).

    Raises:
        HTTPException(500): if any stage fails or the expected output file
            is missing afterwards.
    """
    input_excel_path = _save_upload(file)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")

    env = os.environ.copy()
    if os.getenv("OPENROUTER_API_KEY"):
        env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
    env["INPUT_FILE"] = "data/clean_financial_data_pnl.json"
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())

    # Run sircodepnl.py
    run_subprocess("pnl/sircodepnl.py", [input_excel_path], env, cwd)
    csv_notes_pnl_path = os.path.join(cwd, 'data/csv_notes_pnl')
    logger.info(f"Files in {csv_notes_pnl_path}/: {os.listdir(csv_notes_pnl_path) if os.path.exists(csv_notes_pnl_path) else f'{csv_notes_pnl_path} does not exist'}")

    # Run csv_json_pnl.py
    run_subprocess("pnl/csv_json_pnl.py", [], env, cwd)
    json_path = os.path.join(cwd, 'data/clean_financial_data_pnl.json')
    logger.info(f"data/clean_financial_data_pnl.json exists: {os.path.exists(json_path)}")

    # Run pnl_note.py
    run_subprocess("pnl/pnl_note.py", [], env, cwd)

    # Use fixed output file path
    output_file_path = os.path.join(cwd, "data/pnl_statement.xlsx")
    if not os.path.exists(output_file_path):
        logger.error(f"Could not find expected output file for P&L statement: {output_file_path}")
        raise HTTPException(status_code=500, detail=f"Could not find expected output file for P&L statement: {output_file_path}")

    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type=XLSX_MEDIA_TYPE,
    )


@router.post("/cf_from_notes")
async def cf_from_notes(file: UploadFile = File(...)):
    """Build a cash-flow statement from an uploaded notes workbook.

    Pipeline: sircodecf.py -> csv_json_cf.py -> cf_middlestep.py ->
    cf_generation.py, which writes data/cash_flow_statements.xlsx.

    Raises:
        HTTPException(500): if any stage fails or the expected output file
            is missing afterwards.
    """
    input_excel_path = _save_upload(file)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")

    env = os.environ.copy()
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())

    # Step 1: Run sircodecf.py
    run_subprocess("cf/sircodecf.py", [input_excel_path], env, cwd)
    csv_notes_cfs_path = os.path.join(cwd, 'data/csv_notes_cfs')
    logger.info(f"Files in {csv_notes_cfs_path}/: {os.listdir(csv_notes_cfs_path) if os.path.exists(csv_notes_cfs_path) else f'{csv_notes_cfs_path} does not exist'}")

    # Step 2: Run csv_json_cf.py
    run_subprocess("cf/csv_json_cf.py", [], env, cwd)
    json_path = os.path.join(cwd, 'data/clean_financial_data_cfs.json')
    logger.info(f"data/clean_financial_data_cfs.json exists: {os.path.exists(json_path)}")

    # Step 3: Run cf_middlestep.py
    run_subprocess("cf/cf_middlestep.py", [], env, cwd)
    extracted_json_path = os.path.join(cwd, 'data/extracted_cfs_data.json')
    logger.info(f"data/extracted_cfs_data.json exists: {os.path.exists(extracted_json_path)}")

    # Step 4: Run cf_generation.py
    result = run_subprocess("cf/cf_generation.py", [], env, cwd)
    # Fix: the original checked the same path twice; a single check is
    # behaviorally identical.
    output_file_path = os.path.join(cwd, "data/cash_flow_statements.xlsx")
    if not os.path.exists(output_file_path):
        debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        logger.error(f"Could not determine output file from cf_generation.py output.{debug_msg}")
        raise HTTPException(status_code=500, detail=f"Could not determine output file from cf_generation.py output.{debug_msg}")

    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type=XLSX_MEDIA_TYPE,
    )


app.include_router(router)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)