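"""FastAPI service that turns uploaded trial balances and notes workbooks into
financial notes, balance sheet, P&L, and cash flow Excel reports."""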
from fastapi import FastAPI, APIRouter, UploadFile, File, Form, HTTPException
from fastapi.responses import FileResponse
from typing import Optional, Dict
import pandas as pd
import os
import shutil
import json
import subprocess
import logging

# Import utilities and logic from modular files
from utils.utils import clean_value
from notes.data_extraction import extract_trial_balance_data, analyze_and_save_results
from notes.llm_notes_generator import FlexibleFinancialNoteGenerator
from notes.notes_generator import process_json
from notes.json_to_excel import json_to_xlsx
from utils.utils_normalize import normalize_llm_notes_json


# Configure logging for the application
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("financial_notes_api")

app = FastAPI(
    title="Financial Notes Generator API",
    description="API for generating financial notes, balance sheets, cash flow statements, and P&L reports.",
    version="1.0.0"
)
@app.on_event("startup")
async def startup_event():
    logger.info("Financial Notes Generator API has started.")

@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Financial Notes Generator API is shutting down.")

router = APIRouter()

def process_uploaded_file(file: UploadFile) -> pd.DataFrame:
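    """Persist an uploaded trial balance, parse it to JSON, and return the rows as a DataFrame with cleaned amounts."""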
    os.makedirs("data/input", exist_ok=True)
    file_location = f"data/input/{file.filename}"
    with open(file_location, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    structured_data = extract_trial_balance_data(file_location)
    output_file = "data/output1/parsed_trial_balance.json"
    analyze_and_save_results(structured_data, output_file)
    with open(output_file, "r", encoding="utf-8") as f:
        parsed_data = json.load(f)
    tb_df = pd.DataFrame(parsed_data if isinstance(parsed_data, list) else parsed_data.get("trial_balance", parsed_data))
    tb_df['amount'] = tb_df['amount'].apply(clean_value)
    return tb_df

@router.post("/new")
async def llm_generate_and_excel(
    file: UploadFile = File(...),
    note_number: Optional[str] = Form(None)
):
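    """Generate financial notes with the LLM-based generator (all notes, or only the
    comma-separated numbers given in `note_number`) and return them as an Excel workbook."""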
    os.makedirs("data/input", exist_ok=True)
    file_location = f"data/input/{file.filename}"
    with open(file_location, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    structured_data = extract_trial_balance_data(file_location)
    output_json = "data/output1/parsed_trial_balance.json"
    analyze_and_save_results(structured_data, output_json)
    try:
        generator = FlexibleFinancialNoteGenerator()
    except Exception as e:
        logger.error(f"Generator init failed: {e}")
        raise HTTPException(status_code=500, detail=f"Generator init failed: {e}")
    os.makedirs("data/generated_notes_excel", exist_ok=True)
    wrapped_json_path = "data/generated_notes/notes_wrapped.json"
    if note_number:
        note_numbers = [n.strip() for n in note_number.split(",")]
        all_notes = []
        for n in note_numbers:
            success = generator.generate_note(n, trial_balance_path=output_json)
            if success:
                with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
                    note_json = json.load(f)
                all_notes.append(note_json)
        with open("data/generated_notes/notes.json", "w", encoding="utf-8") as f:
            json.dump({"notes": all_notes}, f, indent=2, ensure_ascii=False)
        wrapped = normalize_llm_notes_json({"notes": all_notes})
        with open(wrapped_json_path, "w", encoding="utf-8") as f2:
            json.dump(wrapped, f2, ensure_ascii=False, indent=2)
        excel_path = "data/generated_notes_excel/notes.xlsx"
        json_to_xlsx(wrapped_json_path, excel_path)
    else:
        results = generator.generate_all_notes(trial_balance_path=output_json)
        if not any(results.values()):
            logger.error("Failed to generate any notes. LLM API may be down or unreachable.")
            raise HTTPException(status_code=500, detail="Failed to generate any notes. LLM API may be down or unreachable.")
        with open("data/generated_notes/notes.json", "r", encoding="utf-8") as f:
            notes_json = json.load(f)
        wrapped = normalize_llm_notes_json(notes_json)
        with open(wrapped_json_path, "w", encoding="utf-8") as f2:
            json.dump(wrapped, f2, ensure_ascii=False, indent=2)
        excel_path = "data/generated_notes_excel/notes.xlsx"
        json_to_xlsx(wrapped_json_path, excel_path)
    return FileResponse(
        excel_path,
        filename=os.path.basename(excel_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

@router.post("/hardcoded")
async def run_full_pipeline(
    file: UploadFile = File(...),
    note_number: Optional[str] = Form(None)
):
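    """Run the hardcoded (non-LLM) pipeline: parse the uploaded trial balance, build notes with
    process_json, optionally filter by note number, and return the result as an Excel workbook."""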
    os.makedirs("data/input", exist_ok=True)
    file_location = f"data/input/{file.filename}"
    with open(file_location, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    os.makedirs("data/output1", exist_ok=True)
    structured_data = extract_trial_balance_data(file_location)
    output1_json = "data/output1/parsed_trial_balance.json"
    analyze_and_save_results(structured_data, output1_json)
    os.makedirs("data/output2", exist_ok=True)
    try:
        process_json(output1_json)
    except ImportError:
        logger.error("notes.notes_generator.process_json not found. Please ensure 'notes/notes_generator.py' exists and is named correctly.")
        raise HTTPException(status_code=500, detail="notes.notes_generator.process_json not found. Please ensure 'notes/notes_generator.py' exists and is named correctly.")
    except Exception as e:
        logger.error(f"process_json failed: {e}")
        raise HTTPException(status_code=500, detail=f"process_json failed: {e}")
    notes_json = "data/output2/notes_output.json"
    with open(notes_json, "r", encoding="utf-8") as f:
        notes_data = json.load(f)
    if isinstance(notes_data, dict):
        for key in ["notes", "trial_balance"]:
            if key in notes_data:
                notes_data = notes_data[key]
                break
    def wrap_notes(notes):
        return {"notes": notes}
    if note_number:
        numbers = [n.strip() for n in note_number.split(",")]
        notes_data = [
            note for note in notes_data
            if str(note.get('note_number', '')).strip() in numbers
        ]
        filtered_json = "data/output2/notes_output_filtered.json"
        with open(filtered_json, "w", encoding="utf-8") as f2:
            json.dump(wrap_notes(notes_data), f2, ensure_ascii=False, indent=2)
        json_input_for_excel = filtered_json
    else:
        temp_json = "data/output2/notes_output_wrapped.json"
        with open(temp_json, "w", encoding="utf-8") as f2:
            json.dump(wrap_notes(notes_data), f2, ensure_ascii=False, indent=2)
        json_input_for_excel = temp_json
    os.makedirs("data/output3", exist_ok=True)
    try:
        output3_xlsx = "data/output3/final_output.xlsx"
        json_to_xlsx(json_input_for_excel, output3_xlsx)
    except ImportError:
        logger.error("notes.json_to_excel.json_to_xlsx not found")
        raise HTTPException(status_code=500, detail="notes.json_to_excel.json_to_xlsx not found")
    except Exception as e:
        logger.error(f"json_to_xlsx failed: {e}")
        raise HTTPException(status_code=500, detail=f"json_to_xlsx failed: {e}")
    return FileResponse(
        output3_xlsx,
        filename=os.path.basename(output3_xlsx),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

def run_subprocess(
    script_path: str,
    args: list,
    env: Dict[str, str],
    cwd: str
) -> subprocess.CompletedProcess:
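    """Run a helper Python script as a subprocess, log its output, and convert a non-zero exit into an HTTP 500 error."""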
    try:
        logger.info(f"Running {script_path} with args {args} in {cwd}")
        result = subprocess.run(
            ["python", script_path] + args,
            capture_output=True,
            text=True,
            check=True,
            env=env,
            cwd=cwd
        )
        logger.debug(f"{script_path} STDOUT:\n{result.stdout}")
        logger.debug(f"{script_path} STDERR:\n{result.stderr}")
        return result
    except subprocess.CalledProcessError as e:
        logger.error(f"{script_path} failed: {e}")
        logger.error(f"STDOUT: {e.stdout}")
        logger.error(f"STDERR: {e.stderr}")
        raise HTTPException(
            status_code=500,
            detail=f"{script_path} failed: {e}\nSTDOUT:\n{e.stdout}\nSTDERR:\n{e.stderr}"
        )

def extract_output_file(stdout: str, keyword: str = "Output file:") -> Optional[str]:
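    """Return the path that follows `keyword` in a script's stdout, or None if no such line exists."""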
    for line in stdout.splitlines():
        if keyword in line:
            return line.split(keyword)[-1].strip()
    return None

@router.post("/bs_from_notes")
async def bs_from_notes(file: UploadFile = File(...)):
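    """Build a balance sheet from an uploaded notes workbook by chaining the bs/ helper scripts
    and return the resulting Excel file."""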
    os.makedirs("data/input", exist_ok=True)
    input_excel_path = os.path.join("data/input", file.filename)
    with open(input_excel_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")
    env = os.environ.copy()
    if os.getenv("OPENROUTER_API_KEY"):
        env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
    env["INPUT_FILE"] = "data/clean_financial_data_bs.json"
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())
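    # Helper scripts below are run relative to the project root; set PROJECT_ROOT to override the working directory.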
    # Run sircodebs.py
    run_subprocess("bs/sircodebs.py", [input_excel_path], env, cwd)
    logger.info(f"Files in data/csv_notes_bs/: {os.listdir('data/csv_notes_bs') if os.path.exists('data/csv_notes_bs') else 'data/csv_notes_bs does not exist'}")
    # Run csv_json_bs.py
    run_subprocess("bs/csv_json_bs.py", [], env, cwd)
    logger.info(f"data/clean_financial_data_bs.json exists: {os.path.exists('data/clean_financial_data_bs.json')}")
    # Run bl_llm.py
    result = run_subprocess("bs/bl_llm.py", [], env, cwd)
    output_file = extract_output_file(result.stdout)
    if output_file and not os.path.isabs(output_file):
        output_file_path = os.path.join(cwd, output_file)
    else:
        output_file_path = output_file
    if not output_file or not os.path.exists(output_file_path):
        debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        logger.error(f"Could not determine output file from bl_llm.py output.{debug_msg}")
        raise HTTPException(status_code=500, detail=f"Could not determine output file from bl_llm.py output.{debug_msg}")
    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

@router.post("/pnl_from_notes")
async def pnl_from_notes(file: UploadFile = File(...)):
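    """Build a profit and loss statement from an uploaded notes workbook by chaining the pnl/ helper scripts
    and return the resulting Excel file."""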
    os.makedirs("data/input", exist_ok=True)
    input_excel_path = os.path.join("data/input", file.filename)
    with open(input_excel_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")
    env = os.environ.copy()
    if os.getenv("OPENROUTER_API_KEY"):
        env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
    env["INPUT_FILE"] = "data/clean_financial_data_pnl.json"
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())
    # Run sircodepnl.py
    run_subprocess("pnl/sircodepnl.py", [input_excel_path], env, cwd)
    csv_notes_pnl_path = os.path.join(cwd, 'data/csv_notes_pnl')
    logger.info(f"Files in {csv_notes_pnl_path}/: {os.listdir(csv_notes_pnl_path) if os.path.exists(csv_notes_pnl_path) else f'{csv_notes_pnl_path} does not exist'}")
    # Run csv_json_pnl.py
    run_subprocess("pnl/csv_json_pnl.py", [], env, cwd)
    json_path = os.path.join(cwd, 'data/clean_financial_data_pnl.json')
    logger.info(f"data/clean_financial_data_pnl.json exists: {os.path.exists(json_path)}")
    # Run pnl_note.py
    run_subprocess("pnl/pnl_note.py", [], env, cwd)
    # Use fixed output file path
    output_file_path = os.path.join(cwd, "data/pnl_statement.xlsx")
    if not os.path.exists(output_file_path):
        logger.error(f"Could not find expected output file for P&L statement: {output_file_path}")
        raise HTTPException(status_code=500, detail=f"Could not find expected output file for P&L statement: {output_file_path}")
    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

@router.post("/cf_from_notes")
async def cf_from_notes(file: UploadFile = File(...)):
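    """Build cash flow statements from an uploaded notes workbook by chaining the cf/ helper scripts
    and return the resulting Excel file."""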
    os.makedirs("data/input", exist_ok=True)
    input_excel_path = os.path.join("data/input", file.filename)
    with open(input_excel_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    logger.info(f"Uploaded Excel saved to: {input_excel_path}")
    logger.info(f"Files in data/input/: {os.listdir('data/input')}")
    env = os.environ.copy()
    cwd = os.getenv("PROJECT_ROOT", os.getcwd())
    # Step 1: Run sircodecf.py
    run_subprocess("cf/sircodecf.py", [input_excel_path], env, cwd)
    csv_notes_cfs_path = os.path.join(cwd, 'data/csv_notes_cfs')
    logger.info(f"Files in {csv_notes_cfs_path}/: {os.listdir(csv_notes_cfs_path) if os.path.exists(csv_notes_cfs_path) else f'{csv_notes_cfs_path} does not exist'}")
    # Step 2: Run csv_json_cf.py
    run_subprocess("cf/csv_json_cf.py", [], env, cwd)
    json_path = os.path.join(cwd, 'data/clean_financial_data_cfs.json')
    logger.info(f"data/clean_financial_data_cfs.json exists: {os.path.exists(json_path)}")
    # Step 3: Run cf_middlestep.py
    run_subprocess("cf/cf_middlestep.py", [], env, cwd)
    extracted_json_path = os.path.join(cwd, 'data/extracted_cfs_data.json')
    logger.info(f"data/extracted_cfs_data.json exists: {os.path.exists(extracted_json_path)}")
    # Step 4: Run cf_generation.py
    result = run_subprocess("cf/cf_generation.py", [], env, cwd)
    output_file = "data/cash_flow_statements.xlsx"
    output_file_path = os.path.join(cwd, output_file)
    if not os.path.exists(output_file_path):
        output_file_path = os.path.join(cwd, "data/cash_flow_statements.xlsx")
    if not os.path.exists(output_file_path):
        debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        logger.error(f"Could not determine output file from cf_generation.py output.{debug_msg}")
        raise HTTPException(status_code=500, detail=f"Could not determine output file from cf_generation.py output.{debug_msg}")
    logger.info(f"Pipeline completed. Output file: {output_file_path}")
    return FileResponse(
        output_file_path,
        filename=os.path.basename(output_file_path),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

app.include_router(router)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)