Sahil Garg committed
Commit 5fd0efa · 1 Parent(s): 9542994

/pnl_from_notes working perfectly

Files changed (4)
  1. app/api.py +13 -6
  2. pnlbs/csv_json_pnl.py +33 -16
  3. pnlbs/pnl_note.py +82 -19
  4. pnlbs/sircodepnl.py +39 -10
app/api.py CHANGED
@@ -369,23 +369,30 @@ async def pnl_from_notes(file: UploadFile = File(...)):
     if os.getenv("OPENROUTER_API_KEY"):
         env["OPENROUTER_API_KEY"] = os.getenv("OPENROUTER_API_KEY")
     env["INPUT_FILE"] = "clean_financial_data_pnl.json"
-    cwd = "C:/SAHIL/NOTES"
+    cwd = "C:/SAHIL/production_notes"
 
     # Run sircodepnl.py
     run_subprocess("pnlbs/sircodepnl.py", [input_excel_path], env, cwd)
-    logger.info(f"Files in csv_notes_pnl/: {os.listdir('csv_notes_pnl') if os.path.exists('csv_notes_pnl') else 'csv_notes_pnl does not exist'}")
+    csv_notes_pnl_path = os.path.join(cwd, 'csv_notes_pnl')
+    logger.info(f"Files in {csv_notes_pnl_path}/: {os.listdir(csv_notes_pnl_path) if os.path.exists(csv_notes_pnl_path) else f'{csv_notes_pnl_path} does not exist'}")
 
     # Run csv_json_pnl.py
     run_subprocess("pnlbs/csv_json_pnl.py", [], env, cwd)
-    logger.info(f"clean_financial_data_pnl.json exists: {os.path.exists('clean_financial_data_pnl.json')}")
+    json_path = os.path.join(cwd, 'clean_financial_data_pnl.json')
+    logger.info(f"clean_financial_data_pnl.json exists: {os.path.exists(json_path)}")
 
     # Run pnl_note.py
     result = run_subprocess("pnlbs/pnl_note.py", [], env, cwd)
     output_file = extract_output_file(result.stdout)
-    if not output_file or not os.path.exists(output_file):
+    # If output_file is not absolute, resolve relative to cwd
+    if output_file and not os.path.isabs(output_file):
+        output_file_path = os.path.join(cwd, output_file)
+    else:
+        output_file_path = output_file
+    if not output_file or not os.path.exists(output_file_path):
         debug_msg = f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
         logger.error(f"Could not determine output file from pnl_note.py output.{debug_msg}")
         raise HTTPException(status_code=500, detail=f"Could not determine output file from pnl_note.py output.{debug_msg}")
 
-    logger.info(f"Pipeline completed. Output file: {output_file}")
-    return {"message": "Profit and Loss statement generated successfully.", "file": output_file}
+    logger.info(f"Pipeline completed. Output file: {output_file_path}")
+    return {"message": "Profit and Loss statement generated successfully.", "file": output_file_path}
pnlbs/csv_json_pnl.py CHANGED
@@ -41,7 +41,8 @@ class FinancialData(BaseModel):
 
 class FinancialCSVMapper:
     def __init__(self, csv_folder_path: str = settings.csv_folder_path):
-        self.csv_folder_path = csv_folder_path
+        # Always use absolute path for folder
+        self.csv_folder_path = os.path.abspath(csv_folder_path)
 
     def clean_value(self, value: Any) -> Optional[Union[float, int, str]]:
         """
@@ -284,16 +285,18 @@ class FinancialCSVMapper:
         }
 
     def process_all_csvs(self) -> Dict[str, Any]:
-        """
-        Process all CSV files and create meaningful financial JSON.
-        Returns the structured financial data.
-        """
+        logger.info(f"Current working directory: {os.getcwd()}")
+        logger.info(f"Looking for CSVs in: {self.csv_folder_path}")
+        # Ensure CSV folder exists
         if not os.path.exists(self.csv_folder_path):
-            logger.error(f"Folder {self.csv_folder_path} not found")
-            return {"error": f"Folder {self.csv_folder_path} not found"}
-
+            try:
+                os.makedirs(self.csv_folder_path, exist_ok=True)
+                logger.info(f"Created missing CSV folder: {self.csv_folder_path}")
+            except Exception as e:
+                logger.error(f"Failed to create CSV folder '{self.csv_folder_path}': {e}")
+                return {"error": f"Failed to create CSV folder '{self.csv_folder_path}': {e}"}
         csv_files = [f for f in os.listdir(self.csv_folder_path) if f.endswith('.csv')]
-
+        logger.info(f"CSV files found: {csv_files}")
         if not csv_files:
            logger.error(f"No CSV files found in {self.csv_folder_path}")
            return {"error": f"No CSV files found in {self.csv_folder_path}"}
@@ -345,16 +348,30 @@ class FinancialCSVMapper:
         Process all CSVs and save meaningful financial JSON.
         Returns the output file path.
         """
+        # Always use absolute path for output JSON
+        output_path = os.path.abspath(output_path)
         financial_data = self.process_all_csvs()
-
-        with open(output_path, 'w', encoding='utf-8') as f:
-            json.dump(financial_data, f, indent=2, ensure_ascii=False, default=str)
-
-        logger.info(f"Clean financial JSON created: {output_path}")
+        try:
+            with open(output_path, 'w', encoding='utf-8') as f:
+                json.dump(financial_data, f, indent=2, ensure_ascii=False, default=str)
+            logger.info(f"Clean financial JSON created: {output_path}")
+        except Exception as e:
+            logger.error(f"Failed to write JSON file '{output_path}': {e}")
+            raise
         return output_path
 
 # Usage
 if __name__ == "__main__":
-    mapper = FinancialCSVMapper(settings.csv_folder_path)
-    output_file = mapper.save_to_json(settings.output_json)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
+    csv_folder = settings.csv_folder_path
+    output_json = settings.output_json
+    if len(sys.argv) > 1:
+        csv_folder = sys.argv[1]
+        logger.info(f"CSV folder path from argument: {os.path.abspath(csv_folder)}")
+    if len(sys.argv) > 2:
+        output_json = sys.argv[2]
+        logger.info(f"Output JSON path from argument: {os.path.abspath(output_json)}")
+    mapper = FinancialCSVMapper(csv_folder)
+    output_file = mapper.save_to_json(output_json)
     logger.info(f"Clean financial JSON created: {output_file}")
pnlbs/pnl_note.py CHANGED
@@ -65,19 +65,68 @@ class PnLGenerator:
             logger.warning(f"{item_key} not found in data")
             return 0.0, 0.0
         item_data = self.financial_data[item_key]
-        total_2024 = 0.0
-        total_2023 = 0.0
-        if isinstance(item_data, dict):
-            for category, values in item_data.items():
-                if isinstance(values, list) and len(values) >= 2:
-                    total_2024 += float(values[0] or 0)
-                    total_2023 += float(values[1] or 0)
-                elif isinstance(values, (int, float)):
-                    total_2024 += float(values)
-        elif isinstance(item_data, list) and len(item_data) >= 2:
-            total_2024 = float(item_data[0] or 0)
-            total_2023 = float(item_data[1] or 0)
-        return total_2024, total_2023
+        def recursive_sum(data):
+            sum_2024, sum_2023 = 0.0, 0.0
+            if isinstance(data, dict):
+                for k, v in data.items():
+                    # Skip metadata
+                    if k == "_metadata":
+                        continue
+                    s24, s23 = recursive_sum(v)
+                    sum_2024 += s24
+                    sum_2023 += s23
+            elif isinstance(data, list):
+                # If list contains only numbers, try to use first two as 2024/2023
+                nums = [x for x in data if isinstance(x, (int, float))]
+                if len(nums) >= 2:
+                    sum_2024 += float(nums[0] or 0)
+                    sum_2023 += float(nums[1] or 0)
+                elif len(nums) == 1:
+                    sum_2024 += float(nums[0] or 0)
+                # Otherwise, skip non-numeric entries
+            elif isinstance(data, (int, float)):
+                sum_2024 += float(data)
+            elif isinstance(data, str):
+                # Try to parse as float
+                try:
+                    val = float(data)
+                    sum_2024 += val
+                except Exception:
+                    pass
+            return sum_2024, sum_2023
+
+        # Special handling for date-based dicts
+        def sum_dates(data):
+            sum_2024, sum_2023 = 0.0, 0.0
+            if isinstance(data, dict):
+                for k, v in data.items():
+                    if k == "_metadata":
+                        continue
+                    if isinstance(v, dict):
+                        # If keys look like dates, sum by year
+                        for date_key, val in v.items():
+                            if "2024" in date_key:
+                                try:
+                                    sum_2024 += float(val)
+                                except Exception:
+                                    pass
+                            elif "2023" in date_key:
+                                try:
+                                    sum_2023 += float(val)
+                                except Exception:
+                                    pass
+                    else:
+                        s24, s23 = recursive_sum(v)
+                        sum_2024 += s24
+                        sum_2023 += s23
+            return sum_2024, sum_2023
+
+        # Try date-based sum first, fallback to recursive
+        s24, s23 = sum_dates(item_data)
+        if s24 == 0.0 and s23 == 0.0:
+            s24, s23 = recursive_sum(item_data)
+        logger.info(f"Extracted for {item_key}: 2024={s24}, 2023={s23}")
+        return s24, s23
 
     def get_revenue_data(self) -> Tuple[float, float]:
         """Extract revenue from operations data."""
@@ -285,6 +334,7 @@ class PnLGenerator:
         try:
             wb.save(output_file)
             logger.info(f"P&L Statement generated successfully: {output_file}")
+            print(f"Output file: {os.path.abspath(output_file)}")  # For API subprocess parsing
             self.print_financial_summary(
                 total_revenue_2024, total_revenue_2023,
                 total_expenses_2024, total_expenses_2023,
@@ -298,6 +348,7 @@ class PnLGenerator:
         try:
             wb.save(fallback_file)
             logger.info(f"P&L Statement saved to: {fallback_file}")
+            print(f"Output file: {os.path.abspath(fallback_file)}")  # For API subprocess parsing
             return True
         except Exception as e:
             logger.error(f"Failed to save: {str(e)}")
@@ -330,22 +381,34 @@ def main() -> None:
     """Main function to run the P&L generator."""
     logger.info("P&L STATEMENT GENERATOR FROM JSON")
     logger.info("=" * 50)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
     json_file: Optional[str] = None
     for file in settings.json_files:
         if os.path.exists(file):
             json_file = file
+            logger.info(f"Found input JSON file: {json_file}")
             break
     if not json_file:
-        json_file = input("Enter the path to your JSON file: ").strip()
+        if len(sys.argv) > 1:
+            json_file = sys.argv[1]
+            logger.info(f"Input JSON file from argument: {json_file}")
+        else:
+            json_file = input("Enter the path to your JSON file: ").strip()
     generator = PnLGenerator(json_file)
     if generator.load_financial_data():
         output_path = settings.output_file
+        if len(sys.argv) > 2:
+            output_path = sys.argv[2]
+            logger.info(f"Output Excel path from argument: {output_path}")
         logger.info(f"Output file: {output_path}")
-        if generator.generate_pnl_statement(output_path):
-            logger.info("P&L STATEMENT GENERATION COMPLETED SUCCESSFULLY!")
-            logger.info(f"Output file: {output_path}")
-        else:
-            logger.error("Failed to generate P&L statement")
+        try:
+            if generator.generate_pnl_statement(output_path):
+                logger.info(f"P&L Statement generated successfully: {os.path.abspath(output_path)}")
+            else:
+                logger.error("Failed to generate P&L statement.")
+        except Exception as e:
+            logger.error(f"Error writing Excel file: {e}")
     else:
         logger.error("Failed to load financial data")
 
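
The extraction helper now tries a date-keyed pass (sum_dates) and only falls back to the blind recursive_sum when that pass finds nothing. A self-contained sketch of the date-keyed idea on toy data; the nested shape is illustrative, not the repo's actual JSON schema:

def sum_by_year(node, year: str) -> float:
    """Recursively total every numeric value stored under a key mentioning the given year."""
    total = 0.0
    if isinstance(node, dict):
        for key, value in node.items():
            if key == "_metadata":
                continue  # skip bookkeeping entries, as the committed code does
            if year in key and isinstance(value, (int, float)):
                total += float(value)
            else:
                total += sum_by_year(value, year)
    elif isinstance(node, list):
        total += sum(sum_by_year(item, year) for item in node)
    return total

note = {"salaries": {"31-03-2024": 120.0, "31-03-2023": 110.0}, "_metadata": {"unit": "lakhs"}}
print(sum_by_year(note, "2024"), sum_by_year(note, "2023"))  # 120.0 110.0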
pnlbs/sircodepnl.py CHANGED
@@ -18,16 +18,25 @@ class Settings(BaseSettings):
 
 settings = Settings()
 
+def get_xls(excel_file_path: str) -> pd.ExcelFile:
+    try:
+        xls = pd.ExcelFile(excel_file_path)
+        logger.info(f"Loaded Excel file: {excel_file_path}")
+        logger.info(f"Available sheets: {xls.sheet_names}")
+        return xls
+    except Exception as e:
+        logger.error(f"Failed to load Excel file '{excel_file_path}': {e}")
+        raise
+
 class NoteCSVInfo(BaseModel):
     name: str
     rows: int
 
-def clean_note(sheet_name: str, skiprows: int = settings.skiprows) -> pd.DataFrame:
+def clean_note(xls, sheet_name: str, skiprows: int = settings.skiprows) -> pd.DataFrame:
     """
     Parse and clean a sheet from the Excel file.
     Drops empty rows and columns, resets index.
     """
-    xls = pd.ExcelFile(settings.excel_file_path)
     df = xls.parse(sheet_name, skiprows=skiprows)
     df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
     return df
@@ -36,20 +45,40 @@ def export_note_to_csv(df: pd.DataFrame, filename: str, output_folder: str) -> NoteCSVInfo:
     """
     Export DataFrame to CSV and return info.
     """
-    # Ensure output folder exists
-    os.makedirs(output_folder, exist_ok=True)
-    output_path = os.path.join(output_folder, filename)
+    # Always use absolute path for output folder
+    abs_output_folder = os.path.abspath(output_folder)
+    try:
+        os.makedirs(abs_output_folder, exist_ok=True)
+        logger.info(f"Output folder ensured: {abs_output_folder}")
+    except Exception as e:
+        logger.error(f"Failed to create output folder '{abs_output_folder}': {e}")
+        raise
+    output_path = os.path.join(abs_output_folder, filename)
     df.to_csv(output_path, index=False)
+    logger.info(f"CSV file written to: {output_path}")
     return NoteCSVInfo(name=filename, rows=df.shape[0])
 
 def main() -> None:
     """
     Main function to extract P&L notes from Excel and export as CSV.
     """
-    logger.info("Loading Excel file: %s", settings.excel_file_path)
-    note_16_23_df = clean_note(settings.note_16_23_sheet, settings.skiprows)
-
-    os.makedirs(settings.output_folder, exist_ok=True)
+    import sys
+    logger.info(f"Current working directory: {os.getcwd()}")
+    excel_file_path = settings.excel_file_path
+    if len(sys.argv) > 1:
+        excel_file_path = sys.argv[1]
+        logger.info(f"Excel file path from argument: {excel_file_path}")
+    xls = get_xls(excel_file_path)
+    if settings.note_16_23_sheet not in xls.sheet_names:
+        logger.error(f"Sheet '{settings.note_16_23_sheet}' not found in Excel file. Available sheets: {xls.sheet_names}")
+        return
+    note_16_23_df = clean_note(xls, settings.note_16_23_sheet, settings.skiprows)
+    logger.info(f"Loaded DataFrame shape: {note_16_23_df.shape}")
+    logger.info(f"First few rows:\n{note_16_23_df.head()}\n")
     info_16_23 = export_note_to_csv(note_16_23_df, "Note_16_to_23_Full.csv", settings.output_folder)
-
     logger.info(f"Extracted rows: Note 16-23 = {info_16_23.rows} rows")
+    abs_output_folder = os.path.abspath(settings.output_folder)
+    logger.info(f"CSV output path: {os.path.join(abs_output_folder, 'Note_16_to_23_Full.csv')}")
+
+if __name__ == "__main__":
+    main()
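
The structural change in this file is opening the workbook once through pd.ExcelFile, validating sheet names up front, and only then parsing. A minimal sketch of that pattern, assuming pandas with an Excel engine such as openpyxl is installed; load_sheet is an illustrative name, not part of this repo:

import pandas as pd

def load_sheet(path: str, sheet: str, skiprows: int = 0) -> pd.DataFrame:
    """Open the workbook once, fail fast on a missing sheet, then clean the frame."""
    xls = pd.ExcelFile(path)  # single file handle, reusable for every sheet
    if sheet not in xls.sheet_names:
        raise ValueError(f"Sheet '{sheet}' not found; available: {xls.sheet_names}")
    df = xls.parse(sheet, skiprows=skiprows)
    # Drop fully empty rows and columns, mirroring clean_note() above.
    return df.dropna(how="all").dropna(axis=1, how="all").reset_index(drop=True)

# e.g. load_sheet("notes.xlsx", "Note 16-23", skiprows=2)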