Spaces:

point9
/

finryver-dev

Runtime error

App Files Community

Sahil Garg committed on Sep 20, 2025

Commit

29ee329

1 Parent(s): c00e175

udf generation is dynamic, different files, udf application on json

Browse files

Files changed (5) hide show

agents/generator_validator.py +122 -98
agents/langgraph.py +10 -2
agents/simple_tools.py +44 -2
app.py +7 -1
notes/llm_notes_generator.py +18 -16

agents/generator_validator.py CHANGED Viewed

@@ -201,58 +201,99 @@ class InteractiveFeedbackManager:
         apply_detailed_depreciation = 'depreciation' in feedback_lower and 'asset' in feedback_lower
         apply_increase_detail = 'detail' in feedback_lower
-        # Handle formula feedback specifically
         if feedback_type == 'formula':
             return self._generate_formula_udf(feedback_text, iteration)
-        # Create properly formatted UDF code
-        udf_code = f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='{feedback_type}'):
     """
-    UDF generated from user feedback iteration {iteration}
     Original Feedback: {feedback_text}
-    Type: {feedback_type}
     Generated: {datetime.now().isoformat()}
     """
-    import pandas as pd
     import re
-    # Apply feedback-based modifications
-    if notes_data and isinstance(notes_data, dict):
-        # Modify notes content based on feedback analysis
-        for sheet_name, df in notes_data.items():
-            if isinstance(df, pd.DataFrame):
-                df_copy = df.copy()
-                # Add detailed depreciation notes with asset categories
-                if {apply_detailed_depreciation}:
-                    if 'depreciation' in sheet_name.lower() or 'fixed asset' in sheet_name.lower():
-                        if len(df.columns) >= 1:
-                            # Add detailed descriptions to the first column
-                            if df_copy.columns[0] in df_copy.columns:
-                                mask = df_copy.iloc[:, 0].astype(str).str.contains('depreciation|asset', case=False, na=False)
-                                df_copy.loc[mask, df_copy.columns[0]] = df_copy.loc[mask, df_copy.columns[0]].astype(str) + \\
-                                    ' - Detailed breakdown by asset category including buildings, equipment, furniture, and motor vehicles'
-                # Increase detail level for all notes
-                if {apply_increase_detail}:
-                    if len(df.columns) >= 1 and df_copy.columns[0] in df_copy.columns:
-                        for idx in df_copy.index:
-                            if pd.notna(df_copy.iloc[idx, 0]):
-                                current_value = str(df_copy.iloc[idx, 0])
-                                if 'depreciation' in current_value.lower():
-                                    df_copy.iloc[idx, 0] = current_value + ' (Systematic allocation of asset cost over useful life)'
-                                elif 'inventory' in current_value.lower():
-                                    df_copy.iloc[idx, 0] = current_value + ' (Valued at lower of cost or net realizable value)'
-                                elif 'loans' in current_value.lower() or 'advances' in current_value.lower():
-                                    df_copy.iloc[idx, 0] = current_value + ' (Long-term financial assets with repayment terms)'
-                # Update the notes data with modified dataframe
-                notes_data[sheet_name] = df_copy
     return notes_data
 '''
-        return udf_code
     def _generate_formula_udf(self, feedback_text: str, iteration: int) -> str:
         """Generate UDF specifically for formula feedback"""
@@ -284,61 +325,46 @@ class InteractiveFeedbackManager:
     """
     UDF generated from formula feedback iteration {iteration}
     Original Feedback: {feedback_text}
-    Formula: Total = {operand1} - {operand2}
     Generated: {datetime.now().isoformat()}
     """
-    import pandas as pd
-    # Apply formula modifications
-    if notes_data and isinstance(notes_data, dict):
-        for sheet_name, df in notes_data.items():
-            if isinstance(df, pd.DataFrame) and len(df.columns) >= 2:
-                df_copy = df.copy()
-                # Look for the operands in the dataframe
-                operand1_col = None
-                operand2_col = None
-                total_col = None
-                # Find columns containing the operands
-                for col in df_copy.columns:
-                    col_str = str(col).lower()
-                    if operand1.lower() in col_str:
-                        operand1_col = col
-                    if operand2.lower() in col_str:
-                        operand2_col = col
-                    if 'total' in col_str:
-                        total_col = col
-                # If we found the operand columns, create or update total
-                if operand1_col is not None and operand2_col is not None:
-                    # Calculate the formula: operand1 - operand2
-                    try:
-                        # Convert to numeric, handling any non-numeric values
-                        op1_values = pd.to_numeric(df_copy[operand1_col], errors='coerce')
-                        op2_values = pd.to_numeric(df_copy[operand2_col], errors='coerce')
-                        # Calculate total = operand1 - operand2
-                        calculated_total = op1_values - op2_values
-                        # Add or update total column
-                        if total_col is None:
-                            # Find a good position for total column (usually after the operands)
-                            cols = list(df_copy.columns)
-                            max_idx = max(cols.index(operand1_col), cols.index(operand2_col))
-                            cols.insert(max_idx + 1, 'Total')
-                            df_copy['Total'] = calculated_total
-                            df_copy = df_copy[cols]
-                        else:
-                            df_copy[total_col] = calculated_total
-                        print(f"Applied formula: Total = {operand1} - {operand2}")
-                        print(f"Sample calculation: {{op1_values.iloc[0] if len(op1_values) > 0 else 'N/A'}} - {{op2_values.iloc[0] if len(op2_values) > 0 else 'N/A'}} = {{calculated_total.iloc[0] if len(calculated_total) > 0 else 'N/A'}}")
-                    except Exception as e:
-                        print(f"Error applying formula: {{e}}")
-                notes_data[sheet_name] = df_copy
     return notes_data
 '''
@@ -428,13 +454,11 @@ class LLMNotesGenerator(BaseGenerator):
                 result = run_rlhf_workflow(file_path, "notes-llm")
             else:
                 from agents.langgraph import run_workflow
-                result = run_workflow(file_path, "notes-llm")
             if result["status"] == "success":
-                # Apply UDFs to the result if available
-                if udfs_to_apply:
-                    result = self._apply_udfs_to_result(result, udfs_to_apply, feedback_context)
                 return GenerationResult(
                     success=True,
                     output_path=result["result"]["output_xlsx_path"],

         apply_detailed_depreciation = 'depreciation' in feedback_lower and 'asset' in feedback_lower
         apply_increase_detail = 'detail' in feedback_lower
+        # Handle different feedback types
         if feedback_type == 'formula':
             return self._generate_formula_udf(feedback_text, iteration)
+        elif feedback_type == 'text':
+            return self._generate_text_udf(feedback_text, iteration)
+        elif feedback_type == 'suggestion':
+            return self._generate_suggestion_udf(feedback_text, iteration)
+        else:
+            return self._generate_general_udf(feedback_text, feedback_type, iteration)
+    def _generate_text_udf(self, feedback_text: str, iteration: int) -> str:
+        """Generate UDF for text feedback"""
+        return f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='text'):
     """
+    UDF generated from text feedback iteration {iteration}
     Original Feedback: {feedback_text}
     Generated: {datetime.now().isoformat()}
     """
     import re
+    if notes_data and isinstance(notes_data, dict) and 'notes' in notes_data:
+        # Extract target note number
+        feedback_lower = "{feedback_text}".lower()
+        note_match = re.search(r'note\\s*(\\d+)', feedback_lower)
+        target_note = note_match.group(1) if note_match else None
+        for note in notes_data['notes']:
+            note_num = note.get('metadata', {{}}).get('note_number', '')
+            if not target_note or note_num == target_note:
+                # Add text feedback to assumptions or create user_notes field
+                if 'assumptions' in note:
+                    note['assumptions'] += f" [User Note: {feedback_text}]"
+                else:
+                    note['user_notes'] = note.get('user_notes', [])
+                    note['user_notes'].append(feedback_text)
+    return notes_data
+'''
+    def _generate_suggestion_udf(self, feedback_text: str, iteration: int) -> str:
+        """Generate UDF for suggestion feedback"""
+        return f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='suggestion'):
+    """
+    UDF generated from suggestion feedback iteration {iteration}
+    Original Feedback: {feedback_text}
+    Generated: {datetime.now().isoformat()}
+    """
+    import re
+    if notes_data and isinstance(notes_data, dict) and 'notes' in notes_data:
+        # Extract target note number
+        feedback_lower = "{feedback_text}".lower()
+        note_match = re.search(r'note\\s*(\\d+)', feedback_lower)
+        target_note = note_match.group(1) if note_match else None
+        for note in notes_data['notes']:
+            note_num = note.get('metadata', {{}}).get('note_number', '')
+            if not target_note or note_num == target_note:
+                # Apply suggestions
+                note['user_suggestions'] = note.get('user_suggestions', [])
+                note['user_suggestions'].append(feedback_text)
+                # Parse common suggestions
+                if 'add' in feedback_lower and 'breakdown' in feedback_lower:
+                    note['enhanced_breakdown'] = True
+                elif 'more detail' in feedback_lower:
+                    note['detail_level'] = 'enhanced'
     return notes_data
 '''
+    def _generate_general_udf(self, feedback_text: str, feedback_type: str, iteration: int) -> str:
+        """Generate general UDF for other feedback types"""
+        return f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='{feedback_type}'):
+    """
+    UDF generated from {feedback_type} feedback iteration {iteration}
+    Original Feedback: {feedback_text}
+    Generated: {datetime.now().isoformat()}
+    """
+    if notes_data and isinstance(notes_data, dict) and 'notes' in notes_data:
+        for note in notes_data['notes']:
+            # Apply general feedback
+            note['user_feedback'] = note.get('user_feedback', [])
+            note['user_feedback'].append({{
+                'type': '{feedback_type}',
+                'text': '{feedback_text}',
+                'iteration': {iteration}
+            }})
+    return notes_data
+'''
     def _generate_formula_udf(self, feedback_text: str, iteration: int) -> str:
         """Generate UDF specifically for formula feedback"""
     """
     UDF generated from formula feedback iteration {iteration}
     Original Feedback: {feedback_text}
     Generated: {datetime.now().isoformat()}
     """
+    import re
+    # Apply formula modifications to JSON structure
+    if notes_data and isinstance(notes_data, dict) and 'notes' in notes_data:
+        # Extract note number and formula from feedback
+        feedback_lower = "{feedback_text}".lower()
+        note_match = re.search(r'note\\s*(\\d+)', feedback_lower)
+        target_note = note_match.group(1) if note_match else None
+        # Parse formula operators
+        operand1, operand2 = "{operand1}", "{operand2}"
+        for note in notes_data['notes']:
+            note_num = note.get('metadata', {{}}).get('note_number', '')
+            if not target_note or note_num == target_note:
+                if 'structure' in note:
+                    for item in note['structure']:
+                        if 'subcategories' in item:
+                            vals = {{}}
+                            for sub in item['subcategories']:
+                                label = sub.get('label', '').lower()
+                                if operand1.lower() in label:
+                                    try:
+                                        vals[operand1] = float(sub.get('value', 0))
+                                    except:
+                                        vals[operand1] = 0
+                                elif operand2.lower() in label:
+                                    try:
+                                        vals[operand2] = float(sub.get('value', 0))
+                                    except:
+                                        vals[operand2] = 0
+                            if len(vals) == 2:
+                                result = vals[operand1] - vals[operand2]
+                                item['total'] = str(result)
+                                print(f"Applied formula in note {{note_num}}: {{vals[operand1]}} - {{vals[operand2]}} = {{result}}")
     return notes_data
 '''
                 result = run_rlhf_workflow(file_path, "notes-llm")
             else:
                 from agents.langgraph import run_workflow
+                result = run_workflow(file_path, "notes-llm", feedback_context=feedback_context)
             if result["status"] == "success":
+                # UDFs are now applied in generate_llm_notes function before Excel conversion
                 return GenerationResult(
                     success=True,
                     output_path=result["result"]["output_xlsx_path"],

agents/langgraph.py CHANGED Viewed

@@ -23,8 +23,13 @@ def make_workflow(tool_func):
     def node(state: FinancialAgentState) -> FinancialAgentState:
         state["start_time"] = time.time()
         try:
             # Use .invoke() to avoid deprecation warning
-            result = tool_func.invoke({"file_path": state["file_path"]})
             state["result"] = result
             state["status"] = "success" if result.get("status") == "success" else "error"
             state["error"] = result.get("error", "")
@@ -48,7 +53,7 @@ workflows = {
     "notes-llm": make_workflow(generate_llm_notes),
 }
-def run_workflow(file_path: str, kind: str) -> Dict[str, Any]:
     state = FinancialAgentState(
         messages=[HumanMessage(content=f"Run {kind} for {file_path}")],
         file_path=file_path,
@@ -58,5 +63,8 @@ def run_workflow(file_path: str, kind: str) -> Dict[str, Any]:
         end_time=0,
         error="",
     )
     final = workflows[kind].invoke(state)
     return final

     def node(state: FinancialAgentState) -> FinancialAgentState:
         state["start_time"] = time.time()
         try:
+            # Prepare parameters for tool invocation
+            tool_params = {"file_path": state["file_path"]}
+            # Add feedback_context if available
+            if "feedback_context" in state:
+                tool_params["feedback_context"] = state["feedback_context"]
             # Use .invoke() to avoid deprecation warning
+            result = tool_func.invoke(tool_params)
             state["result"] = result
             state["status"] = "success" if result.get("status") == "success" else "error"
             state["error"] = result.get("error", "")
     "notes-llm": make_workflow(generate_llm_notes),
 }
+def run_workflow(file_path: str, kind: str, **kwargs) -> Dict[str, Any]:
     state = FinancialAgentState(
         messages=[HumanMessage(content=f"Run {kind} for {file_path}")],
         file_path=file_path,
         end_time=0,
         error="",
     )
+    # Add feedback_context if provided
+    if "feedback_context" in kwargs:
+        state["feedback_context"] = kwargs["feedback_context"]
     final = workflows[kind].invoke(state)
     return final

agents/simple_tools.py CHANGED Viewed

@@ -9,6 +9,7 @@ import json
 import shutil
 import time
 import uuid
 from typing import Dict, Any
 import logging
@@ -371,7 +372,7 @@ def generate_cash_flow_statement(file_path: str) -> Dict[str, Any]:
         }
 @tool
-def generate_llm_notes(file_path: str, note_numbers: str = "") -> Dict[str, Any]:
     """
     Generate notes using LLM-based approach (FlexibleFinancialNoteGenerator)
     Args:
@@ -430,7 +431,12 @@ def generate_llm_notes(file_path: str, note_numbers: str = "") -> Dict[str, Any]
         # Step 3: Convert to Excel
         logger.info("Step 3: Converting to Excel format")
         input_json = "data/generated_notes/notes.json"
-        output_excel = "data/generated_notes_excel/notes.xlsx"
         # Check if the JSON file was created and has content
         if not os.path.exists(input_json):
@@ -442,6 +448,42 @@ def generate_llm_notes(file_path: str, note_numbers: str = "") -> Dict[str, Any]
                 "execution_time": execution_time
             }
         # Check if JSON file has content
         try:
             with open(input_json, 'r', encoding='utf-8') as f:

 import shutil
 import time
 import uuid
+from datetime import datetime
 from typing import Dict, Any
 import logging
         }
 @tool
+def generate_llm_notes(file_path: str, note_numbers: str = "", **kwargs) -> Dict[str, Any]:
     """
     Generate notes using LLM-based approach (FlexibleFinancialNoteGenerator)
     Args:
         # Step 3: Convert to Excel
         logger.info("Step 3: Converting to Excel format")
         input_json = "data/generated_notes/notes.json"
+        # Create unique output path in llm_generated folder
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        output_folder = "data/notes_llm_generated"
+        os.makedirs(output_folder, exist_ok=True)
+        output_excel = f"{output_folder}/new_{timestamp}_{execution_id}.xlsx"
         # Check if the JSON file was created and has content
         if not os.path.exists(input_json):
                 "execution_time": execution_time
             }
+        # Apply UDFs if provided in kwargs
+        feedback_context = kwargs.get('feedback_context', {})
+        udfs_to_apply = feedback_context.get('udfs', [])
+        if udfs_to_apply:
+            try:
+                # Load JSON data
+                with open(input_json, 'r', encoding='utf-8') as f:
+                    notes_data = json.load(f)
+                # Apply each UDF
+                for udf_code in udfs_to_apply:
+                    try:
+                        local_vars = {}
+                        exec(udf_code, {"datetime": datetime}, local_vars)
+                        # Find the UDF function
+                        udf_func = None
+                        for var_name, var_value in local_vars.items():
+                            if callable(var_value) and var_name.startswith('apply_user_feedback'):
+                                udf_func = var_value
+                                break
+                        if udf_func:
+                            notes_data = udf_func(notes_data, feedback_context.get('feedback_type', 'general'))
+                            logger.info(f"Applied UDF successfully")
+                    except Exception as e:
+                        logger.warning(f"Failed to apply UDF: {e}")
+                        continue
+                # Save modified JSON back
+                with open(input_json, 'w', encoding='utf-8') as f:
+                    json.dump(notes_data, f, ensure_ascii=False, indent=2)
+            except Exception as e:
+                logger.error(f"Error applying UDFs to JSON: {e}")
         # Check if JSON file has content
         try:
             with open(input_json, 'r', encoding='utf-8') as f:

app.py CHANGED Viewed

@@ -216,9 +216,15 @@ async def generate_with_feedback(
         pipeline = create_notes_pipeline(use_rlhf=False)
         # Prepare feedback context for the generator
         feedback_context = {
             'session_id': session_id,
-            'udfs': session.archived_udfs,  # Pass all archived UDFs
             'feedback_history': [
                 {
                     'text': f.feedback_text,

         pipeline = create_notes_pipeline(use_rlhf=False)
         # Prepare feedback context for the generator
+        udfs_to_apply = []
+        if session.final_udf:
+            udfs_to_apply.append(session.final_udf)
+        elif session.archived_udfs:
+            udfs_to_apply.extend(session.archived_udfs)
         feedback_context = {
             'session_id': session_id,
+            'udfs': udfs_to_apply,  # Pass final UDF if available, otherwise archived UDFs
             'feedback_history': [
                 {
                     'text': f.feedback_text,

notes/llm_notes_generator.py CHANGED Viewed

@@ -23,6 +23,7 @@ from typing import Dict, List, Any, Optional, Tuple
 import pandas as pd
 from pydantic import BaseModel, ValidationError
 from pydantic_settings import BaseSettings
 from utils.utils import convert_note_json_to_lakhs
 # Load environment variables
@@ -33,9 +34,8 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class Settings(BaseSettings):
-	"""Application settings loaded from environment variables or .env file."""
-	mistral_api_key: str = os.getenv('MISTRAL_API_KEY', '')
-	api_url: str = "https://api.mistral.ai/v1/chat/completions"
 	output_dir: str = "data/generated_notes"
 	trial_balance_json: str = "data/output1/parsed_trial_balance.json"
@@ -61,20 +61,22 @@ class GeneratedNote(BaseModel):
 class FlexibleFinancialNoteGenerator:
     def __init__(self):
-        self.mistral_api_key = settings.mistral_api_key
-        if not self.mistral_api_key:
-            logger.error("MISTRAL_API_KEY not found in .env file")
-            raise ValueError("MISTRAL_API_KEY not found in .env file")
         self.api_url = settings.api_url
         self.headers = {
-            "Authorization": f"Bearer {self.mistral_api_key}",
-            "Content-Type": "application/json"
         }
         self.note_templates = self.load_note_templates()
         self.account_patterns = self._init_account_patterns()
         self.recommended_models = [
-            "mistral-large-latest",
-            "mistral-medium-latest"
         ]
     def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
@@ -338,8 +340,8 @@ class FlexibleFinancialNoteGenerator:
         return prompt
-    def call_mistral_api(self, prompt: str) -> Optional[str]:
-        """Make API call to Mistral with model fallback"""
         for model in self.recommended_models:
             logger.info(f"Trying model: {model}")
             payload = {
@@ -357,7 +359,7 @@ class FlexibleFinancialNoteGenerator:
                     self.api_url,
                     headers=self.headers,
                     json=payload,
-                    timeout=30
                 )
                 response.raise_for_status()
                 result = response.json()
@@ -448,7 +450,7 @@ class FlexibleFinancialNoteGenerator:
             logger.error("Failed to build prompt")
             return False
-        response = self.call_mistral_api(prompt)
         if not response:
             logger.error("Failed to get API response")
             return False
@@ -473,7 +475,7 @@ class FlexibleFinancialNoteGenerator:
             if not prompt:
                 results[note_number] = False
                 continue
-            response = self.call_mistral_api(prompt)
             if not response:
                 results[note_number] = False
                 continue

 import pandas as pd
 from pydantic import BaseModel, ValidationError
 from pydantic_settings import BaseSettings
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from utils.utils import convert_note_json_to_lakhs
 # Load environment variables
 logger = logging.getLogger(__name__)
 class Settings(BaseSettings):
+	openrouter_api_key: str = os.getenv('OPENROUTER_API_KEY', '')
+	api_url: str = "https://openrouter.ai/api/v1/chat/completions"
 	output_dir: str = "data/generated_notes"
 	trial_balance_json: str = "data/output1/parsed_trial_balance.json"
 class FlexibleFinancialNoteGenerator:
     def __init__(self):
+        self.openrouter_api_key = settings.openrouter_api_key
+        if not self.openrouter_api_key:
+            logger.error("OPENROUTER_API_KEY not found in .env file")
+            raise ValueError("OPENROUTER_API_KEY not found in .env file")
         self.api_url = settings.api_url
         self.headers = {
+            "Authorization": f"Bearer {self.openrouter_api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://localhost:3000",
+            "X-Title": "Financial Note Generator"
         }
         self.note_templates = self.load_note_templates()
         self.account_patterns = self._init_account_patterns()
         self.recommended_models = [
+            "mistralai/mixtral-8x7b-instruct",
+            "mistralai/mistral-7b-instruct-v0.2"
         ]
     def _init_account_patterns(self) -> Dict[str, Dict[str, Any]]:
         return prompt
+    def call_openrouter_api(self, prompt: str) -> Optional[str]:
+        """Make API call to OpenRouter with model fallback"""
         for model in self.recommended_models:
             logger.info(f"Trying model: {model}")
             payload = {
                     self.api_url,
                     headers=self.headers,
                     json=payload,
+                    timeout=30  # <-- Add timeout here!
                 )
                 response.raise_for_status()
                 result = response.json()
             logger.error("Failed to build prompt")
             return False
+        response = self.call_openrouter_api(prompt)
         if not response:
             logger.error("Failed to get API response")
             return False
             if not prompt:
                 results[note_number] = False
                 continue
+            response = self.call_openrouter_api(prompt)
             if not response:
                 results[note_number] = False
                 continue