Sahil Garg committed on
Commit
6611563
·
1 Parent(s): 850182e

UDF support added to /notes-llm along with RLHF changes

.gitignore CHANGED
@@ -23,6 +23,16 @@ docker-compose.dev.yml
 file_cleanup.py
 agents/langgraph_routes.py
 
+# Test and debug files
+test_*.py
+debug_*.py
+check_*.py
+*_test.py
+*_debug.py
+*_check.py
+file_cleanup.py
+restart_server.py
+
 # RLHF related data
 data/feedback/
 data/models/
agents/feedback_manager.py CHANGED
@@ -53,22 +53,6 @@ class FeedbackManager:
             "timestamp": time.time(),
             "reviewer_id": feedback.get("reviewer_id", "anonymous"),
 
-            # Technical accuracy metrics
-            "calculation_accuracy": feedback.get("calculation_accuracy"),
-            "account_classification": feedback.get("account_classification"),
-            "statement_balance": feedback.get("statement_balance"),
-
-            # Compliance metrics
-            "accounting_standards": feedback.get("accounting_standards"),
-            "regulatory_compliance": feedback.get("regulatory_compliance"),
-
-            # Quality metrics
-            "completeness": feedback.get("completeness"),
-            "professional_presentation": feedback.get("professional_presentation"),
-
-            # Overall quality score (computed)
-            "overall_score": self._compute_overall_score(feedback),
-
             # Qualitative feedback
             "specific_errors": feedback.get("specific_errors", ""),
             "missing_items": feedback.get("missing_items", ""),
@@ -102,29 +86,17 @@ class FeedbackManager:
         # Filter and prepare training data
         training_data = []
         for feedback in feedback_data:
-            if feedback.get("overall_score") is not None:
-                training_sample = {
-                    "statement_id": feedback["statement_id"],
-                    "statement_type": feedback["statement_type"],
-                    "reward_score": feedback["overall_score"],
-                    "binary_approval": feedback["would_accept_for_audit"],
-                    "technical_metrics": {
-                        "calculation_accuracy": feedback.get("calculation_accuracy"),
-                        "account_classification": feedback.get("account_classification"),
-                        "statement_balance": feedback.get("statement_balance")
-                    },
-                    "quality_metrics": {
-                        "completeness": feedback.get("completeness"),
-                        "professional_presentation": feedback.get("professional_presentation"),
-                        "accounting_standards": feedback.get("accounting_standards")
-                    },
-                    "feedback_text": {
-                        "errors": feedback.get("specific_errors", ""),
-                        "missing": feedback.get("missing_items", ""),
-                        "suggestions": feedback.get("improvement_suggestions", "")
-                    }
+            training_sample = {
+                "statement_id": feedback["statement_id"],
+                "statement_type": feedback["statement_type"],
+                "binary_approval": feedback["would_accept_for_audit"],
+                "feedback_text": {
+                    "errors": feedback.get("specific_errors", ""),
+                    "missing": feedback.get("missing_items", ""),
+                    "suggestions": feedback.get("improvement_suggestions", "")
                 }
-                training_data.append(training_sample)
+            }
+            training_data.append(training_sample)
 
         return training_data
 
@@ -157,23 +129,20 @@ class FeedbackManager:
             return {"total_feedback": 0, "total_statements": len(statements)}
 
         # Calculate statistics
-        scores = [fb["overall_score"] for fb in feedback_data if fb.get("overall_score")]
         audit_approvals = [fb["would_accept_for_audit"] for fb in feedback_data]
 
         stats = {
             "total_feedback": len(feedback_data),
             "total_statements": len(statements),
-            "avg_overall_score": sum(scores) / len(scores) if scores else 0,
             "audit_approval_rate": sum(audit_approvals) / len(audit_approvals) if audit_approvals else 0,
-            "feedback_by_type": {},
-            "recent_trend": self._calculate_trend()
+            "feedback_by_type": {}
         }
 
         # Group by statement type
         for fb in feedback_data:
             stmt_type = fb.get("statement_type", "unknown")
             if stmt_type not in stats["feedback_by_type"]:
-                stats["feedback_by_type"][stmt_type] = {"count": 0, "avg_score": 0}
+                stats["feedback_by_type"][stmt_type] = {"count": 0}
             stats["feedback_by_type"][stmt_type]["count"] += 1
 
         return stats
@@ -197,52 +166,3 @@ class FeedbackManager:
         except (json.JSONDecodeError, FileNotFoundError):
             logger.warning("Could not load statements database, starting fresh")
             return []
-
-    def _compute_overall_score(self, feedback: Dict[str, Any]) -> float:
-        """Compute overall quality score from individual metrics"""
-        metrics = [
-            feedback.get("calculation_accuracy"),
-            feedback.get("account_classification"),
-            feedback.get("statement_balance"),
-            feedback.get("accounting_standards"),
-            feedback.get("regulatory_compliance"),
-            feedback.get("completeness"),
-            feedback.get("professional_presentation")
-        ]
-
-        # Filter out None values
-        valid_metrics = [m for m in metrics if m is not None]
-
-        if not valid_metrics:
-            return 0.0
-
-        return sum(valid_metrics) / len(valid_metrics)
-
-    def _calculate_trend(self) -> Dict[str, float]:
-        """Calculate recent feedback trend"""
-        feedback_data = self._load_feedback()
-
-        if len(feedback_data) < 5:
-            return {"trend": "insufficient_data"}
-
-        # Sort by timestamp
-        sorted_feedback = sorted(feedback_data, key=lambda x: x.get("timestamp", 0))
-
-        # Compare recent vs older feedback
-        mid_point = len(sorted_feedback) // 2
-        older_scores = [fb["overall_score"] for fb in sorted_feedback[:mid_point] if fb.get("overall_score")]
-        recent_scores = [fb["overall_score"] for fb in sorted_feedback[mid_point:] if fb.get("overall_score")]
-
-        if older_scores and recent_scores:
-            older_avg = sum(older_scores) / len(older_scores)
-            recent_avg = sum(recent_scores) / len(recent_scores)
-            improvement = recent_avg - older_avg
-
-            return {
-                "older_average": older_avg,
-                "recent_average": recent_avg,
-                "improvement": improvement,
-                "trend": "improving" if improvement > 0.1 else "stable" if abs(improvement) <= 0.1 else "declining"
-            }
-
-        return {"trend": "insufficient_data"}
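Note: after this change, get_training_data emits samples with no numeric score fields; the binary audit approval is the only remaining label. A minimal sketch of consuming one such sample (the field values below are invented for illustration):

    # Sketch: a simplified, text-only training sample as now produced (values invented)
    sample = {
        "statement_id": "abc-123",
        "statement_type": "notes",
        "binary_approval": True,
        "feedback_text": {
            "errors": "Depreciation total off by 10",
            "missing": "Inventory valuation note",
            "suggestions": "Add an asset-category breakdown",
        },
    }

    label = 1.0 if sample["binary_approval"] else 0.0  # approval replaces the removed overall_score
    text = " ".join(sample["feedback_text"].values())   # free-text feedback for downstream analysis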
agents/generator_validator.py CHANGED
@@ -9,8 +9,6 @@ from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Optional, Tuple
 from dataclasses import dataclass
 from datetime import datetime
-import subprocess
-import shutil
 import uuid
 
 logger = logging.getLogger(__name__)
@@ -76,9 +74,14 @@ class InteractiveFeedbackManager:
                 # Convert datetime strings back to datetime objects
                 session_data['created_at'] = datetime.fromisoformat(session_data['created_at'])
                 session_data['last_updated'] = datetime.fromisoformat(session_data['last_updated'])
-                # Convert feedback history timestamps
-                for feedback in session_data['feedback_history']:
-                    feedback['timestamp'] = datetime.fromisoformat(feedback['timestamp'])
+
+                # Convert feedback history dictionaries back to FeedbackData objects
+                feedback_objects = []
+                for feedback_dict in session_data['feedback_history']:
+                    feedback_dict['timestamp'] = datetime.fromisoformat(feedback_dict['timestamp'])
+                    feedback_objects.append(FeedbackData(**feedback_dict))
+                session_data['feedback_history'] = feedback_objects
+
                 self.sessions[session_id] = InteractiveSession(**session_data)
         except Exception as e:
             logger.error(f"Failed to load sessions: {e}")
@@ -90,28 +93,32 @@ class InteractiveFeedbackManager:
         try:
             data = {}
             for session_id, session in self.sessions.items():
-                session_dict = {
-                    'session_id': session.session_id,
-                    'original_file_path': session.original_file_path,
-                    'current_iteration': session.current_iteration,
-                    'feedback_history': [
-                        {
-                            'session_id': f.session_id,
-                            'feedback_text': f.feedback_text,
-                            'feedback_type': f.feedback_type,
-                            'iteration_number': f.iteration_number,
-                            'timestamp': f.timestamp.isoformat(),
-                            'changes_description': f.changes_description,
-                            'udf_function': f.udf_function,
-                            'udf_version': f.udf_version
-                        } for f in session.feedback_history
-                    ],
-                    'archived_udfs': session.archived_udfs,
-                    'final_udf': session.final_udf,
-                    'status': session.status,
-                    'created_at': session.created_at.isoformat(),
-                    'last_updated': session.last_updated.isoformat()
-                }
+                # Handle case where session might be a dict instead of InteractiveSession object
+                if isinstance(session, dict):
+                    session_dict = session
+                else:
+                    session_dict = {
+                        'session_id': session.session_id,
+                        'original_file_path': session.original_file_path,
+                        'current_iteration': session.current_iteration,
+                        'feedback_history': [
+                            {
+                                'session_id': f.session_id,
+                                'feedback_text': f.feedback_text,
+                                'feedback_type': f.feedback_type,
+                                'iteration_number': f.iteration_number,
+                                'timestamp': f.timestamp.isoformat(),
+                                'changes_description': f.changes_description,
+                                'udf_function': f.udf_function,
+                                'udf_version': f.udf_version
+                            } for f in session.feedback_history
+                        ],
+                        'archived_udfs': session.archived_udfs,
+                        'final_udf': session.final_udf,
+                        'status': session.status,
+                        'created_at': session.created_at.isoformat(),
+                        'last_updated': session.last_updated.isoformat()
+                    }
                 data[session_id] = session_dict
 
             with open(self.sessions_file, 'w') as f:
@@ -186,34 +193,186 @@ class InteractiveFeedbackManager:
         return self.sessions.get(session_id)
 
     def _generate_udf_from_feedback(self, feedback_text: str, feedback_type: str, iteration: int) -> str:
-        """Generate UDF function based on user feedback"""
-        # This is a simplified UDF generation - in practice, this would use LLM to create proper functions
-        udf_template = f"""
-def apply_user_feedback_v{iteration}(notes_data, feedback_type='{feedback_type}'):
-    '''
+        """Generate UDF function based on user feedback with actual analysis"""
+        # Analyze feedback content and create meaningful modifications
+        feedback_lower = feedback_text.lower()
+
+        # Determine what modifications to apply based on feedback
+        apply_detailed_depreciation = 'depreciation' in feedback_lower and 'asset' in feedback_lower
+        apply_increase_detail = 'detail' in feedback_lower
+
+        # Handle formula feedback specifically
+        if feedback_type == 'formula':
+            return self._generate_formula_udf(feedback_text, iteration)
+
+        # Create properly formatted UDF code
+        udf_code = f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='{feedback_type}'):
+    """
     UDF generated from user feedback iteration {iteration}
-    Feedback: {feedback_text}
+    Original Feedback: {feedback_text}
     Type: {feedback_type}
    Generated: {datetime.now().isoformat()}
-    '''
+    """
+    import pandas as pd
+    import re
+
    # Apply feedback-based modifications
-    if '{feedback_type}' == 'numeric':
-        # Handle numeric feedback (e.g., adjust calculations)
-        pass
-    elif '{feedback_type}' == 'formula':
-        # Handle formula suggestions
-        pass
-    elif '{feedback_type}' == 'text':
-        # Handle text-based suggestions
-        pass
-    elif '{feedback_type}' == 'suggestion':
-        # Handle general suggestions
-        pass
+    if notes_data and isinstance(notes_data, dict):
+        # Modify notes content based on feedback analysis
+        for sheet_name, df in notes_data.items():
+            if isinstance(df, pd.DataFrame):
+                df_copy = df.copy()
+
+                # Add detailed depreciation notes with asset categories
+                if {apply_detailed_depreciation}:
+                    if 'depreciation' in sheet_name.lower() or 'fixed asset' in sheet_name.lower():
+                        if len(df.columns) >= 1:
+                            # Add detailed descriptions to the first column
+                            if df_copy.columns[0] in df_copy.columns:
+                                mask = df_copy.iloc[:, 0].astype(str).str.contains('depreciation|asset', case=False, na=False)
+                                df_copy.loc[mask, df_copy.columns[0]] = df_copy.loc[mask, df_copy.columns[0]].astype(str) + \\
+                                    ' - Detailed breakdown by asset category including buildings, equipment, furniture, and motor vehicles'
+
+                # Increase detail level for all notes
+                if {apply_increase_detail}:
+                    if len(df.columns) >= 1 and df_copy.columns[0] in df_copy.columns:
+                        for idx in df_copy.index:
+                            if pd.notna(df_copy.iloc[idx, 0]):
+                                current_value = str(df_copy.iloc[idx, 0])
+                                if 'depreciation' in current_value.lower():
+                                    df_copy.iloc[idx, 0] = current_value + ' (Systematic allocation of asset cost over useful life)'
+                                elif 'inventory' in current_value.lower():
+                                    df_copy.iloc[idx, 0] = current_value + ' (Valued at lower of cost or net realizable value)'
+                                elif 'loans' in current_value.lower() or 'advances' in current_value.lower():
+                                    df_copy.iloc[idx, 0] = current_value + ' (Long-term financial assets with repayment terms)'
+
+                # Update the notes data with modified dataframe
+                notes_data[sheet_name] = df_copy
 
     return notes_data
-"""
+    '''
+
+        return udf_code
+
+    def _generate_formula_udf(self, feedback_text: str, iteration: int) -> str:
+        """Generate UDF specifically for formula feedback"""
+        import re
+
+        # Parse the formula from feedback text
+        # First try the flexible pattern that captures full operand names
+        formula_match = re.search(r'=\s*([^-\n]+)\s*-\s*([^\n]+)', feedback_text, re.IGNORECASE)
+        if formula_match:
+            operand1 = formula_match.group(1).strip()
+            operand2 = formula_match.group(2).strip()
+        else:
+            # Fallback to other patterns
+            formula_match = re.search(r'total\s*=\s*(.+?)\s*-\s*(.+?)(?:\s|$)', feedback_text, re.IGNORECASE)
+            if formula_match:
+                operand1 = formula_match.group(1).strip()
+                operand2 = formula_match.group(2).strip()
+            else:
+                formula_match = re.search(r'(.+?)\s*-\s*(.+?)\s*=\s*total', feedback_text, re.IGNORECASE)
+                if formula_match:
+                    operand1 = formula_match.group(1).strip()
+                    operand2 = formula_match.group(2).strip()
+
+        if formula_match:
+            operand1 = formula_match.group(1).strip()
+            operand2 = formula_match.group(2).strip()
+
+            udf_code = f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='formula'):
+    """
+    UDF generated from formula feedback iteration {iteration}
+    Original Feedback: {feedback_text}
+    Formula: Total = {operand1} - {operand2}
+    Generated: {datetime.now().isoformat()}
+    """
+    import pandas as pd
+
+    # Apply formula modifications
+    if notes_data and isinstance(notes_data, dict):
+        for sheet_name, df in notes_data.items():
+            if isinstance(df, pd.DataFrame) and len(df.columns) >= 2:
+                df_copy = df.copy()
+
+                # Look for the operands in the dataframe
+                operand1_col = None
+                operand2_col = None
+                total_col = None
+
+                # Find columns containing the operands
+                for col in df_copy.columns:
+                    col_str = str(col).lower()
+                    if operand1.lower() in col_str:
+                        operand1_col = col
+                    if operand2.lower() in col_str:
+                        operand2_col = col
+                    if 'total' in col_str:
+                        total_col = col
+
+                # If we found the operand columns, create or update total
+                if operand1_col is not None and operand2_col is not None:
+                    # Calculate the formula: operand1 - operand2
+                    try:
+                        # Convert to numeric, handling any non-numeric values
+                        op1_values = pd.to_numeric(df_copy[operand1_col], errors='coerce')
+                        op2_values = pd.to_numeric(df_copy[operand2_col], errors='coerce')
+
+                        # Calculate total = operand1 - operand2
+                        calculated_total = op1_values - op2_values
+
+                        # Add or update total column
+                        if total_col is None:
+                            # Find a good position for total column (usually after the operands)
+                            cols = list(df_copy.columns)
+                            max_idx = max(cols.index(operand1_col), cols.index(operand2_col))
+                            cols.insert(max_idx + 1, 'Total')
+                            df_copy['Total'] = calculated_total
+                            df_copy = df_copy[cols]
+                        else:
+                            df_copy[total_col] = calculated_total
+
+                        print(f"Applied formula: Total = {operand1} - {operand2}")
+                        print(f"Sample calculation: {{op1_values.iloc[0] if len(op1_values) > 0 else 'N/A'}} - {{op2_values.iloc[0] if len(op2_values) > 0 else 'N/A'}} = {{calculated_total.iloc[0] if len(calculated_total) > 0 else 'N/A'}}")
+
+                    except Exception as e:
+                        print(f"Error applying formula: {{e}}")
+
+                notes_data[sheet_name] = df_copy
 
-        return udf_template.strip()
+    return notes_data
+    '''
+        else:
+            # Fallback for unrecognized formula patterns
+            udf_code = f'''def apply_user_feedback_v{iteration}(notes_data, feedback_type='formula'):
+    """
+    UDF generated from formula feedback iteration {iteration}
+    Original Feedback: {feedback_text}
+    Generated: {datetime.now().isoformat()}
+    Note: Could not parse formula pattern, applying general enhancement
+    """
+    import pandas as pd
+
+    # Apply general formula-related enhancements
+    if notes_data and isinstance(notes_data, dict):
+        for sheet_name, df in notes_data.items():
+            if isinstance(df, pd.DataFrame):
+                df_copy = df.copy()
+
+                # Add formula indicators to relevant cells
+                if len(df.columns) >= 1:
+                    for idx in df_copy.index:
+                        if pd.notna(df_copy.iloc[idx, 0]):
+                            cell_value = str(df_copy.iloc[idx, 0])
+                            if 'total' in cell_value.lower():
+                                df_copy.iloc[idx, 0] = cell_value + ' (Calculated field)'
+
+                notes_data[sheet_name] = df_copy
+
+    return notes_data
+    '''
+
+        return udf_code
 
 class BaseGenerator(ABC):
     """Abstract base class for financial statement generators"""
@@ -253,11 +412,16 @@ class LLMNotesGenerator(BaseGenerator):
         self.use_rlhf = use_rlhf
 
     def generate(self, file_path: str, **kwargs) -> GenerationResult:
-        """Generate notes using AI/LLM approach"""
+        """Generate notes using AI/LLM approach with feedback integration"""
         try:
             self.attempts_made += 1
             execution_id = f"notes_llm_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{self.attempts_made}"
 
+            # Check for feedback context
+            feedback_context = kwargs.get('feedback_context', {})
+            session_id = feedback_context.get('session_id')
+            udfs_to_apply = feedback_context.get('udfs', [])
+
             # Choose workflow based on RLHF preference
             if self.use_rlhf:
                 from agents.rlhf_workflows import run_rlhf_workflow
@@ -267,6 +431,10 @@ class LLMNotesGenerator(BaseGenerator):
                 result = run_workflow(file_path, "notes-llm")
 
             if result["status"] == "success":
+                # Apply UDFs to the result if available
+                if udfs_to_apply:
+                    result = self._apply_udfs_to_result(result, udfs_to_apply, feedback_context)
+
                 return GenerationResult(
                     success=True,
                     output_path=result["result"]["output_xlsx_path"],
@@ -277,7 +445,10 @@ class LLMNotesGenerator(BaseGenerator):
                         "generation_method": "llm",
                         "use_rlhf": self.use_rlhf,
                         "attempt": self.attempts_made,
-                        "rlhf_metadata": result["result"].get("rlhf_metadata", {})
+                        "rlhf_metadata": result["result"].get("rlhf_metadata", {}),
+                        "feedback_applied": bool(udfs_to_apply),
+                        "udfs_applied_count": len(udfs_to_apply),
+                        "session_id": session_id
                     }
                 )
             else:
@@ -309,6 +480,37 @@ class LLMNotesGenerator(BaseGenerator):
                 }
             )
 
+    def _apply_udfs_to_result(self, result: Dict[str, Any], udfs: List[str], feedback_context: Dict[str, Any]) -> Dict[str, Any]:
+        """Apply UDFs to the generation result"""
+        try:
+            # Execute each UDF and apply modifications
+            for udf_code in udfs:
+                try:
+                    # Create a local namespace for UDF execution
+                    local_vars = {}
+                    exec(udf_code, {"datetime": datetime}, local_vars)
+
+                    # Find the UDF function (it will be the last defined function)
+                    udf_func = None
+                    for var_name, var_value in local_vars.items():
+                        if callable(var_value) and var_name.startswith('apply_user_feedback'):
+                            udf_func = var_value
+                            break
+
+                    if udf_func:
+                        # Apply the UDF to the result data
+                        result["result"] = udf_func(result["result"], feedback_context.get('feedback_type', 'general'))
+
                except Exception as e:
+                    logger.warning(f"Failed to apply UDF: {e}")
+                    continue
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Error applying UDFs: {e}")
+            return result
+
     def refine(self, previous_result: GenerationResult, feedback: List[str]) -> GenerationResult:
         """Refine LLM notes generation based on feedback"""
         logger.info(f"Refining LLM notes generation with feedback: {feedback}")
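Note: the generated UDF travels as Python source text; _apply_udfs_to_result compiles it with exec and then calls the resulting function by its naming convention. A standalone sketch of that mechanism (the UDF body below is a trivial stand-in, not one _generate_udf_from_feedback would actually emit):

    from datetime import datetime

    # Trivial stand-in for a generated UDF string
    udf_code = (
        "def apply_user_feedback_v1(notes_data, feedback_type='text'):\n"
        "    return notes_data\n"
    )

    local_vars = {}
    exec(udf_code, {"datetime": datetime}, local_vars)  # compile into an isolated namespace

    # Locate the generated function by its apply_user_feedback* naming convention
    udf_func = next(
        v for k, v in local_vars.items()
        if callable(v) and k.startswith("apply_user_feedback")
    )
    notes_data = udf_func({"Sheet1": None}, "text")  # apply it to the notes payload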
agents/reward_model.py CHANGED
@@ -1,307 +1,131 @@
 """
-RLHF Reward Model for FinRyver
-Predicts quality scores for generated financial statements based on human feedback
+Enhanced Text-Based RLHF Reward Model for FinRyver
+Focuses on collecting and analyzing specific feedback content instead of predicting quality scores
 """
 import json
 import os
 import logging
-from typing import Dict, Any, List, Optional, Tuple
-import numpy as np
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import mean_squared_error, r2_score
-import joblib
+from typing import Dict, Any, List, Optional
 import time
 
 logger = logging.getLogger(__name__)
 
-class FinancialRewardModel:
+class TextBasedRewardModel:
     """
-    Reward model that predicts quality scores for financial statements
-    Uses traditional ML initially, can be upgraded to transformer-based models
+    Simple reward model that collects and analyzes text-based feedback
     """
 
     def __init__(self, model_dir: str = "data/models"):
         self.model_dir = model_dir
-        self.model_path = os.path.join(model_dir, "reward_model.pkl")
-        self.feature_names_path = os.path.join(model_dir, "feature_names.json")
-        self.model_stats_path = os.path.join(model_dir, "model_stats.json")
-
+        self.feedback_data_path = os.path.join(model_dir, "feedback_data.json")
+
         os.makedirs(model_dir, exist_ok=True)
-
-        # Initialize model
-        self.model = RandomForestRegressor(
-            n_estimators=100,
-            max_depth=10,
-            random_state=42,
-            n_jobs=-1
-        )
-
-        self.feature_names = []
+
+        self.feedback_data = []
         self.is_trained = False
-        self.model_version = "1.0"
-
-        # Load existing model if available
-        self._load_model()
-
-    def extract_features(self, statement_data: Dict[str, Any], statement_content: str = "") -> np.ndarray:
-        """Extract features from statement data for reward prediction"""
-        features = []
-
-        # Basic metadata features
-        features.append(len(statement_content))  # Content length
-        features.append(statement_data.get("generation_time", 0))  # Generation time
-        features.append(1 if statement_data.get("statement_type") == "notes" else 0)
-        features.append(1 if statement_data.get("statement_type") == "balance_sheet" else 0)
-        features.append(1 if statement_data.get("statement_type") == "pnl" else 0)
-        features.append(1 if statement_data.get("statement_type") == "cash_flow" else 0)
-
-        # Content-based features (simple heuristics)
-        if statement_content:
-            features.append(statement_content.count("$"))  # Number of monetary values
-            features.append(statement_content.count("\n"))  # Number of lines
-            features.append(len(statement_content.split()))  # Word count
-            features.append(statement_content.count("."))  # Number of sentences
-            features.append(statement_content.count(","))  # Number of commas (complexity indicator)
-
-            # Financial keywords
-            financial_keywords = ["asset", "liability", "equity", "revenue", "expense", "cash", "account"]
-            keyword_count = sum(statement_content.lower().count(keyword) for keyword in financial_keywords)
-            features.append(keyword_count)
-
-            # Professional language indicators
-            professional_words = ["accordance", "pursuant", "whereas", "therefore", "respective"]
-            professional_count = sum(statement_content.lower().count(word) for word in professional_words)
-            features.append(professional_count)
-        else:
-            # Default values if no content available
-            features.extend([0] * 7)
-
-        # File-based features (if available)
-        metadata = statement_data.get("metadata", {})
-        features.append(metadata.get("file_size", 0))
-        features.append(metadata.get("num_accounts", 0))
-        features.append(metadata.get("complexity_score", 0))
-
-        # Ensure we have consistent feature names
-        if not self.feature_names:
-            self.feature_names = [
-                "content_length", "generation_time", "is_notes", "is_balance_sheet",
-                "is_pnl", "is_cash_flow", "monetary_values", "line_count",
-                "word_count", "sentence_count", "comma_count", "financial_keywords",
-                "professional_words", "file_size", "num_accounts", "complexity_score"
-            ]
-
-        return np.array(features).reshape(1, -1)
-
-    def train_reward_model(self, training_data: List[Dict[str, Any]]) -> Dict[str, float]:
-        """Train reward model from human feedback data"""
-        if len(training_data) < 2:  # Lowered from 10 to 2 for testing
-            logger.warning(f"Insufficient training data: {len(training_data)} samples")
-            return {"error": "insufficient_data", "sample_count": len(training_data)}
-
-        # Prepare training data
-        X = []
-        y = []
-
-        for sample in training_data:
-            # Create dummy statement data for feature extraction
-            statement_data = {
-                "statement_type": sample.get("statement_type", "unknown"),
-                "generation_time": sample.get("generation_time", 0),
-                "metadata": sample.get("metadata", {})
-            }
-
-            # Extract features
-            features = self.extract_features(statement_data, "")
-            X.append(features.flatten())
-            y.append(sample["reward_score"])
-
-        X = np.array(X)
-        y = np.array(y)
-
-        # Split data
-        if len(X) > 20:
-            X_train, X_test, y_train, y_test = train_test_split(
-                X, y, test_size=0.2, random_state=42
-            )
-        else:
-            X_train, X_test, y_train, y_test = X, X, y, y
-
-        # Train model
-        logger.info(f"Training reward model with {len(X_train)} samples")
-        self.model.fit(X_train, y_train)
-
-        # Evaluate model
-        train_pred = self.model.predict(X_train)
-        test_pred = self.model.predict(X_test)
-
-        metrics = {
-            "train_mse": mean_squared_error(y_train, train_pred),
-            "test_mse": mean_squared_error(y_test, test_pred),
-            "train_r2": r2_score(y_train, train_pred),
-            "test_r2": r2_score(y_test, test_pred),
-            "sample_count": len(training_data),
-            "feature_importance": dict(zip(self.feature_names, self.model.feature_importances_))
+        self.model_version = "2.0-text-based"
+
+        # Load existing feedback data if available
+        self._load_feedback_data()
+
+    def collect_feedback(self, feedback_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Collect and store text-based feedback"""
+
+        # Validate that we have text feedback
+        text_feedback = []
+        if feedback_data.get('specific_errors', '').strip():
+            text_feedback.append(feedback_data['specific_errors'])
+        if feedback_data.get('missing_items', '').strip():
+            text_feedback.append(feedback_data['missing_items'])
+        if feedback_data.get('improvement_suggestions', '').strip():
+            text_feedback.append(feedback_data['improvement_suggestions'])
+
+        if not text_feedback:
+            return {"error": "No text feedback provided"}
+
+        # Store feedback
+        feedback_entry = {
+            "timestamp": time.time(),
+            "statement_id": feedback_data.get("statement_id"),
+            "reviewer_id": feedback_data.get("reviewer_id", "anonymous"),
+            "statement_type": feedback_data.get("statement_type"),
+            "specific_errors": feedback_data.get("specific_errors", ""),
+            "missing_items": feedback_data.get("missing_items", ""),
+            "improvement_suggestions": feedback_data.get("improvement_suggestions", ""),
+            "would_accept_for_audit": feedback_data.get("would_accept_for_audit", False),
+            "complexity_level": feedback_data.get("complexity_level", "medium")
         }
-
-        self.is_trained = True
-
-        # Save model
-        self._save_model(metrics)
-
-        logger.info(f"Reward model trained. R2 score: {metrics['test_r2']:.3f}")
-        return metrics
-
-    def predict_reward(self, statement_data: Dict[str, Any], statement_content: str = "") -> float:
-        """Predict reward score for a generated financial statement"""
-        if not self.is_trained:
-            logger.warning("Reward model not trained, returning default score")
-            return 3.0  # Default neutral score
-
-        try:
-            features = self.extract_features(statement_data, statement_content)
-            reward = self.model.predict(features)[0]
-
-            # Clamp to valid range [1, 5]
-            reward = max(1.0, min(5.0, reward))
-
-            return float(reward)
-
-        except Exception as e:
-            logger.error(f"Error predicting reward: {e}")
-            return 3.0  # Default score on error
-
-    def predict_with_confidence(self, statement_data: Dict[str, Any], statement_content: str = "") -> Tuple[float, float]:
-        """Predict reward with confidence interval"""
-        if not self.is_trained:
-            return 3.0, 0.0
-
-        try:
-            features = self.extract_features(statement_data, statement_content)
-
-            # For Random Forest, we can get prediction from all trees
-            tree_predictions = [tree.predict(features)[0] for tree in self.model.estimators_]
-
-            reward = np.mean(tree_predictions)
-            confidence = 1.0 / (1.0 + np.std(tree_predictions))  # Higher std = lower confidence
-
-            reward = max(1.0, min(5.0, reward))
-
-            return float(reward), float(confidence)
-
-        except Exception as e:
-            logger.error(f"Error predicting reward with confidence: {e}")
-            return 3.0, 0.0
-
-    def get_feature_importance(self) -> Dict[str, float]:
-        """Get feature importance from trained model"""
-        if not self.is_trained:
-            return {}
-
-        return dict(zip(self.feature_names, self.model.feature_importances_))
-
-    def get_model_stats(self) -> Dict[str, Any]:
-        """Get model training statistics"""
-        if os.path.exists(self.model_stats_path):
-            try:
-                with open(self.model_stats_path, "r") as f:
-                    return json.load(f)
-            except:
-                pass
-        return {"status": "not_trained"}
-
-    def _save_model(self, training_stats: Dict[str, Any]):
-        """Save trained model and metadata"""
-        try:
-            # Save model
-            joblib.dump(self.model, self.model_path)
-
-            # Save feature names
-            with open(self.feature_names_path, "w") as f:
-                json.dump(self.feature_names, f)
-
-            # Save training stats
-            stats = {
-                "model_version": self.model_version,
-                "training_timestamp": time.time(),
-                "is_trained": True,
-                **training_stats
-            }
-
-            with open(self.model_stats_path, "w") as f:
-                json.dump(stats, f, indent=2)
-
-            logger.info("Reward model saved successfully")
-
-        except Exception as e:
-            logger.error(f"Error saving model: {e}")
-
-    def _load_model(self):
-        """Load existing trained model"""
-        try:
-            if os.path.exists(self.model_path) and os.path.exists(self.feature_names_path):
-                self.model = joblib.load(self.model_path)
-
-                with open(self.feature_names_path, "r") as f:
-                    self.feature_names = json.load(f)
-
-                self.is_trained = True
-                logger.info("Existing reward model loaded successfully")
-
-        except Exception as e:
-            logger.warning(f"Could not load existing model: {e}")
-            self.is_trained = False
-
-
-class RLHFTrainer:
-    """Coordinates RLHF training pipeline"""
-
-    def __init__(self, feedback_manager, reward_model):
-        self.feedback_manager = feedback_manager
-        self.reward_model = reward_model
-        self.min_feedback_threshold = 2  # Lowered for testing (was 20)
-
-    def should_retrain(self) -> bool:
-        """Determine if model should be retrained"""
-        stats = self.feedback_manager.get_feedback_stats()
-
-        # Check if we have enough new feedback
-        total_feedback = stats.get("total_feedback", 0)
-
-        # Get last training count
-        model_stats = self.reward_model.get_model_stats()
-        last_training_count = model_stats.get("sample_count", 0)
-
-        new_feedback_count = total_feedback - last_training_count
-
-        return (total_feedback >= self.min_feedback_threshold and
-                new_feedback_count >= 2)  # At least 2 new samples (was 10)
-
-    def retrain_model(self) -> Dict[str, Any]:
-        """Retrain reward model with latest feedback"""
-        training_data = self.feedback_manager.get_training_data()
-
-        if len(training_data) < self.min_feedback_threshold:
-            return {
-                "status": "insufficient_data",
-                "current_count": len(training_data),
-                "required_count": self.min_feedback_threshold
-            }
-
-        # Train model
-        metrics = self.reward_model.train_reward_model(training_data)
-
+
+        self.feedback_data.append(feedback_entry)
+        self._save_feedback_data()
+
         return {
             "status": "success",
-            "training_metrics": metrics,
-            "timestamp": time.time()
+            "feedback_stored": True,
+            "total_feedback": len(self.feedback_data)
+        }
+
+    def get_feedback_patterns(self) -> Dict[str, Any]:
+        """Get patterns and insights from collected feedback"""
+
+        if not self.feedback_data:
+            return {"error": "No feedback data available"}
+
+        # Analyze feedback patterns
+        patterns = {
+            "total_feedback": len(self.feedback_data),
+            "statement_types": {},
+            "common_issues": [],
+            "improvement_suggestions": [],
+            "acceptance_rate": 0.0
         }
-
-    def periodic_training_check(self) -> Dict[str, Any]:
-        """Check if retraining is needed and perform if necessary"""
-        if self.should_retrain():
-            logger.info("Initiating automatic model retraining")
-            return self.retrain_model()
-        else:
-            return {"status": "no_retraining_needed"}
+
+        # Count statement types
+        statement_counts = {}
+        acceptance_count = 0
+
+        for feedback in self.feedback_data:
+            stmt_type = feedback.get("statement_type", "unknown")
+            statement_counts[stmt_type] = statement_counts.get(stmt_type, 0) + 1
+
+            if feedback.get("would_accept_for_audit"):
+                acceptance_count += 1
+
+            # Collect common issues
+            if feedback.get("specific_errors"):
+                patterns["common_issues"].append(feedback["specific_errors"])
+            if feedback.get("missing_items"):
+                patterns["common_issues"].append(feedback["missing_items"])
+            if feedback.get("improvement_suggestions"):
+                patterns["improvement_suggestions"].append(feedback["improvement_suggestions"])
+
+        patterns["statement_types"] = statement_counts
+        patterns["acceptance_rate"] = acceptance_count / len(self.feedback_data) if self.feedback_data else 0
+
+        return patterns
+
+    def get_recent_feedback(self, limit: int = 10) -> List[Dict[str, Any]]:
+        """Get recent feedback entries"""
+        return self.feedback_data[-limit:] if self.feedback_data else []
+
+    def _save_feedback_data(self):
+        """Save feedback data to disk"""
+        try:
+            with open(self.feedback_data_path, 'w') as f:
+                json.dump(self.feedback_data, f, indent=2)
+        except Exception as e:
+            logger.error(f"Error saving feedback data: {e}")
+
+    def _load_feedback_data(self):
+        """Load feedback data from disk"""
+        try:
+            if os.path.exists(self.feedback_data_path):
+                with open(self.feedback_data_path, 'r') as f:
+                    self.feedback_data = json.load(f)
+                logger.info(f"Loaded {len(self.feedback_data)} feedback entries")
+        except Exception as e:
+            logger.warning(f"Error loading feedback data: {e}")
+            self.feedback_data = []
+
+# Backward compatibility alias
+FinancialRewardModel = TextBasedRewardModel
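Note: usage of the new model is collect-then-inspect rather than train-then-predict. A minimal sketch (the field values are invented; collect_feedback rejects entries where all three text fields are empty):

    from agents.reward_model import TextBasedRewardModel

    model = TextBasedRewardModel(model_dir="data/models")
    model.collect_feedback({
        "statement_id": "abc-123",
        "statement_type": "notes",
        "specific_errors": "Depreciation schedule omits motor vehicles",
        "missing_items": "",
        "improvement_suggestions": "Break fixed assets out by category",
        "would_accept_for_audit": False,
    })

    patterns = model.get_feedback_patterns()
    print(patterns["acceptance_rate"], patterns["statement_types"])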
agents/rlhf_routes.py CHANGED
@@ -7,7 +7,7 @@ from fastapi.responses import JSONResponse, HTMLResponse
 from typing import Optional, Dict, Any
 import logging
 from agents.feedback_manager import FeedbackManager
-from agents.reward_model import FinancialRewardModel, RLHFTrainer
+from agents.reward_model import TextBasedRewardModel
 from agents.rlhf_workflows import get_rlhf_manager
 
 logger = logging.getLogger(__name__)
@@ -17,37 +17,23 @@ rlhf_router = APIRouter(prefix="/rlhf", tags=["RLHF Feedback"])
 
 # Initialize components
 feedback_manager = FeedbackManager()
-reward_model = FinancialRewardModel()
-trainer = RLHFTrainer(feedback_manager, reward_model)
+reward_model = TextBasedRewardModel()
 
 @rlhf_router.post("/feedback")
 async def collect_feedback(
     statement_id: str = Form(...),
     reviewer_id: str = Form("anonymous"),
-
-    # Technical accuracy metrics (1-5 scale)
-    calculation_accuracy: float = Form(..., ge=1, le=5),
-    account_classification: float = Form(..., ge=1, le=5),
-    statement_balance: float = Form(..., ge=1, le=5),
-
-    # Compliance metrics (1-5 scale)
-    accounting_standards: float = Form(..., ge=1, le=5),
-    regulatory_compliance: float = Form(..., ge=1, le=5),
-
-    # Quality metrics (1-5 scale)
-    completeness: float = Form(..., ge=1, le=5),
-    professional_presentation: float = Form(..., ge=1, le=5),
-
-    # Qualitative feedback
-    specific_errors: str = Form(""),
+
+    # Primary text-based feedback (required)
+    specific_errors: str = Form(..., min_length=1),
     missing_items: str = Form(""),
     improvement_suggestions: str = Form(""),
-
+
     # Binary approval
     would_accept_for_audit: bool = Form(False),
-
+
     # Additional context
-    complexity_level: str = Form("medium")  # low, medium, high
+    complexity_level: str = Form("medium")
 ):
     """
     Collect detailed human feedback on generated financial statements
@@ -59,17 +45,10 @@ async def collect_feedback(
         if not statement_info:
             raise HTTPException(status_code=404, detail="Statement not found")
 
-        # Prepare feedback data
+        # Prepare feedback data (text-focused)
         feedback_data = {
             "statement_id": statement_id,
             "reviewer_id": reviewer_id,
-            "calculation_accuracy": calculation_accuracy,
-            "account_classification": account_classification,
-            "statement_balance": statement_balance,
-            "accounting_standards": accounting_standards,
-            "regulatory_compliance": regulatory_compliance,
-            "completeness": completeness,
-            "professional_presentation": professional_presentation,
             "specific_errors": specific_errors,
             "missing_items": missing_items,
             "improvement_suggestions": improvement_suggestions,
@@ -77,19 +56,16 @@ async def collect_feedback(
             "statement_type": statement_info.get("statement_type"),
             "complexity_level": complexity_level
         }
-
-        # Store feedback
+
+        # Store feedback in both feedback manager and reward model
         feedback_id = feedback_manager.store_feedback(feedback_data)
-
-        # Check if model should be retrained
-        retrain_result = trainer.periodic_training_check()
+        reward_model.collect_feedback(feedback_data)
 
         return {
             "status": "success",
             "feedback_id": feedback_id,
-            "message": "Feedback collected successfully",
-            "model_retrain_status": retrain_result.get("status"),
-            "overall_score": feedback_manager._compute_overall_score(feedback_data)
+            "message": "Text feedback collected successfully",
+            "feedback_stored": True
         }
 
     except Exception as e:
@@ -155,31 +131,33 @@ async def get_feedback_stats():
 @rlhf_router.post("/retrain")
 async def manual_retrain():
     """
-    Manually trigger model retraining
+    Get current feedback patterns (no retraining needed for text-based model)
     """
     try:
-        result = trainer.retrain_model()
+        feedback_patterns = reward_model.get_feedback_patterns()
         return {
             "status": "success",
-            "retrain_result": result
+            "message": "Text-based model doesn't require retraining",
+            "feedback_patterns": feedback_patterns
         }
     except Exception as e:
-        logger.error(f"Error during manual retrain: {e}")
+        logger.error(f"Error getting feedback patterns: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 
 @rlhf_router.get("/model-info")
 async def get_model_info():
     """
-    Get information about the current reward model
+    Get information about the text-based reward model
     """
     try:
+        feedback_patterns = reward_model.get_feedback_patterns()
+
         return {
             "status": "success",
-            "model_trained": reward_model.is_trained,
             "model_version": reward_model.model_version,
-            "feature_count": len(reward_model.feature_names),
-            "feature_names": reward_model.feature_names,
-            "model_stats": reward_model.get_model_stats()
+            "model_type": "text-based",
+            "feedback_collected": feedback_patterns.get("total_feedback", 0),
+            "feedback_patterns": feedback_patterns
         }
     except Exception as e:
         logger.error(f"Error getting model info: {e}")
@@ -222,101 +200,11 @@ def generate_review_html(statement_id: str, statement_info: Dict) -> str:
         <input type="text" name="reviewer_id" placeholder="Enter your identifier">
     </div>
 
-    <h3>Technical Accuracy (1-5 scale)</h3>
-
-    <div class="form-group">
-        <label>Calculation Accuracy:</label>
-        <select name="calculation_accuracy" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Major calculation errors</option>
-            <option value="2">2 - Some calculation errors</option>
-            <option value="3">3 - Minor calculation issues</option>
-            <option value="4">4 - Mostly accurate calculations</option>
-            <option value="5">5 - All calculations correct</option>
-        </select>
-    </div>
-
-    <div class="form-group">
-        <label>Account Classification:</label>
-        <select name="account_classification" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Major classification errors</option>
-            <option value="2">2 - Some classification errors</option>
-            <option value="3">3 - Minor classification issues</option>
-            <option value="4">4 - Mostly correct classification</option>
-            <option value="5">5 - Perfect classification</option>
-        </select>
-    </div>
-
-    <div class="form-group">
-        <label>Statement Balance/Reconciliation:</label>
-        <select name="statement_balance" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Does not balance</option>
-            <option value="2">2 - Major balance issues</option>
-            <option value="3">3 - Minor balance issues</option>
-            <option value="4">4 - Mostly balanced</option>
-            <option value="5">5 - Perfect balance</option>
-        </select>
-    </div>
-
-    <h3>Compliance & Standards (1-5 scale)</h3>
-
-    <div class="form-group">
-        <label>Accounting Standards Compliance:</label>
-        <select name="accounting_standards" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Major compliance issues</option>
-            <option value="2">2 - Some compliance issues</option>
-            <option value="3">3 - Minor compliance issues</option>
-            <option value="4">4 - Mostly compliant</option>
-            <option value="5">5 - Fully compliant</option>
-        </select>
-    </div>
-
-    <div class="form-group">
-        <label>Regulatory Compliance:</label>
-        <select name="regulatory_compliance" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Major regulatory issues</option>
-            <option value="2">2 - Some regulatory issues</option>
-            <option value="3">3 - Minor regulatory issues</option>
-            <option value="4">4 - Mostly compliant</option>
-            <option value="5">5 - Fully compliant</option>
-        </select>
-    </div>
-
-    <h3>Quality & Presentation (1-5 scale)</h3>
-
-    <div class="form-group">
-        <label>Completeness:</label>
-        <select name="completeness" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Major items missing</option>
-            <option value="2">2 - Some items missing</option>
-            <option value="3">3 - Minor items missing</option>
-            <option value="4">4 - Mostly complete</option>
-            <option value="5">5 - Complete</option>
-        </select>
-    </div>
-
-    <div class="form-group">
-        <label>Professional Presentation:</label>
-        <select name="professional_presentation" required>
-            <option value="">Select rating</option>
-            <option value="1">1 - Unprofessional</option>
-            <option value="2">2 - Below standard</option>
-            <option value="3">3 - Adequate</option>
-            <option value="4">4 - Good presentation</option>
-            <option value="5">5 - Excellent presentation</option>
-        </select>
-    </div>
-
     <h3>Detailed Feedback</h3>
 
     <div class="form-group">
-        <label>Specific Errors (if any):</label>
-        <textarea name="specific_errors" rows="3" placeholder="Describe any specific errors found..."></textarea>
+        <label>Specific Errors (required):</label>
+        <textarea name="specific_errors" rows="4" placeholder="Describe any specific errors found..." required></textarea>
     </div>
 
     <div class="form-group">
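Note: with the scale ratings gone, /rlhf/feedback only requires the free-text fields, with specific_errors mandatory. A minimal client sketch using the requests library (host, port, and statement_id are placeholders; the endpoint expects form-encoded data):

    import requests

    resp = requests.post(
        "http://localhost:8000/rlhf/feedback",  # placeholder host/port
        data={
            "statement_id": "abc-123",          # must match a stored statement
            "reviewer_id": "reviewer-1",
            "specific_errors": "Totals in the depreciation note do not reconcile",  # required
            "missing_items": "",
            "improvement_suggestions": "",
            "would_accept_for_audit": "false",
            "complexity_level": "medium",
        },
    )
    print(resp.json())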
agents/rlhf_workflows.py CHANGED
@@ -19,7 +19,7 @@ from agents.simple_tools import (
     generate_llm_notes,
 )
 from agents.feedback_manager import FeedbackManager
-from agents.reward_model import FinancialRewardModel, RLHFTrainer
 
 logger = logging.getLogger(__name__)
 
@@ -42,228 +42,31 @@ class RLHFFinancialAgentState(TypedDict):
     feedback_collected: Optional[bool]
 
 class RLHFWorkflowManager:
-    """Manages RLHF-enhanced workflows"""
-
     def __init__(self):
         self.feedback_manager = FeedbackManager()
-        self.reward_model = FinancialRewardModel()
-        self.trainer = RLHFTrainer(self.feedback_manager, self.reward_model)
-
-        # Check for model retraining on initialization
-        self._check_and_retrain()
-
-    def _check_and_retrain(self):
-        """Check if model needs retraining"""
-        try:
-            result = self.trainer.periodic_training_check()
-            if result.get("status") == "success":
-                logger.info("Reward model retrained successfully")
-        except Exception as e:
-            logger.error(f"Error during model retraining check: {e}")
-
-    def make_rlhf_workflow(self, tool_func, statement_type: str):
-        """Create RLHF-enhanced workflow"""
-
-        def rlhf_node(state: RLHFFinancialAgentState) -> RLHFFinancialAgentState:
-            state["start_time"] = time.time()
-            state["statement_id"] = str(uuid.uuid4())
-
-            try:
-                # Generate multiple candidates if reward model is trained
-                if self.reward_model.is_trained:
-                    candidates = self._generate_candidates(tool_func, state, num_candidates=3)
-                    state["candidates_generated"] = candidates
-
-                    # Select best candidate using reward model
-                    best_candidate, best_index = self._select_best_candidate(
-                        candidates, statement_type, state["file_path"]
-                    )
-
-                    state["result"] = best_candidate
-                    state["best_candidate_index"] = best_index
-
-                else:
-                    # Single generation if no trained model
-                    result = tool_func.invoke({"file_path": state["file_path"]})
-                    state["result"] = result
-                    state["candidates_generated"] = [result]
-                    state["best_candidate_index"] = 0
-
-                # Predict quality score
-                if state["result"].get("status") == "success":
-                    predicted_quality, confidence = self._predict_quality(
-                        state["result"], statement_type, state["file_path"]
-                    )
-                    state["predicted_quality"] = predicted_quality
-                    state["confidence_score"] = confidence
-                    state["status"] = "success"
-
-                    # Store statement for potential feedback
-                    self._store_for_feedback(state, statement_type)
-
-                else:
-                    state["status"] = "error"
-                    state["error"] = state["result"].get("error", "Unknown error")
-
-            except Exception as e:
-                state["status"] = "error"
-                state["error"] = str(e)
-                logger.error(f"Error in RLHF workflow: {e}")
-
-            state["end_time"] = time.time()
-            return state
-
-        # Create workflow graph
-        wf = StateGraph(RLHFFinancialAgentState)
-        wf.add_node("rlhf_run", rlhf_node)
-        wf.set_entry_point("rlhf_run")
-        wf.add_edge("rlhf_run", END)
-        return wf.compile()
-
-    def _generate_candidates(self, tool_func, state: RLHFFinancialAgentState, num_candidates: int = 3) -> List[Dict[str, Any]]:
-        """Generate multiple candidates for comparison"""
-        candidates = []
-
-        for i in range(num_candidates):
-            try:
-                result = tool_func.invoke({"file_path": state["file_path"]})
-                candidates.append({
-                    "index": i,
-                    "result": result,
-                    "timestamp": time.time()
-                })
-            except Exception as e:
-                logger.warning(f"Failed to generate candidate {i}: {e}")
-                candidates.append({
-                    "index": i,
-                    "result": {"status": "error", "error": str(e)},
-                    "timestamp": time.time()
-                })
-
-        return candidates
-
-    def _select_best_candidate(self, candidates: List[Dict[str, Any]], statement_type: str, file_path: str) -> tuple:
-        """Select best candidate using reward model"""
-        best_candidate = None
-        best_score = -1
-        best_index = 0
-
-        for candidate in candidates:
-            if candidate["result"].get("status") == "success":
-                # Create statement data for reward prediction
-                statement_data = {
-                    "statement_type": statement_type,
-                    "file_path": file_path,
-                    "generation_time": 0,  # Could be calculated from timestamps
-                    "metadata": {}
-                }
-
-                # Predict reward
-                predicted_reward, confidence = self.reward_model.predict_with_confidence(
-                    statement_data, ""
-                )
-
-                # Weight by confidence
-                weighted_score = predicted_reward * confidence
-
-                if weighted_score > best_score:
-                    best_score = weighted_score
-                    best_candidate = candidate["result"]
-                    best_index = candidate["index"]
-
-        # Fallback to first successful candidate
-        if best_candidate is None:
-            for candidate in candidates:
-                if candidate["result"].get("status") == "success":
-                    best_candidate = candidate["result"]
-                    best_index = candidate["index"]
-                    break
-
-        # Final fallback
-        if best_candidate is None and candidates:
-            best_candidate = candidates[0]["result"]
-            best_index = 0
-
-        return best_candidate, best_index
-
-    def _predict_quality(self, result: Dict[str, Any], statement_type: str, file_path: str) -> tuple:
-        """Predict quality score for generated statement"""
-        statement_data = {
-            "statement_type": statement_type,
-            "file_path": file_path,
-            "generation_time": 0,
-            "metadata": {}
-        }
-
-        return self.reward_model.predict_with_confidence(statement_data, "")
-
-    def _store_for_feedback(self, state: RLHFFinancialAgentState, statement_type: str):
-        """Store generated statement for feedback collection"""
-        try:
-            statement_data = {
-                "type": statement_type,
-                "file_path": state["file_path"],
-                "output_path": state["result"].get("output_path"),
-                "generation_time": state["end_time"] - state["start_time"],
-                "predicted_quality": state.get("predicted_quality"),
-                "confidence_score": state.get("confidence_score"),
-                "metadata": {
-                    "candidates_count": len(state.get("candidates_generated", [])),
-                    "best_candidate_index": state.get("best_candidate_index"),
-                    "workflow_version": "rlhf_v1"
-                }
-            }
-
-            stored_id = self.feedback_manager.store_generated_statement(statement_data)
-            state["statement_id"] = stored_id
-
-        except Exception as e:
-            logger.error(f"Error storing statement for feedback: {e}")
 
-# Global RLHF manager instance
-rlhf_manager = RLHFWorkflowManager()
 
-# RLHF-enhanced workflows
-rlhf_workflows = {
-    "notes": rlhf_manager.make_rlhf_workflow(generate_notes_full_pipeline_from_path, "notes"),
229
- "pnl": rlhf_manager.make_rlhf_workflow(generate_pnl_statement, "pnl"),
230
- "bs": rlhf_manager.make_rlhf_workflow(generate_balance_sheet, "balance_sheet"),
231
- "cf": rlhf_manager.make_rlhf_workflow(generate_cash_flow_statement, "cash_flow"),
232
- "notes-llm": rlhf_manager.make_rlhf_workflow(generate_llm_notes, "notes"),
233
- }
234
-
235
- def run_rlhf_workflow(file_path: str, kind: str) -> Dict[str, Any]:
236
- """Run RLHF-enhanced workflow"""
237
- state = RLHFFinancialAgentState(
238
- messages=[HumanMessage(content=f"Run RLHF {kind} for {file_path}")],
239
- file_path=file_path,
240
- result={},
241
- status="",
242
- start_time=0,
243
- end_time=0,
244
- error="",
245
- statement_id=None,
246
- predicted_quality=None,
247
- confidence_score=None,
248
- candidates_generated=None,
249
- best_candidate_index=None,
250
- feedback_collected=False
251
- )
252
-
253
- final_state = rlhf_workflows[kind].invoke(state)
254
-
255
- # Add RLHF metadata to result
256
- if final_state["status"] == "success":
257
- final_state["result"]["rlhf_metadata"] = {
258
- "statement_id": final_state.get("statement_id"),
259
- "predicted_quality": final_state.get("predicted_quality"),
260
- "confidence_score": final_state.get("confidence_score"),
261
- "candidates_generated": len(final_state.get("candidates_generated", [])),
262
- "model_used": "rlhf_enhanced"
263
- }
264
-
265
- return final_state
266
 
267
  def get_rlhf_manager() -> RLHFWorkflowManager:
268
- """Get global RLHF manager instance"""
269
- return rlhf_manager
 
19
  generate_llm_notes,
20
  )
21
  from agents.feedback_manager import FeedbackManager
22
+ from agents.reward_model import TextBasedRewardModel
23
 
24
  logger = logging.getLogger(__name__)
25
 
 
42
  feedback_collected: Optional[bool]
43
 
44
  class RLHFWorkflowManager:
45
+ """Manages RLHF-enhanced workflows with text-based feedback"""
46
+
47
  def __init__(self):
48
  self.feedback_manager = FeedbackManager()
49
+ self.reward_model = TextBasedRewardModel()
50
 
51
+ def collect_feedback(self, feedback_data: Dict[str, Any]) -> Dict[str, Any]:
52
+ """Collect text-based feedback"""
53
+ return self.reward_model.collect_feedback(feedback_data)
54
 
55
+ def get_feedback_patterns(self) -> Dict[str, Any]:
56
+ """Get feedback patterns and insights"""
57
+ return self.reward_model.get_feedback_patterns()
58
 
59
  def get_rlhf_manager() -> RLHFWorkflowManager:
60
+ """Get the RLHF workflow manager instance"""
61
+ return RLHFWorkflowManager()
62
+
63
+ def run_rlhf_workflow(file_path: str, kind: str) -> Dict[str, Any]:
64
+ """Run RLHF-enhanced workflow (placeholder - simplified)"""
65
+ # For now, return an explicit not-implemented error structure
66
+ # This can be enhanced later with actual RLHF logic
67
+ return {
68
+ "status": "error",
69
+ "error": "RLHF workflow not implemented for this endpoint",
70
+ "file_path": file_path,
71
+ "kind": kind
72
+ }
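
A minimal usage sketch of the slimmed-down manager above, assuming only the two methods this diff introduces (collect_feedback and get_feedback_patterns); the payload keys are illustrative, not a fixed schema:

# sketch: drive the simplified RLHF manager (hypothetical payload keys)
from agents.rlhf_workflows import get_rlhf_manager

manager = get_rlhf_manager()

# Record one piece of reviewer feedback for a generated statement.
manager.collect_feedback({
    "statement_id": "abc-123",  # hypothetical identifier
    "specific_errors": "Note 5 omits the depreciation schedule.",
})

# Inspect whatever aggregate patterns the text-based reward model has derived.
print(manager.get_feedback_patterns())
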
agents/simple_tools.py CHANGED
@@ -432,6 +432,31 @@ def generate_llm_notes(file_path: str, note_numbers: str = "") -> Dict[str, Any]
432
  input_json = "data/generated_notes/notes.json"
433
  output_excel = "data/generated_notes_excel/notes.xlsx"
434
 
435
  result3 = subprocess.run(
436
  ["python", "notes/llm_notes_excel_converter.py", input_json, output_excel],
437
  env=env,
 
432
  input_json = "data/generated_notes/notes.json"
433
  output_excel = "data/generated_notes_excel/notes.xlsx"
434
 
435
+ # Check if the JSON file was created and has content
436
+ if not os.path.exists(input_json):
437
+ execution_time = round(time.time() - start_time, 2)
438
+ return {
439
+ "status": "error",
440
+ "error": "No notes JSON file was generated - LLM may have failed to produce any notes",
441
+ "execution_id": execution_id,
442
+ "execution_time": execution_time
443
+ }
444
+
445
+ # Check if JSON file has content
446
+ try:
447
+ with open(input_json, 'r', encoding='utf-8') as f:
448
+ json_content = json.load(f)
449
+ if isinstance(json_content, dict) and 'notes' in json_content and not json_content['notes']:
450
+ logger.warning("JSON file exists but contains no notes")
451
+ except Exception as e:
452
+ execution_time = round(time.time() - start_time, 2)
453
+ return {
454
+ "status": "error",
455
+ "error": f"Invalid JSON file generated: {str(e)}",
456
+ "execution_id": execution_id,
457
+ "execution_time": execution_time
458
+ }
459
+
460
  result3 = subprocess.run(
461
  ["python", "notes/llm_notes_excel_converter.py", input_json, output_excel],
462
  env=env,
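
The new guard assumes data/generated_notes/notes.json is a JSON object with a "notes" key; a file of the following assumed shape passes both the existence check and the non-empty check (field names inside each note are illustrative):

# sketch: write a notes.json that satisfies the guards above
import json, os

os.makedirs("data/generated_notes", exist_ok=True)
sample = {"notes": [{"note_number": 5, "title": "Depreciation", "rows": []}]}
with open("data/generated_notes/notes.json", "w", encoding="utf-8") as f:
    json.dump(sample, f, indent=2)
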
app.py CHANGED
@@ -5,18 +5,21 @@ import os
5
  import shutil
6
  import logging
7
  import json
8
  from agents.generator_validator import create_notes_pipeline, InteractiveFeedbackManager
9
  from agents.langgraph import run_workflow
10
  from agents.rlhf_workflows import run_rlhf_workflow
11
  from agents.rlhf_routes import rlhf_router
12
 
13
 
14
  # Configure logging for the application
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger("financial_notes_api")
17
 
18
 
19
-
20
  app = FastAPI(
21
  title="Financial Notes Generator API",
22
  description="API for generating financial notes, balance sheets, cash flow statements, and P&L reports with RLHF capabilities and Interactive Feedback.",
@@ -212,8 +215,22 @@ async def generate_with_feedback(
212
  # Create pipeline with feedback integration
213
  pipeline = create_notes_pipeline(use_rlhf=False)
214
 
215
- # Process through pipeline
216
- generation_result, validation_result = pipeline.process(file_path)
217
 
218
  if generation_result.success and validation_result.is_valid:
219
  response = FileResponse(
@@ -250,31 +267,31 @@ async def generate_with_feedback(
250
  raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")
251
 
252
 
253
  @router.post("/notes")
254
- async def notes_route(file: UploadFile = File(...), use_rlhf: bool = Query(False)):
255
- file_path = f"data/input/{file.filename}"
256
- os.makedirs("data/input", exist_ok=True)
257
- with open(file_path, "wb") as buffer:
258
- shutil.copyfileobj(file.file, buffer)
259
-
260
- # Choose workflow based on RLHF preference
261
- if use_rlhf:
262
- result = run_rlhf_workflow(file_path, "notes")
263
- else:
264
- result = run_workflow(file_path, "notes")
265
-
266
- if result["status"] == "success":
267
- response = FileResponse(result["result"]["output_xlsx_path"], filename=os.path.basename(result["result"]["output_xlsx_path"]))
268
-
269
- # Add RLHF metadata to headers if available
270
- if "rlhf_metadata" in result.get("result", {}):
271
- rlhf_data = result["result"]["rlhf_metadata"]
272
- response.headers["X-RLHF-Statement-ID"] = str(rlhf_data.get("statement_id", ""))
273
- response.headers["X-RLHF-Quality-Score"] = str(rlhf_data.get("predicted_quality", ""))
274
- response.headers["X-RLHF-Confidence"] = str(rlhf_data.get("confidence_score", ""))
275
-
276
- return response
277
- raise HTTPException(status_code=500, detail=result["error"])
278
 
279
  @router.post("/pnl")
280
  async def pnl_route(file: UploadFile = File(...), use_rlhf: bool = Query(False)):
 
5
  import shutil
6
  import logging
7
  import json
8
+ from dotenv import load_dotenv
9
+ from agents.simple_tools import generate_notes_full_pipeline_from_path
10
  from agents.generator_validator import create_notes_pipeline, InteractiveFeedbackManager
11
  from agents.langgraph import run_workflow
12
  from agents.rlhf_workflows import run_rlhf_workflow
13
  from agents.rlhf_routes import rlhf_router
14
 
15
+ # Load environment variables from .env file
16
+ load_dotenv()
17
 
18
  # Configure logging for the application
19
  logging.basicConfig(level=logging.INFO)
20
  logger = logging.getLogger("financial_notes_api")
21
 
22
 
23
  app = FastAPI(
24
  title="Financial Notes Generator API",
25
  description="API for generating financial notes, balance sheets, cash flow statements, and P&L reports with RLHF capabilities and Interactive Feedback.",
 
215
  # Create pipeline with feedback integration
216
  pipeline = create_notes_pipeline(use_rlhf=False)
217
 
218
+ # Prepare feedback context for the generator
219
+ feedback_context = {
220
+ 'session_id': session_id,
221
+ 'udfs': session.archived_udfs, # Pass all archived UDFs
222
+ 'feedback_history': [
223
+ {
224
+ 'text': f.feedback_text,
225
+ 'type': f.feedback_type,
226
+ 'iteration': f.iteration_number
227
+ } for f in session.feedback_history
228
+ ],
229
+ 'current_iteration': session.current_iteration
230
+ }
231
+
232
+ # Process through pipeline with feedback context
233
+ generation_result, validation_result = pipeline.process(file_path, feedback_context=feedback_context)
234
 
235
  if generation_result.success and validation_result.is_valid:
236
  response = FileResponse(
 
267
  raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")
268
 
269
 
270
+
271
  @router.post("/notes")
272
+ async def notes_route(file: UploadFile = File(...)):
273
+ """Generate financial notes directly from uploaded file"""
274
+ try:
275
+ # Save uploaded file
276
+ file_path = f"data/input/{file.filename}"
277
+ os.makedirs("data/input", exist_ok=True)
278
+ with open(file_path, "wb") as buffer:
279
+ shutil.copyfileobj(file.file, buffer)
280
+
281
+ # Generate notes directly
282
+ result = generate_notes_full_pipeline_from_path(file_path)
283
+
284
+ if result["status"] == "success":
285
+ # Return the generated Excel file
286
+ output_path = result["output_xlsx_path"]
287
+ return FileResponse(output_path, filename=os.path.basename(output_path))
288
+
289
+ # If generation failed, raise HTTP exception
290
+ raise HTTPException(status_code=500, detail=result.get("error", "Notes generation failed"))
291
+
292
+ except Exception as e:
293
+ logger.error(f"Error in notes generation: {e}")
294
+ raise HTTPException(status_code=500, detail=f"Error generating notes: {str(e)}")
 
295
 
296
  @router.post("/pnl")
297
  async def pnl_route(file: UploadFile = File(...), use_rlhf: bool = Query(False)):
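
For reference, a sketch that exercises the simplified /notes route end to end; the host, port, and the assumption that the router is mounted at the application root are mine, not the commit's:

# sketch: upload a workbook and save the returned Excel file
import requests

with open("trial_balance.xlsx", "rb") as f:  # assumed input workbook
    resp = requests.post(
        "http://localhost:8000/notes",  # assumed local dev address
        files={"file": ("trial_balance.xlsx", f)},
    )
resp.raise_for_status()

with open("notes_output.xlsx", "wb") as out:
    out.write(resp.content)  # the FileResponse body is the generated .xlsx
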
notes/data_extraction.py CHANGED
@@ -164,6 +164,20 @@ def extract_trial_balance_data(
164
  Returns a list of validated TrialBalanceRecord objects.
165
  """
166
  try:
167
  df_raw = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
168
  except Exception as e:
169
  logger.error(f"Error reading Excel file: {e}")
 
164
  Returns a list of validated TrialBalanceRecord objects.
165
  """
166
  try:
167
+ # First, try to find a sheet containing 'trial' or 'balance' in the name
168
+ excel_file = pd.ExcelFile(file_path)
169
+ trial_sheet_names = [name for name in excel_file.sheet_names if 'trial' in name.lower() or 'balance' in name.lower()]
170
+
171
+ if trial_sheet_names:
172
+ # Use the first matching sheet
173
+ sheet_name = trial_sheet_names[0]
174
+ logger.info(f"Found trial balance sheet: {sheet_name}")
175
+ # Trial balance data typically starts after 5-6 header rows; use row index 5 as the header
176
+ header_row = 5
177
+ else:
178
+ logger.warning(f"No trial balance sheet found, using default sheet index {sheet_name}")
179
+ # No matching sheet: keep the caller-supplied header_row
180
+
181
  df_raw = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
182
  except Exception as e:
183
  logger.error(f"Error reading Excel file: {e}")
notes/llm_notes_generator.py CHANGED
@@ -26,7 +26,7 @@ from pydantic_settings import BaseSettings
26
  from utils.utils import convert_note_json_to_lakhs
27
 
28
  # Load environment variables
29
- load_dotenv()
30
 
31
  # Configure logging
32
  logging.basicConfig(level=logging.INFO)
 
26
  from utils.utils import convert_note_json_to_lakhs
27
 
28
  # Load environment variables
29
+ load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
30
 
31
  # Configure logging
32
  logging.basicConfig(level=logging.INFO)
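
The patched call pins the .env lookup to the project root instead of the process working directory; a sketch of the equivalent resolution, assuming the layout <root>/notes/llm_notes_generator.py with .env at <root>:

# sketch: confirm which .env the patched call loads
from pathlib import Path
from dotenv import load_dotenv

env_path = Path(__file__).parent.parent / ".env"
print(env_path.resolve())  # sanity-check the resolved location
load_dotenv(dotenv_path=env_path)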