finryver-dev / agents /rlhf_routes.py
Sahil Garg
udf added to /notes-llm alongwith RLHF
6611563
raw
history blame
9.11 kB
"""
RLHF Feedback Collection Routes for FinRyver
Handles human feedback collection for financial statement quality
"""
from fastapi import APIRouter, HTTPException, Form, Query, Request
from fastapi.responses import JSONResponse, HTMLResponse
from typing import Optional, Dict, Any
import logging
from agents.feedback_manager import FeedbackManager
from agents.reward_model import TextBasedRewardModel
from agents.rlhf_workflows import get_rlhf_manager
logger = logging.getLogger(__name__)
# Router for the RLHF endpoints; every route registered on it is served under
# the "/rlhf" prefix and tagged "RLHF Feedback".
rlhf_router = APIRouter(prefix="/rlhf", tags=["RLHF Feedback"])
# Module-level instances created once at import time and shared by all of the
# route handlers defined below.
feedback_manager = FeedbackManager()
reward_model = TextBasedRewardModel()
@rlhf_router.post("/feedback")
async def collect_feedback(
    statement_id: str = Form(...),
    reviewer_id: str = Form("anonymous"),
    # Primary text-based feedback (required)
    specific_errors: str = Form(..., min_length=1),
    missing_items: str = Form(""),
    improvement_suggestions: str = Form(""),
    # Binary approval
    would_accept_for_audit: bool = Form(False),
    # Additional context
    complexity_level: str = Form("medium"),
):
    """
    Collect detailed human feedback on a generated financial statement.

    The submitted form fields are combined with the statement's type into a
    single feedback record, which is stored through the feedback manager and
    also handed to the reward model.

    Args:
        statement_id: Identifier of the statement being reviewed.
        reviewer_id: Identifier of the reviewer; defaults to "anonymous".
        specific_errors: Required, non-empty description of errors found.
        missing_items: Optional description of missing items.
        improvement_suggestions: Optional improvement suggestions.
        would_accept_for_audit: Whether the reviewer would accept the statement.
        complexity_level: Reviewer-assessed complexity; defaults to "medium".

    Returns:
        A dict with the status, the stored feedback id and a confirmation message.

    Raises:
        HTTPException: 404 when the statement is not found, 500 on any other
            failure while storing the feedback.
    """
    try:
        # Get statement info
        statement_info = feedback_manager.get_statement_for_review(statement_id)
        if not statement_info:
            raise HTTPException(status_code=404, detail="Statement not found")

        # Prepare feedback data (text-focused)
        feedback_data = {
            "statement_id": statement_id,
            "reviewer_id": reviewer_id,
            "specific_errors": specific_errors,
            "missing_items": missing_items,
            "improvement_suggestions": improvement_suggestions,
            "would_accept_for_audit": would_accept_for_audit,
            "statement_type": statement_info.get("statement_type"),
            "complexity_level": complexity_level,
        }

        # Store feedback in both feedback manager and reward model
        feedback_id = feedback_manager.store_feedback(feedback_data)
        reward_model.collect_feedback(feedback_data)

        return {
            "status": "success",
            "feedback_id": feedback_id,
            "message": "Text feedback collected successfully",
            "feedback_stored": True,
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error() call drops.
        logger.exception("Error collecting feedback: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Error collecting feedback: {str(e)}"
        ) from e
@rlhf_router.get("/review/{statement_id}")
async def get_review_interface(statement_id: str):
    """
    Get a review interface for human feedback collection.

    Looks up the statement through the feedback manager and renders the HTML
    review form for it.

    Args:
        statement_id: Identifier of the statement to review (path parameter).

    Returns:
        An HTMLResponse containing the review form.

    Raises:
        HTTPException: 404 when the statement is not found, 500 on any other
            failure while building the page.
    """
    try:
        statement_info = feedback_manager.get_statement_for_review(statement_id)
        if not statement_info:
            raise HTTPException(status_code=404, detail="Statement not found")

        # Generate HTML review form
        html_content = generate_review_html(statement_id, statement_info)
        return HTMLResponse(content=html_content)
    except HTTPException:
        # Let the 404 above propagate as-is; without this clause the generic
        # handler below would turn "not found" into a 500.
        raise
    except Exception as e:
        logger.exception("Error getting review interface: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@rlhf_router.get("/pending-reviews")
async def get_pending_reviews(limit: int = Query(10, ge=1, le=50)):
    """
    List statements that are still waiting for a human review.

    Args:
        limit: Maximum number of statements to return (1-50, default 10).

    Returns:
        A dict with the status, the pending statements and their count.

    Raises:
        HTTPException: 500 when the lookup fails.
    """
    try:
        awaiting_review = feedback_manager.get_pending_reviews(limit)
        payload = {"status": "success"}
        payload["pending_reviews"] = awaiting_review
        payload["count"] = len(awaiting_review)
        return payload
    except Exception as exc:
        logger.error(f"Error getting pending reviews: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@rlhf_router.get("/stats")
async def get_feedback_stats():
    """
    Report feedback statistics together with reward-model statistics.

    Returns:
        A dict with the status, the feedback manager's stats, the reward
        model's stats and feature importance, and the model's trained flag.

    Raises:
        HTTPException: 500 when any of the lookups fails.
    """
    try:
        # The dict literal evaluates its values top to bottom, so the calls run
        # in the order they are listed here.
        return {
            "status": "success",
            "feedback_stats": feedback_manager.get_feedback_stats(),
            "model_stats": reward_model.get_model_stats(),
            "feature_importance": reward_model.get_feature_importance(),
            "model_trained": reward_model.is_trained,
        }
    except Exception as exc:
        logger.error(f"Error getting stats: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@rlhf_router.post("/retrain")
async def manual_retrain():
    """
    Return the current feedback patterns.

    No retraining is performed: the response message states that the
    text-based model does not require it.

    Returns:
        A dict with the status, an explanatory message and the feedback
        patterns reported by the reward model.

    Raises:
        HTTPException: 500 when the patterns cannot be retrieved.
    """
    try:
        patterns = reward_model.get_feedback_patterns()
        response = dict(
            status="success",
            message="Text-based model doesn't require retraining",
            feedback_patterns=patterns,
        )
        return response
    except Exception as exc:
        logger.error(f"Error getting feedback patterns: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@rlhf_router.get("/model-info")
async def get_model_info():
    """
    Describe the text-based reward model.

    Returns:
        A dict with the status, the model version, the model type, the number
        of feedback entries collected (0 when the patterns carry no
        "total_feedback" key) and the feedback patterns themselves.

    Raises:
        HTTPException: 500 when the information cannot be retrieved.
    """
    try:
        # Fetch the patterns before reading any model attribute.
        patterns = reward_model.get_feedback_patterns()
        info = {"status": "success"}
        info["model_version"] = reward_model.model_version
        info["model_type"] = "text-based"
        info["feedback_collected"] = patterns.get("total_feedback", 0)
        info["feedback_patterns"] = patterns
        return info
    except Exception as exc:
        logger.error(f"Error getting model info: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
def generate_review_html(statement_id: str, statement_info: Dict) -> str:
    """Generate the HTML form used to review a statement.

    Every value interpolated into the page is HTML-escaped first, because
    ``statement_id`` comes from the request URL and the ``statement_info``
    values are stored data; unescaped, either could inject markup or break out
    of the hidden input's ``value`` attribute.

    Args:
        statement_id: Identifier shown on the page and posted back with the form.
        statement_info: Statement metadata; the keys ``statement_type``,
            ``timestamp`` and ``file_path`` are displayed, each falling back
            to "Unknown" when absent.

    Returns:
        The complete HTML document as a string.
    """
    # Local import so this block does not depend on the module's import list.
    from html import escape

    def _safe(value) -> str:
        # str() matches what the f-string would render; quote=True also escapes
        # quotes so the result is safe inside a double-quoted attribute.
        return escape(str(value), quote=True)

    safe_id = _safe(statement_id)
    safe_type = _safe(statement_info.get('statement_type', 'Unknown'))
    safe_timestamp = _safe(statement_info.get('timestamp', 'Unknown'))
    safe_file = _safe(statement_info.get('file_path', 'Unknown'))

    return f"""
<!DOCTYPE html>
<html>
<head>
<title>FinRyver - Statement Review</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; }}
.form-group {{ margin: 15px 0; }}
label {{ display: block; margin-bottom: 5px; font-weight: bold; }}
input, select, textarea {{ width: 100%; padding: 8px; margin-bottom: 10px; }}
.rating {{ display: flex; gap: 10px; }}
.rating input {{ width: auto; }}
button {{ background-color: #007bff; color: white; padding: 10px 20px; border: none; cursor: pointer; }}
.statement-info {{ background-color: #f8f9fa; padding: 15px; margin-bottom: 20px; border-radius: 5px; }}
</style>
</head>
<body>
<h1>Financial Statement Review</h1>
<div class="statement-info">
<h3>Statement Information</h3>
<p><strong>Statement ID:</strong> {safe_id}</p>
<p><strong>Type:</strong> {safe_type}</p>
<p><strong>Generated:</strong> {safe_timestamp}</p>
<p><strong>File:</strong> {safe_file}</p>
</div>
<form action="/rlhf/feedback" method="post">
<input type="hidden" name="statement_id" value="{safe_id}">
<div class="form-group">
<label>Reviewer ID (optional):</label>
<input type="text" name="reviewer_id" placeholder="Enter your identifier">
</div>
<h3>Detailed Feedback</h3>
<div class="form-group">
<label>Specific Errors (required):</label>
<textarea name="specific_errors" rows="4" placeholder="Describe any specific errors found..." required></textarea>
</div>
<div class="form-group">
<label>Missing Items (if any):</label>
<textarea name="missing_items" rows="3" placeholder="List any missing items or information..."></textarea>
</div>
<div class="form-group">
<label>Improvement Suggestions:</label>
<textarea name="improvement_suggestions" rows="3" placeholder="Suggest improvements..."></textarea>
</div>
<div class="form-group">
<label>Complexity Level:</label>
<select name="complexity_level">
<option value="low">Low</option>
<option value="medium" selected>Medium</option>
<option value="high">High</option>
</select>
</div>
<div class="form-group">
<label>
<input type="checkbox" name="would_accept_for_audit" value="true">
Would accept this statement for audit/compliance purposes
</label>
</div>
<button type="submit">Submit Feedback</button>
</form>
</body>
</html>
"""