Spaces:

point9
/

finryver-dev

Sleeping

finryver-dev / agents /rlhf_routes.py

Sahil Garg

udf added to /notes-llm along with RLHF

6611563 4 months ago

9.11 kB

	"""
	RLHF Feedback Collection Routes for FinRyver
	Handles human feedback collection for financial statement quality
	"""
	from fastapi import APIRouter, HTTPException, Form, Query, Request
	from fastapi.responses import JSONResponse, HTMLResponse
	from typing import Optional, Dict, Any
	import logging
	from agents.feedback_manager import FeedbackManager
	from agents.reward_model import TextBasedRewardModel
	from agents.rlhf_workflows import get_rlhf_manager

	# Module-level logger named after this module's import path
	logger = logging.getLogger(__name__)

	# Create RLHF router: every route declared on it is served under the
	# "/rlhf" prefix and tagged "RLHF Feedback"
	rlhf_router = APIRouter(prefix="/rlhf", tags=["RLHF Feedback"])

	# Initialize components: both objects are constructed once, at import time,
	# and are shared by all route handlers in this module
	feedback_manager = FeedbackManager()
	reward_model = TextBasedRewardModel()

	@rlhf_router.post("/feedback")
	async def collect_feedback(
	    statement_id: str = Form(...),
	    reviewer_id: str = Form("anonymous"),
	    # Primary text-based feedback (required)
	    specific_errors: str = Form(..., min_length=1),
	    missing_items: str = Form(""),
	    improvement_suggestions: str = Form(""),
	    # Binary approval
	    would_accept_for_audit: bool = Form(False),
	    # Additional context
	    complexity_level: str = Form("medium"),
	):
	    """
	    Collect detailed human feedback on a generated financial statement.

	    The submitted form fields are combined with the statement's
	    ``statement_type`` (looked up through ``feedback_manager``) and the
	    resulting record is handed to both ``feedback_manager.store_feedback``
	    and ``reward_model.collect_feedback``.

	    Args:
	        statement_id: Identifier of the statement being reviewed (required).
	        reviewer_id: Reviewer identifier; defaults to "anonymous".
	        specific_errors: Free-text description of errors (required, at
	            least one character).
	        missing_items: Free-text list of missing items; defaults to "".
	        improvement_suggestions: Free-text suggestions; defaults to "".
	        would_accept_for_audit: Reviewer's accept/reject flag; defaults to
	            False.
	        complexity_level: Reviewer-assessed complexity; defaults to "medium".

	    Returns:
	        A dict with ``status``, the ``feedback_id`` returned by
	        ``feedback_manager.store_feedback``, a ``message`` and
	        ``feedback_stored``.

	    Raises:
	        HTTPException: 404 when no statement is found for ``statement_id``;
	            500 when any other error occurs while storing the feedback.
	    """
	    try:
	        # Get statement info
	        statement_info = feedback_manager.get_statement_for_review(statement_id)
	        if not statement_info:
	            raise HTTPException(status_code=404, detail="Statement not found")

	        # Prepare feedback data (text-focused)
	        feedback_data = {
	            "statement_id": statement_id,
	            "reviewer_id": reviewer_id,
	            "specific_errors": specific_errors,
	            "missing_items": missing_items,
	            "improvement_suggestions": improvement_suggestions,
	            "would_accept_for_audit": would_accept_for_audit,
	            "statement_type": statement_info.get("statement_type"),
	            "complexity_level": complexity_level,
	        }

	        # Store feedback in both feedback manager and reward model.
	        # NOTE(review): the two writes are not atomic - if the reward model
	        # call fails, the client gets a 500 although the feedback manager
	        # has already stored the record.
	        feedback_id = feedback_manager.store_feedback(feedback_data)
	        reward_model.collect_feedback(feedback_data)

	        return {
	            "status": "success",
	            "feedback_id": feedback_id,
	            "message": "Text feedback collected successfully",
	            "feedback_stored": True,
	        }

	    except HTTPException:
	        # HTTPException is itself an Exception subclass; without this clause
	        # the deliberate 404 above would be rewritten into a 500 below.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback alongside the message
	        logger.exception("Error collecting feedback: %s", e)
	        raise HTTPException(
	            status_code=500, detail=f"Error collecting feedback: {str(e)}"
	        ) from e

	@rlhf_router.get("/review/{statement_id}")
	async def get_review_interface(statement_id: str):
	    """
	    Serve the HTML review form for one statement.

	    Looks the statement up through ``feedback_manager`` and renders the
	    form with ``generate_review_html``.

	    Args:
	        statement_id: Identifier taken from the URL path.

	    Returns:
	        An ``HTMLResponse`` whose body is the review form.

	    Raises:
	        HTTPException: 404 when no statement is found for ``statement_id``;
	            500 when the lookup or the rendering fails for any other reason.
	    """
	    try:
	        statement_info = feedback_manager.get_statement_for_review(statement_id)
	        if not statement_info:
	            raise HTTPException(status_code=404, detail="Statement not found")

	        # Generate HTML review form
	        html_content = generate_review_html(statement_id, statement_info)
	        return HTMLResponse(content=html_content)

	    except HTTPException:
	        # Let the deliberate 404 through; the generic handler below would
	        # otherwise catch it (HTTPException subclasses Exception) and turn
	        # it into a 500.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback alongside the message
	        logger.exception("Error getting review interface: %s", e)
	        raise HTTPException(status_code=500, detail=str(e)) from e

	@rlhf_router.get("/pending-reviews")
	async def get_pending_reviews(limit: int = Query(10, ge=1, le=50)):
	    """
	    List the statements that still need human review.

	    Args:
	        limit: Query parameter constrained to 1..50 (default 10) and
	            forwarded to ``feedback_manager.get_pending_reviews``
	            (presumably the maximum number of statements returned).

	    Returns:
	        A dict with ``status``, the ``pending_reviews`` collection and its
	        ``count``.

	    Raises:
	        HTTPException: 500 when fetching or counting the reviews fails.
	    """
	    try:
	        awaiting_review = feedback_manager.get_pending_reviews(limit)
	        payload = dict(
	            status="success",
	            pending_reviews=awaiting_review,
	            count=len(awaiting_review),
	        )
	    except Exception as e:
	        logger.error(f"Error getting pending reviews: {e}")
	        raise HTTPException(status_code=500, detail=str(e))
	    else:
	        return payload

	@rlhf_router.get("/stats")
	async def get_feedback_stats():
	    """
	    Report feedback statistics together with reward-model statistics.

	    Returns:
	        A dict with ``status``, ``feedback_stats`` (from the feedback
	        manager), ``model_stats`` and ``feature_importance`` (from the
	        reward model) and the reward model's ``is_trained`` flag under
	        ``model_trained``.

	    Raises:
	        HTTPException: 500 when any of the lookups fails.
	    """
	    try:
	        # Entries are evaluated top to bottom, so the component calls happen
	        # in the order listed here.
	        summary = {
	            "status": "success",
	            "feedback_stats": feedback_manager.get_feedback_stats(),
	            "model_stats": reward_model.get_model_stats(),
	            "feature_importance": reward_model.get_feature_importance(),
	            "model_trained": reward_model.is_trained,
	        }
	        return summary
	    except Exception as e:
	        logger.error(f"Error getting stats: {e}")
	        raise HTTPException(status_code=500, detail=str(e))

	@rlhf_router.post("/retrain")
	async def manual_retrain():
	    """
	    Return the current feedback patterns instead of retraining.

	    No training is triggered here: the handler only reads
	    ``reward_model.get_feedback_patterns()`` and reports it along with a
	    message saying the text-based model needs no retraining.

	    Returns:
	        A dict with ``status``, ``message`` and ``feedback_patterns``.

	    Raises:
	        HTTPException: 500 when the patterns cannot be fetched.
	    """
	    try:
	        patterns = reward_model.get_feedback_patterns()
	    except Exception as e:
	        logger.error(f"Error getting feedback patterns: {e}")
	        raise HTTPException(status_code=500, detail=str(e))

	    return {
	        "status": "success",
	        "message": "Text-based model doesn't require retraining",
	        "feedback_patterns": patterns,
	    }

	@rlhf_router.get("/model-info")
	async def get_model_info():
	    """
	    Describe the text-based reward model.

	    Returns:
	        A dict with ``status``, the reward model's ``model_version``, the
	        fixed ``model_type`` "text-based", ``feedback_collected`` (the
	        ``total_feedback`` entry of the feedback patterns, 0 when absent)
	        and the full ``feedback_patterns``.

	    Raises:
	        HTTPException: 500 when reading the model information fails.
	    """
	    try:
	        patterns = reward_model.get_feedback_patterns()
	        version = reward_model.model_version
	        collected = patterns.get("total_feedback", 0)
	        info = {
	            "status": "success",
	            "model_version": version,
	            "model_type": "text-based",
	            "feedback_collected": collected,
	            "feedback_patterns": patterns,
	        }
	        return info
	    except Exception as e:
	        logger.error(f"Error getting model info: {e}")
	        raise HTTPException(status_code=500, detail=str(e))

	def generate_review_html(statement_id: str, statement_info: Dict) -> str:
	    """
	    Generate the HTML form used to review one statement.

	    The page shows the statement's id, type, generation timestamp and file
	    path, followed by a form that POSTs to ``/rlhf/feedback``.

	    Every interpolated value is HTML-escaped (quotes included) before it is
	    placed in the page: ``statement_id`` arrives from the request URL and is
	    also written into an attribute value, so unescaped output would allow
	    markup or script injection.

	    Args:
	        statement_id: Identifier of the statement under review.
	        statement_info: Mapping read for ``statement_type``, ``timestamp``
	            and ``file_path``; a missing key is rendered as "Unknown".

	    Returns:
	        The complete HTML document as a string.
	    """
	    # Function-scope import keeps this block self-contained (stdlib only).
	    from html import escape

	    # str() first: the mapping's values are not guaranteed to be strings,
	    # and escape() only accepts str.
	    safe_id = escape(str(statement_id), quote=True)
	    safe_type = escape(str(statement_info.get('statement_type', 'Unknown')), quote=True)
	    safe_timestamp = escape(str(statement_info.get('timestamp', 'Unknown')), quote=True)
	    safe_file_path = escape(str(statement_info.get('file_path', 'Unknown')), quote=True)

	    return f"""
	<!DOCTYPE html>
	<html>
	<head>
	<title>FinRyver - Statement Review</title>
	<style>
	body {{ font-family: Arial, sans-serif; margin: 40px; }}
	.form-group {{ margin: 15px 0; }}
	label {{ display: block; margin-bottom: 5px; font-weight: bold; }}
	input, select, textarea {{ width: 100%; padding: 8px; margin-bottom: 10px; }}
	.rating {{ display: flex; gap: 10px; }}
	.rating input {{ width: auto; }}
	button {{ background-color: #007bff; color: white; padding: 10px 20px; border: none; cursor: pointer; }}
	.statement-info {{ background-color: #f8f9fa; padding: 15px; margin-bottom: 20px; border-radius: 5px; }}
	</style>
	</head>
	<body>
	<h1>Financial Statement Review</h1>

	<div class="statement-info">
	<h3>Statement Information</h3>
	<p><strong>Statement ID:</strong> {safe_id}</p>
	<p><strong>Type:</strong> {safe_type}</p>
	<p><strong>Generated:</strong> {safe_timestamp}</p>
	<p><strong>File:</strong> {safe_file_path}</p>
	</div>

	<form action="/rlhf/feedback" method="post">
	<input type="hidden" name="statement_id" value="{safe_id}">

	<div class="form-group">
	<label>Reviewer ID (optional):</label>
	<input type="text" name="reviewer_id" placeholder="Enter your identifier">
	</div>

	<h3>Detailed Feedback</h3>

	<div class="form-group">
	<label>Specific Errors (required):</label>
	<textarea name="specific_errors" rows="4" placeholder="Describe any specific errors found..." required></textarea>
	</div>

	<div class="form-group">
	<label>Missing Items (if any):</label>
	<textarea name="missing_items" rows="3" placeholder="List any missing items or information..."></textarea>
	</div>

	<div class="form-group">
	<label>Improvement Suggestions:</label>
	<textarea name="improvement_suggestions" rows="3" placeholder="Suggest improvements..."></textarea>
	</div>

	<div class="form-group">
	<label>Complexity Level:</label>
	<select name="complexity_level">
	<option value="low">Low</option>
	<option value="medium" selected>Medium</option>
	<option value="high">High</option>
	</select>
	</div>

	<div class="form-group">
	<label>
	<input type="checkbox" name="would_accept_for_audit" value="true">
	Would accept this statement for audit/compliance purposes
	</label>
	</div>

	<button type="submit">Submit Feedback</button>
	</form>
	</body>
	</html>
	"""