Spaces:

sonuprasad23
/

hrbot

Sleeping

hrbot / app.py

Sonu Prasad

integrated dockling

3cf9b4f 2 months ago

11.1 kB

	"""HR Report Generator API - HuggingFace Spaces Backend.

	This Flask API serves as the backend for the HR Report Generator.
	It validates origins against allowed domains stored in HF secrets.
	"""

	import os
	from datetime import datetime
	from pathlib import Path
	from functools import wraps

	from flask import Flask, request, jsonify
	from flask_cors import CORS
	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv()

	# Import our modules
	from src.config import settings
	from src.knowledge.vector_store import FAISSVectorStore
	from src.rag.synthesizer import ReportSynthesizer

	app = Flask(__name__)

	# ============================================
	# CORS and Origin Validation
	# ============================================

	# The ALLOWED_ORIGINS environment variable (set in HF Secrets) holds a
	# comma-separated list of origins. Blank entries are discarded, and when
	# nothing usable remains the built-in list below is used instead.
	ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "")
	ALLOWED_ORIGINS = list(
	    filter(None, (entry.strip() for entry in ALLOWED_ORIGINS_STR.split(",")))
	) or [
	    "https://checkin.hillsideprimarycare.com",
	    "https://hsmg.netlify.app",
	    "http://localhost:3000",
	    "http://localhost:5500",
	    "http://127.0.0.1:5500",
	]

	# CORS is enabled only for the /api/* routes and only for the origins above.
	CORS(
	    app,
	    resources={
	        r"/api/*": {
	            "origins": ALLOWED_ORIGINS,
	            "methods": ["GET", "POST", "OPTIONS"],
	            "allow_headers": ["Content-Type", "Authorization"],
	        },
	    },
	)


	def validate_origin(f):
	    """Decorator that rejects requests coming from an origin not in ALLOWED_ORIGINS.

	    The wrapped view runs when either the ``Origin`` or the ``Referer`` header
	    belongs to one of the allowed origins. Requests that carry no ``Origin``
	    header at all (e.g. curl) are let through unchanged. Anything else gets a
	    JSON ``{"success": False, "error": "Origin not allowed"}`` body with
	    HTTP status 403.

	    Args:
	        f: The view function to protect.

	    Returns:
	        The wrapped view function, accepting the same arguments as ``f``.
	    """

	    def _belongs_to(candidate, allowed):
	        # Match the allowed origin exactly, or as a prefix that ends at a path
	        # boundary. A bare startswith() would also accept look-alike hosts
	        # such as "https://hsmg.netlify.app.evil.com" or a different port
	        # such as "http://localhost:30001".
	        base = allowed.rstrip("/")
	        if not base:
	            return False
	        return candidate == base or candidate.startswith(base + "/")

	    @wraps(f)
	    def decorated_function(*args, **kwargs):
	        origin = request.headers.get("Origin", "")
	        referer = request.headers.get("Referer", "")

	        is_allowed = any(
	            _belongs_to(origin, allowed) or _belongs_to(referer, allowed)
	            for allowed in ALLOWED_ORIGINS
	        )

	        # Only block when an Origin header is present; header-less clients
	        # (e.g. curl) are deliberately allowed.
	        if origin and not is_allowed:
	            return jsonify({
	                "success": False,
	                "error": "Origin not allowed"
	            }), 403

	        # Forward positional and keyword arguments untouched so the decorator
	        # works for views with and without URL parameters.
	        return f(*args, **kwargs)
	    return decorated_function


	# ============================================
	# Configuration
	# ============================================

	def get_active_model(endpoints_path="endpoints.txt") -> str:
	    """Return the active model name read from an endpoints file.

	    The first line that is neither blank nor a ``#`` comment is returned with
	    surrounding whitespace stripped. When the file is missing, unreadable, or
	    contains no such line, the value of the ``LLM_MODEL`` environment variable
	    is returned, falling back to ``"google/gemma-2-9b-it:free"``.

	    Args:
	        endpoints_path: Path of the file to read. Defaults to
	            ``"endpoints.txt"`` relative to the current working directory.

	    Returns:
	        The model identifier to use.
	    """
	    default_model = os.getenv("LLM_MODEL", "google/gemma-2-9b-it:free")

	    try:
	        # Read directly instead of checking exists() first: one less syscall
	        # and no window in which the file can vanish between check and read.
	        content = Path(endpoints_path).read_text(encoding="utf-8")
	    except (OSError, UnicodeError):
	        # Best-effort lookup: any I/O or decoding problem means "use default".
	        return default_model

	    for raw_line in content.splitlines():
	        entry = raw_line.strip()
	        # Skip comments and empty lines
	        if entry and not entry.startswith("#"):
	            return entry

	    return default_model


	# Document type templates
	# Maps each supported document type to its printed title and the ordered
	# list of section headings. Built from (type, title, sections) rows.
	DOCUMENT_TEMPLATES = {
	    doc_type: {'title': heading, 'sections': section_names}
	    for doc_type, heading, section_names in (
	        (
	            'Memorandum',
	            'MEMORANDUM FOR PERSONNEL FILE',
	            [
	                'Purpose of Document',
	                'Incident Background',
	                'Policy References',
	                'Employee Discussion',
	                'Corrective Measures',
	                'Conclusion',
	            ],
	        ),
	        (
	            'Termination Letter',
	            'EMPLOYEE TERMINATION LETTER',
	            [
	                'Purpose of Document',
	                'Employment History',
	                'Reason for Termination',
	                'Policy Violations',
	                'Previous Warnings',
	                'Final Pay and Benefits',
	                'Return of Company Property',
	            ],
	        ),
	        (
	            'Written Disciplinary Action',
	            'WRITTEN DISCIPLINARY ACTION',
	            [
	                'Purpose of Document',
	                'Incident Details',
	                'Policy References',
	                'Previous Coaching / Warnings',
	                'Corrective Action Required',
	                'Employee Acknowledgment',
	            ],
	        ),
	        (
	            '90 Day Performance Evaluation',
	            '90-DAY PERFORMANCE EVALUATION',
	            [
	                'Evaluation Period',
	                'Performance Summary',
	                'Areas of Strength',
	                'Areas for Improvement',
	                'Goals for Next Period',
	                'Recommendation',
	            ],
	        ),
	    )
	}


	# ============================================
	# API Routes
	# ============================================

	@app.route('/')
	def index():
	    """Root endpoint: report service identity, allowed origins and active model."""
	    service_info = {
	        "status": "ok",
	        "service": "HR Report Generator API",
	        "version": "1.0.0",
	        "allowed_origins": ALLOWED_ORIGINS,
	        "active_model": get_active_model(),
	    }
	    return jsonify(service_info)


	@app.route('/api/health')
	def health():
	    """Liveness probe: always answers with a fixed "healthy" status."""
	    liveness = {"status": "healthy"}
	    return jsonify(liveness)


	@app.route('/api/generate', methods=['POST', 'OPTIONS'])
	@validate_origin
	def generate_report():
	    """Generate an HR document of the requested type.

	    Expects a JSON object in the request body with these keys:

	    * ``employee_name`` (required)
	    * ``date_from`` (required)
	    * ``reason`` (required)
	    * ``doc_type`` (optional, defaults to ``'Memorandum'``; unknown types fall
	      back to the Memorandum template)
	    * ``date_to`` (optional)
	    * ``additional_notes`` (optional; used as the incident reason when given)

	    Returns:
	        * ``204`` with an empty body for ``OPTIONS`` requests.
	        * ``400`` when the body is not a JSON object or a required field is
	          missing or empty.
	        * ``500`` when synthesis reports failure or an unexpected error occurs.
	        * Otherwise a JSON object with ``success``, ``markdown``,
	          ``validation``, ``sources`` and ``model_used``.
	    """
	    if request.method == 'OPTIONS':
	        return '', 204

	    try:
	        # silent=True yields None instead of raising on a missing/invalid JSON
	        # body, so malformed requests are reported as 400 rather than 500.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            return jsonify({
	                'success': False,
	                'error': 'Request body must be a JSON object'
	            }), 400

	        doc_type = data.get('doc_type', 'Memorandum')
	        employee_name = data.get('employee_name', '')
	        date_from = data.get('date_from', '')
	        date_to = data.get('date_to', '')
	        reason = data.get('reason', '')
	        additional_notes = data.get('additional_notes', '')

	        if not all([employee_name, date_from, reason]):
	            return jsonify({'success': False, 'error': 'Missing required fields'}), 400

	        # Format date range: a single date unless a distinct end date is given
	        if date_to and date_to != date_from:
	            incident_date_range = f"{date_from} to {date_to}"
	        else:
	            incident_date_range = date_from

	        # Get document template info (Memorandum is the fallback template)
	        template_info = DOCUMENT_TEMPLATES.get(doc_type, DOCUMENT_TEMPLATES['Memorandum'])

	        # Get active model from endpoints.txt
	        model_name = get_active_model()

	        # Build the synthesizer input
	        synth_input = {
	            'employee_name': employee_name,
	            'incident_date_range': incident_date_range,
	            # Free-form notes take precedence over the bare reason
	            'incident_reason': additional_notes or reason,
	            'violation_type': reason,
	            'report_date': datetime.now().strftime('%Y-%m-%d'),
	            'doc_type': doc_type,
	            'doc_title': template_info['title'],
	            'required_sections': template_info['sections'],
	        }

	        # Generate the report
	        synthesizer = ReportSynthesizer(model_name=model_name)
	        result = synthesizer.synthesize_from_dict(synth_input)

	        if not result.success:
	            return jsonify({'success': False, 'error': result.error}), 500

	        return jsonify({
	            'success': True,
	            'markdown': result.markdown_report,
	            'validation': {
	                'is_valid': True,
	                'errors': [],
	                'warnings': [],
	            },
	            'sources': result.retrieval_context.sources_used,
	            'model_used': model_name,
	        })

	    except Exception as e:
	        # Top-level boundary: print the traceback to stderr and answer with a
	        # JSON error instead of letting the exception propagate.
	        import traceback
	        traceback.print_exc()
	        return jsonify({'success': False, 'error': str(e)}), 500


	@app.route('/api/upload-warnings', methods=['POST', 'OPTIONS'])
	@validate_origin
	def upload_warnings():
	    """Store uploaded warning/coaching PDFs and extract their text when possible.

	    Reads the multipart field ``files``. Every part whose sanitized name ends
	    in ``.pdf`` (case-insensitive) is saved under ``data/uploads``; other
	    parts are ignored. If ``PDFExtractor`` can be imported, each saved file is
	    run through ``extract_text`` and the outcome is recorded per file.

	    Returns:
	        * ``204`` with an empty body for ``OPTIONS`` requests.
	        * ``400`` when the request has no ``files`` field.
	        * Otherwise a JSON object with ``success``, ``uploaded`` (saved file
	          names), ``processed`` (per-file extraction results) and
	          ``extractor_available``.
	    """
	    if request.method == 'OPTIONS':
	        return '', 204

	    if 'files' not in request.files:
	        return jsonify({'success': False, 'error': 'No files provided'}), 400

	    from werkzeug.utils import secure_filename

	    files = request.files.getlist('files')
	    uploaded = []
	    processed = []
	    upload_dir = Path('data/uploads')
	    upload_dir.mkdir(parents=True, exist_ok=True)

	    # Try to import PDF extractor; uploads still work without it
	    extractor_available = False
	    extractor = None
	    try:
	        from src.document_processor.converter import PDFExtractor
	        extractor = PDFExtractor()
	        extractor_available = True
	    except ImportError:
	        pass

	    for file in files:
	        if not file.filename:
	            continue

	        # Validate the sanitized name, not the raw one: secure_filename() can
	        # strip a name down to nothing (or drop the dot of the extension), and
	        # saving to an empty name would target the upload directory itself.
	        filename = secure_filename(file.filename)
	        if not filename.lower().endswith('.pdf'):
	            continue

	        # NOTE: an upload with the same sanitized name as an earlier one is
	        # written to the same path.
	        filepath = upload_dir / filename
	        file.save(filepath)
	        uploaded.append(filename)

	        # Extract text if the extractor is available
	        if extractor_available and extractor:
	            try:
	                result = extractor.extract_text(filepath)
	                if result['success']:
	                    processed.append({
	                        'filename': filename,
	                        'text_length': len(result['text']),
	                        'page_count': result['page_count'],
	                        'success': True
	                    })
	                else:
	                    processed.append({
	                        'filename': filename,
	                        'error': result['error'],
	                        'success': False
	                    })
	            except Exception as e:
	                # One bad file must not abort the rest of the batch
	                processed.append({
	                    'filename': filename,
	                    'error': str(e),
	                    'success': False
	                })

	    return jsonify({
	        'success': True,
	        'uploaded': uploaded,
	        'processed': processed,
	        'extractor_available': extractor_available
	    })


	@app.route('/api/status')
	@validate_origin
	def get_status():
	    """Report knowledge-base state, the active model and the allowed origins.

	    If anything raises while the vector store is inspected, an "unloaded"
	    knowledge-base summary is returned together with the error message.
	    """
	    try:
	        store = FAISSVectorStore()
	        is_loaded = store.load()

	        # Only query the store for details when loading succeeded
	        if is_loaded:
	            chunk_count = store.size
	            source_names = store.get_sources()
	        else:
	            chunk_count = 0
	            source_names = []

	        status_payload = {
	            'knowledge_base': {
	                'loaded': is_loaded,
	                'chunks': chunk_count,
	                'sources': source_names,
	            },
	            'active_model': get_active_model(),
	            'allowed_origins': ALLOWED_ORIGINS,
	        }
	        return jsonify(status_payload)
	    except Exception as exc:
	        empty_knowledge_base = {'loaded': False, 'chunks': 0, 'sources': []}
	        return jsonify({
	            'knowledge_base': empty_knowledge_base,
	            'error': str(exc),
	        })


	@app.route('/api/config')
	def get_config():
	    """Expose the non-secret configuration: document type names and active model."""
	    public_config = {
	        'document_types': list(DOCUMENT_TEMPLATES),
	        'active_model': get_active_model(),
	    }
	    return jsonify(public_config)


	# ============================================
	# Main
	# ============================================

	if __name__ == '__main__':
	    # Ensure directories exist
	    settings.ensure_directories()

	    banner = "=" * 50
	    print(banner)
	    print("HR Report Generator API")
	    print(banner)
	    print(f"Active Model: {get_active_model()}")
	    print(f"Allowed Origins: {ALLOWED_ORIGINS}")
	    print(banner)

	    # The server listens on all interfaces, so the interactive debugger must
	    # never be on by default: it allows arbitrary code execution for anyone
	    # who can reach the port. Opt in explicitly with FLASK_DEBUG=1 locally.
	    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}

	    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)