"""HR Report Generator API - HuggingFace Spaces Backend.

This Flask API serves as the backend for the HR Report Generator.
It validates origins against allowed domains stored in HF secrets.
"""

import os
from datetime import datetime
from pathlib import Path
from functools import wraps
from urllib.parse import urlsplit

from flask import Flask, request, jsonify
from flask_cors import CORS
from dotenv import load_dotenv

# Load environment variables.
# NOTE(review): this runs before the ``src`` imports below, presumably so
# that those modules see the loaded variables at import time - keep the order.
load_dotenv()

# Import our modules
from src.config import settings
from src.knowledge.vector_store import FAISSVectorStore
from src.rag.synthesizer import ReportSynthesizer

app = Flask(__name__)

# ============================================
# CORS and Origin Validation
# ============================================

# Allowed origins from environment (set in HF Secrets)
# Format: comma-separated list of allowed origins
ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "")
ALLOWED_ORIGINS = [
    origin.strip()
    for origin in ALLOWED_ORIGINS_STR.split(",")
    if origin.strip()
]

# Default allowed origins if none specified
if not ALLOWED_ORIGINS:
    ALLOWED_ORIGINS = [
        "https://checkin.hillsideprimarycare.com",
        "https://hsmg.netlify.app",
        "http://localhost:3000",
        "http://localhost:5500",
        "http://127.0.0.1:5500",
    ]


def _normalize_origin(url):
    """Reduce *url* to a lowercase ``scheme://host[:port]`` string.

    Returns an empty string when *url* has no scheme or no network location
    (for example ``""``, ``"null"`` or ``"*"``) or cannot be parsed at all.
    """
    try:
        parts = urlsplit(url.strip())
    except ValueError:
        # urlsplit raises ValueError on malformed input such as a broken
        # IPv6 literal; treat that as "no usable origin".
        return ""
    if not parts.scheme or not parts.netloc:
        return ""
    return f"{parts.scheme}://{parts.netloc}".lower()


# Normalised form of ALLOWED_ORIGINS used for exact-match lookups. Entries
# that do not normalise to a real origin are dropped so that an unparseable
# header can never match by accident.
_ALLOWED_ORIGIN_SET = frozenset(
    normalized
    for normalized in (_normalize_origin(entry) for entry in ALLOWED_ORIGINS)
    if normalized
)


def _is_allowed_origin(url):
    """Return True when *url* belongs to one of the allowed origins."""
    normalized = _normalize_origin(url)
    return bool(normalized) and normalized in _ALLOWED_ORIGIN_SET


# Enable CORS with specific origins
CORS(app, resources={
    r"/api/*": {
        "origins": ALLOWED_ORIGINS,
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "Authorization"],
    }
})


def validate_origin(f):
    """Decorator to validate request origin against allowed list.

    The request is accepted when its ``Origin`` or ``Referer`` header
    resolves to exactly one of the allowed origins. Requests that carry no
    ``Origin`` header at all (e.g. curl) are let through. Anything else gets
    a 403 JSON response.

    The comparison is an exact match on ``scheme://host[:port]``. A plain
    prefix test would also accept look-alike hosts such as
    ``https://hsmg.netlify.app.evil.com`` or
    ``https://hsmg.netlify.app@evil.com``.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        origin = request.headers.get("Origin", "")
        referer = request.headers.get("Referer", "")

        # Check if origin or referer matches allowed origins
        is_allowed = _is_allowed_origin(origin) or _is_allowed_origin(referer)

        if not is_allowed and origin:
            # Allow requests without origin (e.g., curl)
            return jsonify({
                "success": False,
                "error": "Origin not allowed"
            }), 403

        return f(*args, **kwargs)

    return decorated_function


# ============================================
# Configuration
# ============================================

def get_active_model():
    """Read the active model from endpoints.txt.

    Returns the first line of ``endpoints.txt`` that is neither blank nor a
    ``#`` comment. Falls back to the ``LLM_MODEL`` environment variable (or
    the built-in default) when the file is missing, unreadable, or has no
    such line.
    """
    endpoints_file = Path("endpoints.txt")
    default_model = os.getenv("LLM_MODEL", "google/gemma-2-9b-it:free")

    try:
        # errors="replace" keeps the lookup working even if a comment line
        # contains bytes that are not valid UTF-8.
        content = endpoints_file.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Missing or unreadable file: deliberately best-effort, use default.
        return default_model

    for line in content.splitlines():
        line = line.strip()
        # Skip comments and empty lines
        if line and not line.startswith("#"):
            return line

    return default_model


# Document type templates
DOCUMENT_TEMPLATES = {
    'Memorandum': {
        'title': 'MEMORANDUM FOR PERSONNEL FILE',
        'sections': [
            'Purpose of Document',
            'Incident Background',
            'Policy References',
            'Employee Discussion',
            'Corrective Measures',
            'Conclusion'
        ]
    },
    'Termination Letter': {
        'title': 'EMPLOYEE TERMINATION LETTER',
        'sections': [
            'Purpose of Document',
            'Employment History',
            'Reason for Termination',
            'Policy Violations',
            'Previous Warnings',
            'Final Pay and Benefits',
            'Return of Company Property'
        ]
    },
    'Written Disciplinary Action': {
        'title': 'WRITTEN DISCIPLINARY ACTION',
        'sections': [
            'Purpose of Document',
            'Incident Details',
            'Policy References',
            'Previous Coaching / Warnings',
            'Corrective Action Required',
            'Employee Acknowledgment'
        ]
    },
    '90 Day Performance Evaluation': {
        'title': '90-DAY PERFORMANCE EVALUATION',
        'sections': [
            'Evaluation Period',
            'Performance Summary',
            'Areas of Strength',
            'Areas for Improvement',
            'Goals for Next Period',
            'Recommendation'
        ]
    }
}


# ============================================
# API Routes
# ============================================

@app.route('/')
def index():
    """Health check and API info."""
    return jsonify({
        "status": "ok",
        "service": "HR Report Generator API",
        "version": "1.0.0",
        "allowed_origins": ALLOWED_ORIGINS,
        "active_model": get_active_model(),
    })


@app.route('/api/health')
def health():
    """Health check endpoint."""
    return jsonify({"status": "healthy"})


@app.route('/api/generate', methods=['POST', 'OPTIONS'])
@validate_origin
def generate_report():
    """Generate an HR document based on type.

    Expects a JSON object with ``employee_name``, ``date_from`` and
    ``reason`` (required) plus optional ``doc_type``, ``date_to`` and
    ``additional_notes``. Responds with 400 for a missing/invalid body or
    missing required fields, 500 when synthesis fails, and otherwise a JSON
    payload containing the generated markdown.
    """
    if request.method == 'OPTIONS':
        return '', 204

    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so bad input becomes a clean 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'success': False,
            'error': 'Request body must be a JSON object'
        }), 400

    try:
        doc_type = data.get('doc_type', 'Memorandum')
        employee_name = data.get('employee_name', '')
        date_from = data.get('date_from', '')
        date_to = data.get('date_to', '')
        reason = data.get('reason', '')
        additional_notes = data.get('additional_notes', '')

        if not all([employee_name, date_from, reason]):
            return jsonify({'success': False, 'error': 'Missing required fields'}), 400

        # Format date range
        if date_to and date_to != date_from:
            incident_date_range = f"{date_from} to {date_to}"
        else:
            incident_date_range = date_from

        # Get document template info (unknown types fall back to Memorandum)
        template_info = DOCUMENT_TEMPLATES.get(doc_type, DOCUMENT_TEMPLATES['Memorandum'])

        # Get active model from endpoints.txt
        model_name = get_active_model()

        # Build the synthesizer input
        synth_input = {
            'employee_name': employee_name,
            'incident_date_range': incident_date_range,
            'incident_reason': additional_notes or reason,
            'violation_type': reason,
            'report_date': datetime.now().strftime('%Y-%m-%d'),
            'doc_type': doc_type,
            'doc_title': template_info['title'],
            'required_sections': template_info['sections'],
        }

        # Generate the report
        synthesizer = ReportSynthesizer(model_name=model_name)
        result = synthesizer.synthesize_from_dict(synth_input)

        if not result.success:
            return jsonify({'success': False, 'error': result.error}), 500

        return jsonify({
            'success': True,
            'markdown': result.markdown_report,
            'validation': {
                'is_valid': True,
                'errors': [],
                'warnings': [],
            },
            'sources': result.retrieval_context.sources_used,
            'model_used': model_name,
        })

    except Exception as e:
        # Top-level boundary for this route: log the traceback server-side
        # and report the failure to the client as JSON.
        import traceback
        traceback.print_exc()
        return jsonify({'success': False, 'error': str(e)}), 500


@app.route('/api/upload-warnings', methods=['POST', 'OPTIONS'])
@validate_origin
def upload_warnings():
    """Upload previous warning/coaching documents and extract text with PyMuPDF.

    Reads the ``files`` multipart field, stores every PDF under
    ``data/uploads`` using a sanitised filename, and - when the optional
    PDF extractor can be imported - reports per-file extraction results.
    Non-PDF entries are ignored.
    """
    if request.method == 'OPTIONS':
        return '', 204

    if 'files' not in request.files:
        return jsonify({'success': False, 'error': 'No files provided'}), 400

    from werkzeug.utils import secure_filename

    files = request.files.getlist('files')
    uploaded = []
    processed = []

    upload_dir = Path('data/uploads')
    upload_dir.mkdir(parents=True, exist_ok=True)

    # Try to import PDF extractor
    extractor_available = False
    extractor = None
    try:
        from src.document_processor.converter import PDFExtractor
        extractor = PDFExtractor()
        extractor_available = True
    except ImportError:
        # The extractor is optional; uploads are still stored without it.
        pass

    for file in files:
        # Case-insensitive extension check so "REPORT.PDF" is accepted too.
        if not file.filename or not file.filename.lower().endswith('.pdf'):
            continue

        filename = secure_filename(file.filename)
        if not filename:
            # Nothing usable left after sanitising; saving would target the
            # upload directory itself.
            continue

        filepath = upload_dir / filename
        file.save(filepath)
        uploaded.append(filename)

        # Extract text with PyMuPDF if available
        if extractor_available and extractor:
            try:
                result = extractor.extract_text(filepath)
                if result['success']:
                    processed.append({
                        'filename': filename,
                        'text_length': len(result['text']),
                        'page_count': result['page_count'],
                        'success': True
                    })
                else:
                    processed.append({
                        'filename': filename,
                        'error': result['error'],
                        'success': False
                    })
            except Exception as e:
                # One bad PDF must not abort the rest of the batch.
                processed.append({
                    'filename': filename,
                    'error': str(e),
                    'success': False
                })

    return jsonify({
        'success': True,
        'uploaded': uploaded,
        'processed': processed,
        'extractor_available': extractor_available
    })


@app.route('/api/status')
@validate_origin
def get_status():
    """Get system status.

    Reports whether the vector store could be loaded, together with the
    active model and the allowed origins. Any failure is reported in the
    JSON body (with an ``error`` key) rather than as an HTTP error status.
    """
    try:
        vector_store = FAISSVectorStore()
        loaded = vector_store.load()

        return jsonify({
            'knowledge_base': {
                'loaded': loaded,
                'chunks': vector_store.size if loaded else 0,
                'sources': vector_store.get_sources() if loaded else [],
            },
            'active_model': get_active_model(),
            'allowed_origins': ALLOWED_ORIGINS,
        })
    except Exception as e:
        return jsonify({
            'knowledge_base': {'loaded': False, 'chunks': 0, 'sources': []},
            'error': str(e),
        })


@app.route('/api/config')
def get_config():
    """Get public configuration (no secrets)."""
    return jsonify({
        'document_types': list(DOCUMENT_TEMPLATES.keys()),
        'active_model': get_active_model(),
    })


# ============================================
# Main
# ============================================

if __name__ == '__main__':
    # Ensure directories exist
    settings.ensure_directories()

    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be on by default while listening on all interfaces. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }

    print("=" * 50)
    print("HR Report Generator API")
    print("=" * 50)
    print(f"Active Model: {get_active_model()}")
    print(f"Allowed Origins: {ALLOWED_ORIGINS}")
    print("=" * 50)

    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)