Spaces:
Sleeping
Sleeping
| """HR Report Generator API - HuggingFace Spaces Backend. | |
| This Flask API serves as the backend for the HR Report Generator. | |
| It validates origins against allowed domains stored in HF secrets. | |
| """ | |
| import os | |
| from datetime import datetime | |
| from pathlib import Path | |
| from functools import wraps | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| from dotenv import load_dotenv | |
| # Load environment variables | |
| load_dotenv() | |
| # Import our modules | |
| from src.config import settings | |
| from src.knowledge.vector_store import FAISSVectorStore | |
| from src.rag.synthesizer import ReportSynthesizer | |
# WSGI application object that the CORS setup and the handlers below attach to.
app = Flask(import_name=__name__)
# ============================================
# CORS and Origin Validation
# ============================================

# Raw comma-separated origin list taken from the environment (set in HF Secrets).
ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "")

# Trim every entry and drop the blank ones; when nothing usable remains
# (variable unset, empty, or only commas/whitespace) use the built-in defaults.
ALLOWED_ORIGINS = list(
    filter(None, map(str.strip, ALLOWED_ORIGINS_STR.split(",")))
) or [
    "https://checkin.hillsideprimarycare.com",
    "https://hsmg.netlify.app",
    "http://localhost:3000",
    "http://localhost:5500",
    "http://127.0.0.1:5500",
]
# Apply CORS to the /api/* routes only, limited to the configured origins,
# the three HTTP methods listed, and the two request headers listed.
CORS(
    app,
    resources={
        r"/api/*": dict(
            origins=ALLOWED_ORIGINS,
            methods=["GET", "POST", "OPTIONS"],
            allow_headers=["Content-Type", "Authorization"],
        ),
    },
)
def validate_origin(f):
    """Decorator that rejects requests whose Origin is not in ALLOWED_ORIGINS.

    A request passes when either:
      * it carries no ``Origin`` header (e.g. curl or server-to-server), or
      * its ``Origin`` equals an allowed origin, or
      * its ``Referer`` is an allowed origin or a URL underneath one.

    Otherwise a JSON error body is returned with HTTP 403.

    Args:
        f: The view function to protect.

    Returns:
        The wrapped view function; its name and docstring are preserved so
        several views can share this decorator without colliding on the
        wrapper's name.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        origin = request.headers.get("Origin", "").rstrip("/")
        referer = request.headers.get("Referer", "")

        # Compare whole origins rather than string prefixes: a plain
        # startswith() would accept "https://allowed.example.evil.com" or
        # "http://localhost:30001" for an allowed "http://localhost:3000".
        is_allowed = False
        for allowed in ALLOWED_ORIGINS:
            allowed = allowed.rstrip("/")
            if not allowed:
                continue
            if (
                origin == allowed
                or referer == allowed
                or referer.startswith(allowed + "/")
            ):
                is_allowed = True
                break

        # Requests without an Origin header are let through on purpose.
        if origin and not is_allowed:
            return jsonify({
                "success": False,
                "error": "Origin not allowed"
            }), 403

        return f(*args, **kwargs)

    return decorated_function
# ============================================
# Configuration
# ============================================

def get_active_model():
    """Return the active model name, preferring ``endpoints.txt``.

    The file is looked up relative to the current working directory. The
    first line that is neither blank nor a ``#`` comment is returned with
    surrounding whitespace removed.

    Returns:
        The model name from ``endpoints.txt``, or the ``LLM_MODEL``
        environment variable (default ``google/gemma-2-9b-it:free``) when the
        file is missing, unreadable, not valid UTF-8, or has no usable line.
    """
    default_model = os.getenv("LLM_MODEL", "google/gemma-2-9b-it:free")

    try:
        # utf-8-sig decodes plain UTF-8 and also strips a leading BOM, which
        # would otherwise end up inside the returned model name.
        content = Path("endpoints.txt").read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        # Best effort: a missing or unreadable file just means "use the default".
        return default_model

    for raw_line in content.splitlines():
        line = raw_line.strip()
        # Skip comments and empty lines
        if line and not line.startswith("#"):
            return line

    return default_model
# Per-document-type templates: the heading used for the generated document
# and the ordered list of section names it is required to contain.
DOCUMENT_TEMPLATES = {
    "Memorandum": {
        "title": "MEMORANDUM FOR PERSONNEL FILE",
        "sections": [
            "Purpose of Document",
            "Incident Background",
            "Policy References",
            "Employee Discussion",
            "Corrective Measures",
            "Conclusion",
        ],
    },
    "Termination Letter": {
        "title": "EMPLOYEE TERMINATION LETTER",
        "sections": [
            "Purpose of Document",
            "Employment History",
            "Reason for Termination",
            "Policy Violations",
            "Previous Warnings",
            "Final Pay and Benefits",
            "Return of Company Property",
        ],
    },
    "Written Disciplinary Action": {
        "title": "WRITTEN DISCIPLINARY ACTION",
        "sections": [
            "Purpose of Document",
            "Incident Details",
            "Policy References",
            "Previous Coaching / Warnings",
            "Corrective Action Required",
            "Employee Acknowledgment",
        ],
    },
    "90 Day Performance Evaluation": {
        "title": "90-DAY PERFORMANCE EVALUATION",
        "sections": [
            "Evaluation Period",
            "Performance Summary",
            "Areas of Strength",
            "Areas for Improvement",
            "Goals for Next Period",
            "Recommendation",
        ],
    },
}
# ============================================
# API Routes
# ============================================

# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm how it is registered with `app`.
def index():
    """Return service metadata as JSON.

    The payload carries a fixed status/service/version triple together with
    the configured allowed origins and the currently active model name.
    """
    service_info = {
        "status": "ok",
        "service": "HR Report Generator API",
        "version": "1.0.0",
        "allowed_origins": ALLOWED_ORIGINS,
        "active_model": get_active_model(),
    }
    return jsonify(service_info)
def health():
    """Liveness probe: always answers with a JSON ``status`` of ``healthy``."""
    return jsonify(status="healthy")
def generate_report():
    """Generate an HR document from the JSON request body.

    JSON fields read from the body:
        doc_type: Key into ``DOCUMENT_TEMPLATES``. Defaults to 'Memorandum';
            an unknown value falls back to the 'Memorandum' template while
            the value itself is still passed on to the synthesizer.
        employee_name, date_from, reason: Required.
        date_to: Optional; combined with ``date_from`` into a range when it
            is present and different.
        additional_notes: Optional; used as the incident reason instead of
            ``reason`` when non-empty.

    Returns:
        * ``('', 204)`` for an OPTIONS request.
        * HTTP 400 JSON error when the body is not a JSON object or a
          required field is missing/empty.
        * HTTP 500 JSON error when the synthesizer reports failure or an
          unexpected exception occurs.
        * Otherwise a JSON payload with the markdown report, a validation
          stub, the sources used and the model name.
    """
    if request.method == 'OPTIONS':
        return '', 204

    try:
        # silent=True yields None instead of raising on a missing, malformed
        # or wrongly-typed body, so bad client input is reported as a 400
        # below rather than surfacing as a 500 from the generic handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                'success': False,
                'error': 'Request body must be a JSON object'
            }), 400

        doc_type = data.get('doc_type', 'Memorandum')
        employee_name = data.get('employee_name', '')
        date_from = data.get('date_from', '')
        date_to = data.get('date_to', '')
        reason = data.get('reason', '')
        additional_notes = data.get('additional_notes', '')

        if not all([employee_name, date_from, reason]):
            return jsonify({'success': False, 'error': 'Missing required fields'}), 400

        # Format date range
        if date_to and date_to != date_from:
            incident_date_range = f"{date_from} to {date_to}"
        else:
            incident_date_range = date_from

        # Get document template info
        template_info = DOCUMENT_TEMPLATES.get(doc_type, DOCUMENT_TEMPLATES['Memorandum'])

        # Get active model from endpoints.txt
        model_name = get_active_model()

        # Build the synthesizer input
        synth_input = {
            'employee_name': employee_name,
            'incident_date_range': incident_date_range,
            'incident_reason': additional_notes or reason,
            'violation_type': reason,
            'report_date': datetime.now().strftime('%Y-%m-%d'),
            'doc_type': doc_type,
            'doc_title': template_info['title'],
            'required_sections': template_info['sections'],
        }

        # Generate the report
        synthesizer = ReportSynthesizer(model_name=model_name)
        result = synthesizer.synthesize_from_dict(synth_input)

        if not result.success:
            return jsonify({'success': False, 'error': result.error}), 500

        return jsonify({
            'success': True,
            'markdown': result.markdown_report,
            # The validation section is a fixed stub; nothing is validated here.
            'validation': {
                'is_valid': True,
                'errors': [],
                'warnings': [],
            },
            'sources': result.retrieval_context.sources_used,
            'model_used': model_name,
        })

    except Exception as e:
        # Top-level boundary for the handler: print the traceback to the
        # server output and report the failure to the client as JSON.
        import traceback
        traceback.print_exc()
        return jsonify({'success': False, 'error': str(e)}), 500
def upload_warnings():
    """Store uploaded warning/coaching PDFs and try to extract their text.

    Reads the multipart field ``files``. Each entry whose sanitized filename
    ends in ``.pdf`` (case-insensitive) is saved under ``data/uploads``;
    other entries are skipped. When the project's ``PDFExtractor`` can be
    imported, text extraction is attempted for every saved file and the
    per-file outcome is recorded.

    Returns:
        * ``('', 204)`` for an OPTIONS request.
        * HTTP 400 JSON error when no ``files`` field is present.
        * Otherwise JSON with ``uploaded`` (saved filenames), ``processed``
          (per-file extraction results) and ``extractor_available``.
    """
    if request.method == 'OPTIONS':
        return '', 204

    if 'files' not in request.files:
        return jsonify({'success': False, 'error': 'No files provided'}), 400

    from werkzeug.utils import secure_filename

    files = request.files.getlist('files')
    uploaded = []
    processed = []

    upload_dir = Path('data/uploads')
    upload_dir.mkdir(parents=True, exist_ok=True)

    # Try to import PDF extractor; uploads still work without it.
    extractor = None
    try:
        from src.document_processor.converter import PDFExtractor
        extractor = PDFExtractor()
    except ImportError:
        pass
    extractor_available = extractor is not None

    for file in files:
        # Sanitize first and check the extension on the sanitized name:
        # secure_filename() can return an empty string or drop the leading
        # dot, and saving to an empty name would target the directory itself.
        filename = secure_filename(file.filename or '')
        if not filename.lower().endswith('.pdf'):
            continue

        filepath = upload_dir / filename
        file.save(filepath)
        uploaded.append(filename)

        if not extractor_available:
            continue

        # Extract text; a failure on one file must not abort the others.
        try:
            result = extractor.extract_text(filepath)
            if result['success']:
                processed.append({
                    'filename': filename,
                    'text_length': len(result['text']),
                    'page_count': result['page_count'],
                    'success': True
                })
            else:
                processed.append({
                    'filename': filename,
                    'error': result['error'],
                    'success': False
                })
        except Exception as e:
            processed.append({
                'filename': filename,
                'error': str(e),
                'success': False
            })

    return jsonify({
        'success': True,
        'uploaded': uploaded,
        'processed': processed,
        'extractor_available': extractor_available
    })
def get_status():
    """Report knowledge-base load state, active model and allowed origins.

    A fresh ``FAISSVectorStore`` is created and loaded on every call. If
    anything raises, a payload describing an unloaded knowledge base plus the
    error text is returned instead; no explicit status code is set on either
    path.
    """
    try:
        store = FAISSVectorStore()
        is_loaded = store.load()

        # Only query the store for details when loading succeeded.
        if is_loaded:
            chunk_count = store.size
            source_list = store.get_sources()
        else:
            chunk_count = 0
            source_list = []

        payload = {
            'knowledge_base': {
                'loaded': is_loaded,
                'chunks': chunk_count,
                'sources': source_list,
            },
            'active_model': get_active_model(),
            'allowed_origins': ALLOWED_ORIGINS,
        }
        return jsonify(payload)
    except Exception as e:
        fallback = {
            'knowledge_base': {'loaded': False, 'chunks': 0, 'sources': []},
            'error': str(e),
        }
        return jsonify(fallback)
def get_config():
    """Return the public configuration: document type names and active model."""
    document_types = list(DOCUMENT_TEMPLATES)
    active_model = get_active_model()
    return jsonify({
        'document_types': document_types,
        'active_model': active_model,
    })
# ============================================
# Main
# ============================================

if __name__ == '__main__':
    # Ensure directories exist
    settings.ensure_directories()

    banner = "=" * 50
    print(banner)
    print("HR Report Generator API")
    print(banner)
    print(f"Active Model: {get_active_model()}")
    print(f"Allowed Origins: {ALLOWED_ORIGINS}")
    print(banner)

    # The server listens on every interface, so the interactive debugger
    # (which allows arbitrary code execution from the browser) must never be
    # on by default. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }

    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)