hrbot / app.py
Sonu Prasad
Integrated Docling
3cf9b4f
"""HR Report Generator API - HuggingFace Spaces Backend.
This Flask API serves as the backend for the HR Report Generator.
It validates origins against allowed domains stored in HF secrets.
"""
import os
from datetime import datetime
from pathlib import Path
from functools import wraps
from flask import Flask, request, jsonify
from flask_cors import CORS
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Import our modules
from src.config import settings
from src.knowledge.vector_store import FAISSVectorStore
from src.rag.synthesizer import ReportSynthesizer
app = Flask(__name__)
# ============================================
# CORS and Origin Validation
# ============================================
# Origins permitted to call the API. They come from the ALLOWED_ORIGINS
# environment variable (set in HF Secrets) as a comma-separated list;
# surrounding whitespace is trimmed and empty entries are dropped.
ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "")
ALLOWED_ORIGINS = [
    entry
    for entry in map(str.strip, ALLOWED_ORIGINS_STR.split(","))
    if entry
] or [
    # Built-in fallback used when the variable is unset or yields no entries.
    "https://checkin.hillsideprimarycare.com",
    "https://hsmg.netlify.app",
    "http://localhost:3000",
    "http://localhost:5500",
    "http://127.0.0.1:5500",
]
# Register CORS handling for the /api/* routes only, limited to the
# configured origins, the three HTTP methods the API uses, and the two
# request headers clients are allowed to send.
CORS(
    app,
    resources={
        r"/api/*": dict(
            origins=ALLOWED_ORIGINS,
            methods=["GET", "POST", "OPTIONS"],
            allow_headers=["Content-Type", "Authorization"],
        ),
    },
)
def validate_origin(f):
    """Decorator that rejects requests coming from a non-allowed origin.

    The ``Origin`` header, or the origin part of the ``Referer`` header, must
    match an entry of ``ALLOWED_ORIGINS`` exactly (scheme, host and port).
    A prefix comparison is deliberately NOT used: with ``startswith`` an
    attacker-controlled host such as ``https://hsmg.netlify.app.evil.com``
    would pass for the allowed origin ``https://hsmg.netlify.app``.

    Requests that carry no ``Origin`` header at all (e.g. curl) are let
    through unchanged.

    Args:
        f: The Flask view function to protect.

    Returns:
        The wrapped view. It answers with a JSON error body and HTTP 403
        when an ``Origin`` header is present and nothing matches.
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import urlsplit

    def _to_origin(url):
        """Reduce a URL to a lowercase ``scheme://host[:port]`` origin ('' if invalid)."""
        try:
            parts = urlsplit(url.strip())
        except ValueError:
            # urlsplit raises on malformed netlocs (e.g. a broken IPv6 literal).
            return ""
        if not parts.scheme or not parts.netloc:
            return ""
        return f"{parts.scheme}://{parts.netloc}".lower()

    @wraps(f)
    def decorated_function(*args, **kwargs):
        origin = request.headers.get("Origin", "")
        referer = request.headers.get("Referer", "")

        # Normalise the allow-list the same way as the incoming headers so a
        # trailing slash or letter case in the configuration does not matter.
        allowed = {_to_origin(entry) for entry in ALLOWED_ORIGINS}
        allowed.discard("")  # never let an unparsable header match

        is_allowed = (
            _to_origin(origin) in allowed
            or _to_origin(referer) in allowed
        )

        # Allow requests without origin (e.g., curl)
        if origin and not is_allowed:
            return jsonify({
                "success": False,
                "error": "Origin not allowed"
            }), 403
        return f(*args, **kwargs)

    return decorated_function
# ============================================
# Configuration
# ============================================
def get_active_model():
    """Return the name of the LLM model the API should use.

    The first line of ``endpoints.txt`` (looked up in the current working
    directory) that is neither blank nor a ``#`` comment wins. When the file
    is missing, unreadable, undecodable, or contains no such line, the
    ``LLM_MODEL`` environment variable is used, falling back to
    ``google/gemma-2-9b-it:free``.

    Returns:
        The model identifier as a string, stripped of surrounding whitespace
        when it comes from the file.
    """
    default_model = os.getenv("LLM_MODEL", "google/gemma-2-9b-it:free")
    try:
        # Read directly instead of checking exists() first (no race window).
        # "utf-8-sig" drops a leading BOM, which would otherwise stick to the
        # first line and stop a leading "# comment" from being recognised.
        content = Path("endpoints.txt").read_text(encoding="utf-8-sig")
    except (OSError, UnicodeError):
        # Best effort: any problem with the file means "use the default".
        return default_model

    for raw_line in content.splitlines():
        line = raw_line.strip()
        # Skip comments and empty lines
        if line and not line.startswith("#"):
            return line
    return default_model
# Supported HR document types. Each key is the document type name and maps
# to the heading printed on the document ("title") plus the ordered list of
# section names the generated report must contain ("sections").
DOCUMENT_TEMPLATES = {
    "Memorandum": {
        "title": "MEMORANDUM FOR PERSONNEL FILE",
        "sections": [
            "Purpose of Document",
            "Incident Background",
            "Policy References",
            "Employee Discussion",
            "Corrective Measures",
            "Conclusion",
        ],
    },
    "Termination Letter": {
        "title": "EMPLOYEE TERMINATION LETTER",
        "sections": [
            "Purpose of Document",
            "Employment History",
            "Reason for Termination",
            "Policy Violations",
            "Previous Warnings",
            "Final Pay and Benefits",
            "Return of Company Property",
        ],
    },
    "Written Disciplinary Action": {
        "title": "WRITTEN DISCIPLINARY ACTION",
        "sections": [
            "Purpose of Document",
            "Incident Details",
            "Policy References",
            "Previous Coaching / Warnings",
            "Corrective Action Required",
            "Employee Acknowledgment",
        ],
    },
    "90 Day Performance Evaluation": {
        "title": "90-DAY PERFORMANCE EVALUATION",
        "sections": [
            "Evaluation Period",
            "Performance Summary",
            "Areas of Strength",
            "Areas for Improvement",
            "Goals for Next Period",
            "Recommendation",
        ],
    },
}
# ============================================
# API Routes
# ============================================
@app.route('/')
def index():
    """Root endpoint: report service status, name, version, the configured
    allowed origins and the currently active model as JSON."""
    service_info = {
        "status": "ok",
        "service": "HR Report Generator API",
        "version": "1.0.0",
        "allowed_origins": ALLOWED_ORIGINS,
        "active_model": get_active_model(),
    }
    return jsonify(service_info)
@app.route('/api/health')
def health():
    """Liveness probe; always answers with a fixed "healthy" JSON status."""
    payload = {"status": "healthy"}
    return jsonify(payload)
@app.route('/api/generate', methods=['POST', 'OPTIONS'])
@validate_origin
def generate_report():
    """Generate an HR document based on type.

    Expects a JSON object in the request body with these keys:

    * ``employee_name``, ``date_from``, ``reason`` -- required, must be
      non-empty.
    * ``doc_type`` -- a key of ``DOCUMENT_TEMPLATES``; defaults to
      ``'Memorandum'``, and an unknown value falls back to the Memorandum
      template.
    * ``date_to``, ``additional_notes`` -- optional.

    Returns:
        A JSON response. On success: ``success``, ``markdown``,
        ``validation``, ``sources`` and ``model_used``. HTTP 400 when the
        body is not a JSON object or a required field is missing; HTTP 500
        when synthesis reports failure or an unexpected exception occurs.
        ``OPTIONS`` requests get an empty 204 response.
    """
    if request.method == 'OPTIONS':
        return '', 204
    try:
        # silent=True returns None for a missing, non-JSON or malformed body
        # instead of raising, so a bad request is answered with 400 rather
        # than falling into the generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                'success': False,
                'error': 'Request body must be a JSON object'
            }), 400

        doc_type = data.get('doc_type', 'Memorandum')
        employee_name = data.get('employee_name', '')
        date_from = data.get('date_from', '')
        date_to = data.get('date_to', '')
        reason = data.get('reason', '')
        additional_notes = data.get('additional_notes', '')

        if not all([employee_name, date_from, reason]):
            return jsonify({'success': False, 'error': 'Missing required fields'}), 400

        # Format date range: a single date when no distinct end date is given
        if date_to and date_to != date_from:
            incident_date_range = f"{date_from} to {date_to}"
        else:
            incident_date_range = date_from

        # Get document template info (unknown types use the Memorandum layout)
        template_info = DOCUMENT_TEMPLATES.get(doc_type, DOCUMENT_TEMPLATES['Memorandum'])

        # Get active model from endpoints.txt
        model_name = get_active_model()

        # Build the synthesizer input
        synth_input = {
            'employee_name': employee_name,
            'incident_date_range': incident_date_range,
            # Free-text notes, when supplied, take precedence over the reason
            'incident_reason': additional_notes or reason,
            'violation_type': reason,
            'report_date': datetime.now().strftime('%Y-%m-%d'),
            'doc_type': doc_type,
            'doc_title': template_info['title'],
            'required_sections': template_info['sections'],
        }

        # Generate the report
        synthesizer = ReportSynthesizer(model_name=model_name)
        result = synthesizer.synthesize_from_dict(synth_input)
        if not result.success:
            return jsonify({'success': False, 'error': result.error}), 500

        return jsonify({
            'success': True,
            'markdown': result.markdown_report,
            'validation': {
                'is_valid': True,
                'errors': [],
                'warnings': [],
            },
            'sources': result.retrieval_context.sources_used,
            'model_used': model_name,
        })
    except Exception as e:
        # Top-level boundary: record the full traceback server-side.
        app.logger.exception("Report generation failed")
        # NOTE(review): str(e) is returned to the client as before; consider
        # a generic message if internal details must not be exposed.
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/upload-warnings', methods=['POST', 'OPTIONS'])
@validate_origin
def upload_warnings():
    """Upload previous warning/coaching PDFs and extract their text.

    Reads the multipart field ``files``. Every part whose filename ends in
    ``.pdf`` (case-insensitive) is stored under ``data/uploads`` using a
    sanitised filename; other parts are ignored. When the project's
    ``PDFExtractor`` can be imported, text is extracted from each stored
    file and a per-file result is reported.

    Returns:
        A JSON response with ``uploaded`` (stored filenames), ``processed``
        (per-file extraction results) and ``extractor_available``. HTTP 400
        when the ``files`` field is absent; ``OPTIONS`` requests get an
        empty 204 response.
    """
    if request.method == 'OPTIONS':
        return '', 204
    if 'files' not in request.files:
        return jsonify({'success': False, 'error': 'No files provided'}), 400

    from werkzeug.utils import secure_filename

    files = request.files.getlist('files')
    uploaded = []
    processed = []
    upload_dir = Path('data/uploads')
    upload_dir.mkdir(parents=True, exist_ok=True)

    # The extractor is optional: without it files are stored but not parsed.
    extractor = None
    try:
        from src.document_processor.converter import PDFExtractor
        extractor = PDFExtractor()
    except ImportError:
        pass
    extractor_available = extractor is not None

    for file in files:
        original_name = file.filename or ''
        # Case-insensitive check so "REPORT.PDF" is not silently dropped.
        if not original_name.lower().endswith('.pdf'):
            continue

        # secure_filename strips path separators and unsafe characters so the
        # client-supplied name cannot escape the upload directory.
        filename = secure_filename(original_name)
        filepath = upload_dir / filename
        file.save(filepath)
        uploaded.append(filename)

        if extractor is None:
            continue

        # Extraction failures are reported per file; they never fail the upload.
        try:
            result = extractor.extract_text(filepath)
            if result['success']:
                processed.append({
                    'filename': filename,
                    'text_length': len(result['text']),
                    'page_count': result['page_count'],
                    'success': True
                })
            else:
                processed.append({
                    'filename': filename,
                    'error': result['error'],
                    'success': False
                })
        except Exception as e:
            processed.append({
                'filename': filename,
                'error': str(e),
                'success': False
            })

    return jsonify({
        'success': True,
        'uploaded': uploaded,
        'processed': processed,
        'extractor_available': extractor_available
    })
@app.route('/api/status')
@validate_origin
def get_status():
    """Report knowledge-base state, the active model and the allowed origins.

    If loading the vector store raises, an "unloaded" knowledge-base
    summary is returned together with the error text.
    """
    try:
        store = FAISSVectorStore()
        is_loaded = store.load()

        # Only query the store for details once it reports a successful load.
        if is_loaded:
            chunk_count = store.size
            source_names = store.get_sources()
        else:
            chunk_count = 0
            source_names = []

        status = {
            'knowledge_base': {
                'loaded': is_loaded,
                'chunks': chunk_count,
                'sources': source_names,
            },
            'active_model': get_active_model(),
            'allowed_origins': ALLOWED_ORIGINS,
        }
        return jsonify(status)
    except Exception as e:
        fallback = {
            'knowledge_base': {'loaded': False, 'chunks': 0, 'sources': []},
            'error': str(e),
        }
        return jsonify(fallback)
@app.route('/api/config')
def get_config():
    """Expose the public configuration: the supported document type names
    and the currently active model (no secrets)."""
    public_config = {
        'document_types': list(DOCUMENT_TEMPLATES),
        'active_model': get_active_model(),
    }
    return jsonify(public_config)
# ============================================
# Main
# ============================================
if __name__ == '__main__':
    # Ensure directories exist
    settings.ensure_directories()

    print("=" * 50)
    print("HR Report Generator API")
    print("=" * 50)
    print(f"Active Model: {get_active_model()}")
    print(f"Allowed Origins: {ALLOWED_ORIGINS}")
    print("=" * 50)

    # The server listens on all interfaces, and Flask's debug mode exposes the
    # Werkzeug interactive debugger, which allows arbitrary code execution.
    # Debug is therefore off unless explicitly requested via FLASK_DEBUG.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)