Spaces:
Sleeping
Sleeping
| """ | |
| Application factory for creating and configuring the Flask app with HuggingFace services. | |
| This approach allows for easier testing and management of application state. | |
| """ | |
| import logging | |
| import os | |
| import time | |
| from dotenv import load_dotenv | |
| from flask import Flask, jsonify, render_template | |
| logger = logging.getLogger(__name__) | |
def _run_hf_diagnostic_quiet() -> None:
    """Run a compact, best-effort Hugging Face connectivity diagnostic.

    The diagnostic is skipped (with an INFO log) when ``HF_TOKEN`` is not set.
    Otherwise it performs three probes in order:

    1. ``huggingface_hub.whoami()`` and logs the returned account name.
    2. A test embedding request through ``InferenceClient.feature_extraction``.
    3. A direct HTTP POST to the HF router endpoint for the same model, logging
       the response status code.

    Any failure (missing optional packages, authentication errors, network
    errors, timeouts) is logged at DEBUG level and swallowed, so this function
    never raises.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        logger.info("HF_TOKEN not set - skipping HF diagnostic")
        return

    # One shared bound for both inference probes. This function is invoked
    # while the module is being imported, so an unbounded wait would stall
    # application startup.
    timeout_seconds = 10
    model_id = "intfloat/multilingual-e5-large"

    try:
        # Imported lazily: an ImportError here is swallowed like any other
        # diagnostic failure instead of breaking module import.
        import requests
        from huggingface_hub import InferenceClient, whoami

        user_info = whoami()
        logger.info("HF API auth ok: %s", user_info.get("name", "unknown"))

        # InferenceClient's default timeout is None (no limit), so set one
        # explicitly to match the direct HTTP probe below.
        client = InferenceClient(timeout=timeout_seconds)
        client.feature_extraction("test", model=model_id)

        api_url = "https://router.huggingface.co/hf-inference/models/intfloat/multilingual-e5-large"
        headers = {"Authorization": f"Bearer {hf_token}"}
        response = requests.post(
            api_url,
            headers=headers,
            json={"inputs": ["test text"]},
            timeout=timeout_seconds,
        )
        logger.info("HF direct HTTP status: %s", response.status_code)
    except Exception:
        logger.debug("HF diagnostic failed (non-fatal)", exc_info=True)
# Load environment variables from the .env file before anything below reads them.
load_dotenv()

# Best-effort HF diagnostic at import time. It is skipped when
# PYTEST_RUNNING=1 so that importing this module under test makes no network
# calls. Failures are logged at DEBUG level and never abort the import.
# NOTE(review): the diagnostic issues synchronous network requests, so it can
# delay import while they complete.
if os.getenv("PYTEST_RUNNING") != "1":
    try:
        _run_hf_diagnostic_quiet()
    except Exception:
        logger.debug("Failed to run HF diagnostic at import", exc_info=True)
class InitializationTimeoutError(Exception):
    """Signal that an initialization step did not finish within its time limit.

    NOTE(review): nothing in the visible part of this module raises or catches
    this exception; it is presumably used by callers elsewhere - confirm.
    """
def _has_items(seq) -> bool:
    """Return True when *seq* is a non-empty sized container.

    ``len()`` is used instead of truthiness so that array-like objects (for
    example NumPy arrays, whose truth value is ambiguous) are handled the same
    way as plain lists. ``None`` counts as empty.
    """
    return seq is not None and len(seq) > 0


def ensure_hf_processing_on_startup():
    """Run HF document processing and report HF vector-store status at startup.

    Behaviour is driven by environment variables:

    * ``ENABLE_HF_PROCESSING`` (default ``"true"``): when true, imports and
      calls ``scripts.hf_process_documents.run_hf_pipeline`` and logs how long
      it took. A failure is logged with its traceback and does not propagate.
    * ``ENABLE_HF_SERVICES`` (default ``"false"``): when true, connects to
      ``HFDatasetVectorStore`` and logs what ``load_embeddings()`` returns.
    * ``HF_TOKEN``: when set, HF services are force-enabled regardless of
      ``ENABLE_HF_SERVICES``. When missing while either feature is enabled, a
      series of ERROR logs explains how to configure it; execution continues.

    Returns:
        None. All outcomes are reported through logging only.
    """
    pid = os.getpid()
    logging.info(f"[PID {pid}] Starting HF document processing on startup")

    # Check if we should run HF-hosted document processing
    enable_hf_processing = os.getenv("ENABLE_HF_PROCESSING", "true").lower() == "true"
    enable_hf_services = os.getenv("ENABLE_HF_SERVICES", "false").lower() == "true"

    # FORCE HF services when HF_TOKEN is available (same override as config.py and app factory)
    hf_token_available = bool(os.getenv("HF_TOKEN"))
    if hf_token_available:
        logging.info(f"[PID {pid}] π§ HF_TOKEN detected - FORCING HF services in startup function")
        enable_hf_services = True

    # Validate HF authentication for HF services
    if enable_hf_services or enable_hf_processing:
        if hf_token_available:
            logging.info(f"[PID {pid}] β HF_TOKEN found - HF services should work")
        else:
            missing_token_help = (
                "β CRITICAL: HF_TOKEN not available!",
                "π§ HF Services are enabled but authentication is missing",
                "π‘ This is a HF Spaces configuration issue that must be fixed",
                "π§ ACTION REQUIRED:",
                "1. Go to your HF Space settings",
                "2. Add HF_TOKEN as a repository secret",
                "3. Restart your HF Space",
                "β οΈ App will continue but HF services will fail until this is fixed",
            )
            for help_line in missing_token_help:
                logging.error(f"[PID {pid}] {help_line}")

    logging.info(f"[PID {pid}] Startup configuration:")
    logging.info(f"[PID {pid}] - ENABLE_HF_PROCESSING: {enable_hf_processing}")
    logging.info(f"[PID {pid}] - ENABLE_HF_SERVICES: {enable_hf_services}")

    if enable_hf_processing:
        logging.info(f"[PID {pid}] π Starting HF-hosted document processing pipeline...")
        try:
            # Imported lazily so a missing or broken pipeline module is
            # reported below instead of breaking startup.
            from scripts.hf_process_documents import run_hf_pipeline

            logging.info(f"[PID {pid}] π Beginning document chunking and embedding generation...")
            # perf_counter is monotonic, so the duration cannot be skewed by
            # wall-clock adjustments.
            started = time.perf_counter()
            result = run_hf_pipeline()
            elapsed_time = time.perf_counter() - started

            if result:
                logging.info(
                    "[PID %s] β HF document processing pipeline completed successfully in %.2fs",
                    pid,
                    elapsed_time,
                )
            else:
                logging.warning(
                    "[PID %s] β οΈ HF processing completed with warnings in %.2fs",
                    pid,
                    elapsed_time,
                )
        except Exception as e:
            logging.error(f"[PID {pid}] β HF processing failed: {e}", exc_info=True)
            logging.warning(f"[PID {pid}] Continuing with existing embeddings...")

    if not enable_hf_services:
        logging.info(f"[PID {pid}] π HF services disabled - using local mode")
        logging.info(f"[PID {pid}] π» Local Mode: File-based vector storage")
        return

    # Check HF vector database status
    logging.info(f"[PID {pid}] π Checking HF vector database status...")
    logging.info(f"[PID {pid}] π± HF Services Mode: Persistent vector storage enabled")
    try:
        from src.vector_store.hf_dataset_store import HFDatasetVectorStore

        logging.info(f"[PID {pid}] π Connecting to HF Dataset vector store...")
        hf_store = HFDatasetVectorStore()

        # Try to load the existing dataset to check its status. A failure here
        # is expected on a fresh deployment, hence INFO rather than ERROR.
        try:
            logging.info(f"[PID {pid}] π₯ Loading embeddings from HF Dataset...")
            documents, embeddings, metadata = hf_store.load_embeddings()

            # Length-based checks: a plain truthiness test raises ValueError
            # for multi-element array-like embeddings.
            if _has_items(documents) and _has_items(embeddings):
                logging.info(f"[PID {pid}] β HF Dataset loaded successfully!")
                logging.info(
                    "[PID %s] π Found: %s documents, %s embeddings",
                    pid,
                    len(documents),
                    len(embeddings),
                )
                logging.info(
                    "[PID %s] π Embedding dimension: %s",
                    pid,
                    len(embeddings[0]),
                )
                sample_metadata = metadata[0] if _has_items(metadata) else "None"
                logging.info(f"[PID {pid}] π Sample metadata: {sample_metadata}")
            else:
                logging.info(f"[PID {pid}] π HF Dataset is empty or not found - ready for new data")
        except Exception as e:
            logging.info(f"[PID {pid}] π HF Dataset not accessible: {e}")
            logging.info(f"[PID {pid}] π‘ This is normal for new deployments")
    except Exception as e:
        # Include the traceback: this covers import and construction errors
        # that are not expected during normal operation.
        logging.error(f"[PID {pid}] β Error checking HF vector database: {e}", exc_info=True)

    # When HF services are enabled, skip traditional vector database setup
    logging.info(f"[PID {pid}] β HF services enabled - using HF Dataset vector store")
    logging.info(f"[PID {pid}] π― HF Dataset store will be used by RAG pipeline")
def _describe_env_var(name: str) -> str:
    """Return a log-safe description of environment variable *name*.

    Unset variables are reported as ``"not_set"``. Variables whose name
    contains ``TOKEN`` or ``KEY`` are fully redacted (only the length is
    shown) so that no part of a secret reaches the logs.
    """
    value = os.getenv(name)
    if value is None:
        return "not_set"
    if "TOKEN" in name or "KEY" in name:
        return f"<redacted, {len(value)} chars>"
    return value


def _register_error_handlers(app: Flask) -> None:
    """Attach the 404, 500 and catch-all exception handlers to *app*."""

    @app.errorhandler(404)
    def not_found(error):
        return render_template("404.html"), 404

    @app.errorhandler(500)
    def internal_error(error):
        logging.error(f"Internal server error: {error}")
        return render_template("500.html"), 500

    @app.errorhandler(Exception)
    def handle_exception(e):
        # A handler registered for Exception also receives HTTP errors that
        # have no more specific handler (e.g. 405). Those carry their own
        # status and response, so hand them back to Flask unchanged instead of
        # masking them as a 500. Duck-typed to avoid a new werkzeug import.
        if callable(getattr(e, "get_response", None)) and isinstance(getattr(e, "code", None), int):
            return e

        logging.error(f"Unhandled exception: {e}", exc_info=True)
        return (
            jsonify(
                {
                    "error": "Internal server error",
                    "message": "An unexpected error occurred",
                }
            ),
            500,
        )


def _register_health_check(app: Flask, hf_services: bool, memory_monitoring: bool) -> None:
    """Expose ``/health`` on *app*, reporting the given startup flags."""

    # NOTE(review): the main blueprint is described as also providing a health
    # route; if it serves the same URL, that earlier-registered rule wins.
    @app.route("/health")
    def health_check():
        """Health check endpoint for deployment monitoring"""
        try:
            status = {
                "status": "healthy",
                "timestamp": time.time(),
                "pid": os.getpid(),
                "hf_services": hf_services,
                "memory_monitoring": memory_monitoring,
            }
            # Report only whether a token is configured, never the token itself.
            status["hf_token_configured"] = bool(os.getenv("HF_TOKEN"))
            return jsonify(status), 200
        except Exception as e:
            logging.error(f"Health check failed: {e}")
            return (
                jsonify(
                    {
                        "status": "unhealthy",
                        "error": str(e),
                        "timestamp": time.time(),
                    }
                ),
                500,
            )


def create_app(
    config_name: str = "default",
    initialize_vectordb: bool = True,
    initialize_llm: bool = True,
) -> Flask:
    """
    Create the Flask application with HuggingFace services configuration.

    Startup proceeds in this order: optional Render memory monitoring, HF
    startup processing (when ``initialize_vectordb`` is true), Flask app
    creation and configuration, blueprint registration, error-handler and
    ``/health`` registration, and finally a smoke-test initialization of the
    HF/LLM services whose failures are logged but not raised.

    Args:
        config_name: Key into ``src.config.config`` selecting the configuration
            object (default, test, production).
        initialize_vectordb: Whether to run HF startup processing and the
            embedding / vector-store startup checks.
        initialize_llm: Whether to run the LLM service startup check.

    Returns:
        Configured Flask application.

    Raises:
        Exception: Any error escaping the setup steps is logged at CRITICAL
            level and re-raised unchanged.
    """
    logging.info("=" * 80)
    logging.info("π APPLICATION STARTUP INITIATED (HF EDITION)")
    logging.info("=" * 80)
    logging.info("π Startup Configuration:")
    logging.info(f" β’ Config Name: {config_name}")
    logging.info(f" β’ Initialize VectorDB: {initialize_vectordb}")
    logging.info(f" β’ Initialize LLM: {initialize_llm}")
    logging.info(f" β’ Process ID: {os.getpid()}")
    logging.info(f" β’ Working Directory: {os.getcwd()}")

    # Log environment variables for debugging (secrets are redacted).
    logging.info("π§ Environment Configuration:")
    env_vars = [
        "ENABLE_HF_SERVICES",
        "ENABLE_HF_PROCESSING",
        "REBUILD_EMBEDDINGS_ON_START",
        "HF_TOKEN",
        "OPENROUTER_API_KEY",
        "RENDER",
        "ENABLE_MEMORY_MONITORING",
    ]
    for var in env_vars:
        logging.info(f" β’ {var}: {_describe_env_var(var)}")
    logging.info("-" * 80)

    try:
        # Initialize Render-specific monitoring if running on Render
        is_render = os.environ.get("RENDER", "0") == "1"
        memory_monitoring_enabled = False
        if is_render:
            try:
                logging.info("π§ Render environment detected - initializing memory monitoring")
                from src.utils.memory_utils import setup_memory_monitoring

                memory_monitoring_enabled = setup_memory_monitoring()
                if memory_monitoring_enabled:
                    logging.info("β Memory monitoring enabled for Render deployment")
                else:
                    logging.warning("β οΈ Memory monitoring initialization failed")
            except Exception as e:
                logging.warning(f"β οΈ Memory monitoring setup failed: {e}")

        # CRITICAL: ENSURE EMBEDDINGS ON STARTUP FOR HF SPACES
        # This must run BEFORE Flask app creation to ensure vector store is ready
        if initialize_vectordb:
            logging.info("π Running HF startup processing...")
            ensure_hf_processing_on_startup()

        # CREATE FLASK APP
        logging.info("ποΈ Creating Flask application...")
        app = Flask(__name__, template_folder="../templates", static_folder="../static")

        # CONFIGURE APP
        logging.info("βοΈ Configuring Flask application...")
        from src.config import config

        app.config.from_object(config[config_name])

        # Configure JSON to handle numpy types. Best effort: if numpy or the
        # provider API is unavailable, the default JSON provider is kept.
        try:
            import numpy as np
            from flask.json.provider import DefaultJSONProvider

            class NumpyJSONProvider(DefaultJSONProvider):
                def default(self, obj):
                    if isinstance(obj, np.integer):
                        return int(obj)
                    if isinstance(obj, np.floating):
                        return float(obj)
                    if isinstance(obj, np.ndarray):
                        return obj.tolist()
                    return super().default(obj)

            app.json = NumpyJSONProvider(app)
            logging.info("β Custom JSON provider configured for numpy types")
        except Exception as e:
            logging.warning(f"β οΈ Failed to configure custom JSON provider: {e}")

        # REGISTER BLUEPRINTS AND ROUTES
        logging.info("π Registering application routes...")

        # Main routes (home, chat, health, search)
        from src.routes.main_routes import main_bp

        app.register_blueprint(main_bp)

        # Document management routes
        from src.document_management.routes import document_bp

        app.register_blueprint(document_bp, url_prefix="/api/documents")

        # Evaluation dashboard routes (optional: a failure only logs a warning)
        try:
            from src.evaluation.dashboard import evaluation_bp

            app.register_blueprint(evaluation_bp)
        except Exception as e:
            logging.warning(f"β οΈ Failed to register evaluation blueprint: {e}")
        logging.info("β All routes registered successfully")

        # CONFIGURE ERROR HANDLERS
        logging.info("π‘οΈ Setting up error handlers...")
        _register_error_handlers(app)
        logging.info("β Error handlers configured")

        # INITIALIZE SERVICES
        logging.info("π§ Initializing application services...")

        # Check HF services configuration
        enable_hf_services = os.getenv("ENABLE_HF_SERVICES", "false").lower() == "true"
        hf_token_available = bool(os.getenv("HF_TOKEN"))

        # FORCE HF services when HF_TOKEN is available
        if hf_token_available:
            logging.info("π§ HF_TOKEN detected - FORCING HF services override")
            enable_hf_services = True

        if enable_hf_services:
            logging.info("π€ HuggingFace services enabled")
            try:
                from src.embedding.hf_embedding_service import HFEmbeddingService
                from src.llm.llm_service import (  # Use generic LLM service (OpenRouter) instead of HF
                    LLMService,
                )
                from src.vector_store.hf_dataset_store import HFDatasetVectorStore

                logging.info("β HF service modules imported successfully")

                # Startup checks only: the constructed services are discarded.
                if initialize_llm:
                    try:
                        LLMService.from_environment()  # This will use OpenRouter
                        logging.info("β LLM service (OpenRouter) initialized")
                    except Exception as e:
                        logging.warning("β οΈ LLM service initialization warning: %s", e)

                if initialize_vectordb:
                    try:
                        HFEmbeddingService()
                        HFDatasetVectorStore()
                        logging.info("β HF embedding and vector store services initialized")
                    except Exception as e:
                        logging.warning("β οΈ HF vector services initialization warning: %s", e)
            except Exception as e:
                logging.error(f"β HF services initialization failed: {e}")
                logging.error("π§ Check HF_TOKEN configuration and network connectivity")
        else:
            logging.info("π» Local services mode (HF services disabled)")

        # ADD HEALTH CHECK ENDPOINT
        _register_health_check(app, enable_hf_services, memory_monitoring_enabled)

        # APP STARTUP COMPLETE
        logging.info("=" * 80)
        logging.info("π APPLICATION STARTUP COMPLETED SUCCESSFULLY")
        logging.info("=" * 80)
        logging.info("π Final Status Summary:")
        logging.info(" β’ Flask App: β Created")
        logging.info(
            " β’ Memory Monitoring: %s",
            "β Enabled" if memory_monitoring_enabled else "β Disabled",
        )
        logging.info(
            " β’ HF Services: %s",
            "β Enabled" if enable_hf_services else "β Disabled",
        )
        logging.info(" β’ Error Handlers: β Registered")
        logging.info(" β’ Health Check: β Available at /health")
        logging.info("π Ready to serve requests!")
        logging.info("=" * 80)
        return app
    except Exception as e:
        # This is a critical catch-all for any exception during app creation.
        # Logging this as a critical error is essential for debugging startup failures.
        logging.critical("=" * 80)
        logging.critical("π₯ CRITICAL: APPLICATION STARTUP FAILED")
        logging.critical("=" * 80)
        logging.critical(f"β Error: {e}")
        logging.critical("π‘ Check the logs above for detailed error information")
        logging.critical("=" * 80, exc_info=True)
        # Re-raise the exception to ensure the Gunicorn worker fails loudly
        # and the failure is immediately obvious in the logs.
        raise