"""OpenAI-compatible HTTP facade over the JARVIS Gradio space.

Exposes ``POST /v1/chat/completions`` and ``GET /v1/models``. Both routes
require a ``Authorization: Bearer <API_KEY>`` header. The Gradio client is
created lazily on the first request that needs it.
"""

import hmac
import json
import logging
import threading
import time
import uuid
from json.decoder import JSONDecodeError

import httpx  # Used to build the timeout passed to the Gradio client
from flask import Flask, request, jsonify
from gradio_client import Client

app = Flask(__name__)

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# JARVIS client is created lazily; None until the first successful init.
jarvis = None
# Real mutex guarding the lazy initialisation. A plain boolean flag cannot do
# this safely because "check flag, then set flag" is not atomic across threads.
JARVIS_INIT_LOCK = threading.Lock()

# Define available models
models = [
    "JARVIS: 2.1.2",
    "DeepSeek: V3-0324",
    # ... (rest of your models list)
    "Agentica: Deepcoder 14B Preview"
]

# API Key
# NOTE(review): hard-coded secret committed to source. It should be loaded
# from the environment or a secret store and this value rotated.
API_KEY = "ez8j795dR3zChxvIG9VgcuCTyV9iJRFL"


def authenticate_request(request):
    """Return True when *request* carries the expected bearer token.

    Args:
        request: Object exposing a ``headers`` mapping (the Flask request).

    Returns:
        bool: True if the ``Authorization`` header is ``Bearer <API_KEY>``.
    """
    auth_header = request.headers.get('Authorization')
    if auth_header is None or not auth_header.startswith('Bearer '):
        return False
    provided_api_key = auth_header.split(' ')[1]
    # Constant-time comparison; encode first because compare_digest rejects
    # non-ASCII str inputs, and header values are client-controlled.
    return hmac.compare_digest(
        provided_api_key.encode('utf-8'), API_KEY.encode('utf-8')
    )


def _error_response(message, code, status):
    """Build the ``{"error": {"message", "code"}}`` envelope with *status*."""
    return jsonify({"error": {"message": message, "code": code}}), status


def _init_jarvis_client():
    """Try to construct the Gradio client, retrying on failure.

    Makes up to five attempts, sleeping five seconds between them.

    Returns:
        The new ``Client`` on success, or None once all attempts have failed.
    """
    max_retries = 5
    retry_delay = 5
    for attempt in range(max_retries):
        try:
            logger.info(
                "Attempting to initialize JARVIS client (attempt %d/%d)...",
                attempt + 1, max_retries,
            )
            # NOTE(review): current gradio_client documents this keyword as
            # `httpx_kwargs`; confirm `client_kwargs` is accepted by the
            # installed version, otherwise every attempt raises TypeError.
            client = Client("hadadrjt/ai", client_kwargs={"timeout": httpx.Timeout(60.0)})
        except JSONDecodeError as e:
            logger.warning(
                "Attempt %d/%d: JSONDecodeError during JARVIS client initialization: %s",
                attempt + 1, max_retries, e,
            )
            try:
                # e.doc is the text that failed to parse; log a short prefix.
                logger.warning("Problematic response content: %s...", e.doc[:200])
            except Exception:
                logger.warning("Could not retrieve problematic response content.")
            failure = "JSONDecodeError"
        except Exception as e:
            logger.error(
                "Attempt %d/%d: Error during JARVIS client initialization: %s",
                attempt + 1, max_retries, e,
            )
            failure = "general exception"
        else:
            logger.info("JARVIS client initialized successfully.")
            return client

        if attempt < max_retries - 1:
            time.sleep(retry_delay)
        else:
            logger.error(
                "Max retries reached. JARVIS client initialization failed due to %s.",
                failure,
            )
    return None


def get_jarvis_client():
    """Return the shared JARVIS client, creating it on first use.

    Only one thread performs initialisation at a time. Threads that arrive
    while it is in progress wait for it to finish and then return whatever
    the initialising thread produced; they do not start their own retries.

    Returns:
        The shared ``Client``, or None if initialisation failed.
    """
    global jarvis
    if jarvis is not None:
        return jarvis

    if JARVIS_INIT_LOCK.acquire(blocking=False):
        try:
            # Re-check: another thread may have finished between the test
            # above and our acquiring the lock.
            if jarvis is None:
                jarvis = _init_jarvis_client()
        finally:
            JARVIS_INIT_LOCK.release()
        return jarvis

    logger.info("JARVIS client initialization is already in progress, waiting...")
    with JARVIS_INIT_LOCK:  # Blocks until the initialising thread releases.
        return jarvis


@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Handle an OpenAI-style chat completion request.

    Only the ``content`` of the last message is forwarded to JARVIS. The
    ``stream`` field of the request is ignored; the reply is always a single
    JSON document.

    Returns:
        A JSON completion object on success; otherwise an error envelope with
        status 400 (malformed request), 401 (bad API key) or 500 (client
        unavailable or upstream failure).
    """
    if not authenticate_request(request):
        return _error_response("Invalid API key", "invalid_api_key", 401)

    current_jarvis = get_jarvis_client()  # Get the client (initialize if needed)
    if current_jarvis is None:
        return _error_response(
            "JARVIS client failed to initialize. API not available.",
            "jarvis_not_initialized",
            500,
        )

    # silent=True yields None instead of raising on a missing/invalid body,
    # so the caller gets the same error envelope as every other failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _error_response("Request body must be a JSON object", "invalid_request", 400)

    messages = data.get("messages", [])
    model = data.get("model", "JARVIS: 2.1.2")

    # Validate messages; an empty list is rejected because the last message
    # is what gets sent upstream.
    if not isinstance(messages, list) or not messages:
        return _error_response("'messages' must be a non-empty list", "invalid_request", 400)
    for message in messages:
        if not isinstance(message, dict) or 'role' not in message or 'content' not in message:
            return _error_response(
                "Each message must be an object with 'role' and 'content'",
                "invalid_request",
                400,
            )

    last_message = messages[-1]["content"]

    try:
        current_jarvis.predict(new=model, api_name="/change_model")
        result = current_jarvis.predict(multi={"text": last_message}, api_name="/api")
        # Nested indexing is dictated by the remote "/api" endpoint's return
        # value; a shape change there surfaces as the 500 below.
        response_text = result[0][0][1]

        response_data = {
            "id": f"chatcmpl-{uuid.uuid4().hex}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_text
                },
                "finish_reason": "stop"
            }],
            # Token accounting is not available from the upstream API.
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }
        return jsonify(response_data)
    except Exception as e:
        logger.exception("Error processing request: %s", e)
        return _error_response(str(e), "jarvis_error", 500)


@app.route("/v1/models", methods=["GET"])
def list_models():
    """Return the static model list in OpenAI ``list`` format.

    Returns:
        JSON ``{"data": [{"id": ...}, ...], "object": "list"}``, or an error
        envelope with status 401 (bad API key) or 500 (client unavailable).
    """
    if not authenticate_request(request):
        return _error_response("Invalid API key", "invalid_api_key", 401)

    current_jarvis = get_jarvis_client()  # Get the client (initialize if needed)
    if current_jarvis is None:
        return _error_response(
            "JARVIS client failed to initialize. API not available.",
            "jarvis_not_initialized",
            500,
        )

    return jsonify({"data": [{"id": model} for model in models], "object": "list"})


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)