Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,8 @@ import json
|
|
| 3 |
import logging
|
| 4 |
import time
|
| 5 |
from gradio_client import Client
|
| 6 |
-
from json.decoder import JSONDecodeError
|
|
|
|
| 7 |
|
| 8 |
app = Flask(__name__)
|
| 9 |
|
|
@@ -12,60 +13,15 @@ logging.basicConfig(level=logging.INFO,
|
|
| 12 |
format='%(asctime)s - %(levelname)s - %(message)s')
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
-
# Initialize JARVIS client
|
| 16 |
-
jarvis = None
|
| 17 |
-
|
| 18 |
-
def initialize_jarvis_client(max_retries=3, retry_delay=5):
|
| 19 |
-
global jarvis # Use the global jarvis variable
|
| 20 |
-
for attempt in range(max_retries):
|
| 21 |
-
try:
|
| 22 |
-
jarvis = Client("hadadrjt/ai")
|
| 23 |
-
logger.info("JARVIS client initialized successfully.")
|
| 24 |
-
return True # Indicate success
|
| 25 |
-
except JSONDecodeError as e: # Catch JSONDecodeError specifically
|
| 26 |
-
logger.warning(f"Attempt {attempt + 1}/{max_retries}: JSONDecodeError during JARVIS client initialization: {e}")
|
| 27 |
-
if attempt < max_retries - 1:
|
| 28 |
-
time.sleep(retry_delay) # Wait before retrying
|
| 29 |
-
else:
|
| 30 |
-
logger.error("Max retries reached. JARVIS client initialization failed due to JSONDecodeError.")
|
| 31 |
-
return False # Indicate failure
|
| 32 |
-
except Exception as e: # Catch other exceptions as well
|
| 33 |
-
logger.error(f"Attempt {attempt + 1}/{max_retries}: Error during JARVIS client initialization: {e}")
|
| 34 |
-
if attempt < max_retries - 1:
|
| 35 |
-
time.sleep(retry_delay)
|
| 36 |
-
else:
|
| 37 |
-
logger.error("Max retries reached. JARVIS client initialization failed due to general exception.")
|
| 38 |
-
return False # Indicate failure
|
| 39 |
-
return False # Return False if loop completes without success
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
if not initialize_jarvis_client(): # Initialize client with retry mechanism
|
| 43 |
-
print("Failed to initialize JARVIS client after multiple retries. API will likely not function correctly.")
|
| 44 |
-
# It's up to you if you want to exit here or let the Flask app start but potentially fail on requests.
|
| 45 |
-
# For now, let's continue and handle potential None jarvis later.
|
| 46 |
-
|
| 47 |
|
| 48 |
# Define available models
|
| 49 |
models = [
|
| 50 |
"JARVIS: 2.1.2",
|
| 51 |
"DeepSeek: V3-0324",
|
| 52 |
-
|
| 53 |
-
"DeepSeek: R1 - Distill Qwen 14B (Reasoning)",
|
| 54 |
-
"DeepSeek: R1 - Distill Qwen 32B (Reasoning)",
|
| 55 |
-
"DeepSeek: R1 - Distill Llama 70B (Reasoning)",
|
| 56 |
-
"Google: Gemini 2.0 Flash Thinking (Reasoning)",
|
| 57 |
-
"Google: Gemini 2.5 Pro",
|
| 58 |
-
"Google: Gemma 3 1B-IT",
|
| 59 |
-
"Google: Gemma 3 4B-IT",
|
| 60 |
-
"Google: Gemma 3 27B-IT",
|
| 61 |
-
"Meta: Llama 3.1 8B Instruct",
|
| 62 |
-
"Meta: Llama 3.2 3B Instruct",
|
| 63 |
-
"Meta: Llama 3.3 70B Instruct",
|
| 64 |
-
"Meta: Llama 4 Maverick 17B 128E Instruct",
|
| 65 |
-
"Meta: Llama 4 Scout 17B 16E Instruct",
|
| 66 |
-
"Qwen: Qwen2.5 VL 3B Instruct",
|
| 67 |
-
"Qwen: Qwen2.5 VL 32B Instruct",
|
| 68 |
-
"Qwen: Qwen2.5 VL 72B Instruct",
|
| 69 |
"Agentica: Deepcoder 14B Preview"
|
| 70 |
]
|
| 71 |
|
|
@@ -79,12 +35,58 @@ def authenticate_request(request):
|
|
| 79 |
provided_api_key = auth_header.split(' ')[1]
|
| 80 |
return provided_api_key == API_KEY
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 83 |
def chat_completions():
|
| 84 |
if not authenticate_request(request):
|
| 85 |
return jsonify({"error": {"message": "Invalid API key", "code": "invalid_api_key"}}), 401
|
| 86 |
|
| 87 |
-
|
|
|
|
| 88 |
return jsonify({"error": {"message": "JARVIS client failed to initialize. API not available.", "code": "jarvis_not_initialized"}}), 500
|
| 89 |
|
| 90 |
|
|
@@ -93,44 +95,27 @@ def chat_completions():
|
|
| 93 |
model = data.get("model", "JARVIS: 2.1.2")
|
| 94 |
stream = data.get("stream", False)
|
| 95 |
|
| 96 |
-
# Validate messages
|
| 97 |
if not isinstance(messages, list):
|
| 98 |
-
return jsonify({"error":
|
| 99 |
-
|
| 100 |
for message in messages:
|
| 101 |
if not isinstance(message, dict) or 'role' not in message or 'content' not in message:
|
| 102 |
-
return jsonify({"error":
|
| 103 |
|
| 104 |
-
# Extract the last message content
|
| 105 |
last_message = messages[-1]["content"]
|
| 106 |
|
| 107 |
try:
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
result = jarvis.predict(multi={"text": last_message}, api_name="/api")
|
| 111 |
response_text = result[0][0][1]
|
| 112 |
|
| 113 |
-
#
|
| 114 |
-
|
| 115 |
-
"
|
| 116 |
-
"
|
| 117 |
-
"created": int(time.time()),
|
| 118 |
-
"choices": [{
|
| 119 |
-
"index": 0,
|
| 120 |
-
"message": {
|
| 121 |
-
"role": "assistant",
|
| 122 |
-
"content": response_text
|
| 123 |
-
},
|
| 124 |
-
"finish_reason": "stop"
|
| 125 |
-
}],
|
| 126 |
-
"usage": {
|
| 127 |
-
"prompt_tokens": 0,
|
| 128 |
-
"completion_tokens": 0,
|
| 129 |
-
"total_tokens": 0
|
| 130 |
-
}
|
| 131 |
}
|
|
|
|
| 132 |
|
| 133 |
-
return jsonify(response)
|
| 134 |
except Exception as e:
|
| 135 |
logger.error(f"Error processing request: {str(e)}")
|
| 136 |
return jsonify({"error": {"message": str(e), "code": "jarvis_error"}}), 500
|
|
@@ -139,11 +124,13 @@ def chat_completions():
|
|
| 139 |
def list_models():
|
| 140 |
if not authenticate_request(request):
|
| 141 |
return jsonify({"error": {"message": "Invalid API key", "code": "invalid_api_key"}}), 401
|
| 142 |
-
if jarvis is None: # Check if jarvis client is initialized
|
| 143 |
-
return jsonify({"error": {"message": "JARVIS client failed to initialize. API not available.", "code": "jarvis_not_initialized"}}), 500
|
| 144 |
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
return jsonify({"data": [{"id": model} for model in models], "object": "list"})
|
| 147 |
|
|
|
|
| 148 |
if __name__ == "__main__":
|
| 149 |
app.run(host='0.0.0.0', port=7860)
|
|
|
|
| 3 |
import logging
import threading
import time
from json.decoder import JSONDecodeError

import httpx  # Import httpx for potential timeout adjustments
from gradio_client import Client
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
|
|
|
| 13 |
format='%(asctime)s - %(levelname)s - %(message)s')
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
+
# Shared JARVIS client, created lazily on first use; it stays None until a client has been built successfully.
|
| 17 |
+
jarvis = None
|
| 18 |
+
JARVIS_INIT_LOCK = False # Boolean flag meant to mark an initialization in progress. NOTE(review): a plain bool is not a mutex - a check-then-set on it is not atomic across threads.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Define available models
|
| 21 |
models = [
|
| 22 |
"JARVIS: 2.1.2",
|
| 23 |
"DeepSeek: V3-0324",
|
| 24 |
+
# ... (rest of your models list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"Agentica: Deepcoder 14B Preview"
|
| 26 |
]
|
| 27 |
|
|
|
|
| 35 |
provided_api_key = auth_header.split(' ')[1]
|
| 36 |
return provided_api_key == API_KEY
|
| 37 |
|
| 38 |
+
|
| 39 |
+
# Serializes lazy creation of the shared JARVIS client across request threads.
# A real mutex is used because testing and then setting a plain boolean flag is
# not atomic: two threads could both see "not initializing" and both start.
_JARVIS_CLIENT_LOCK = threading.Lock()


def _create_jarvis_client(max_retries=5, retry_delay=5):
    """Build the Gradio client for "hadadrjt/ai", retrying on failure.

    Args:
        max_retries: Maximum number of construction attempts.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The constructed client, or None if every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            logger.info(f"Attempting to initialize JARVIS client (attempt {attempt+1}/{max_retries})...")
            # NOTE(review): confirm that the installed gradio_client accepts
            # `client_kwargs`; if it does not, every attempt raises TypeError
            # and initialization can never succeed.
            client = Client("hadadrjt/ai", client_kwargs={"timeout": httpx.Timeout(60.0)})
        except Exception as e:
            is_json_error = isinstance(e, JSONDecodeError)
            if is_json_error:
                logger.warning(f"Attempt {attempt + 1}/{max_retries}: JSONDecodeError during JARVIS client initialization: {e}")
                # Log the start of the unparseable payload to help debugging.
                raw_doc = getattr(e, "doc", None)
                if isinstance(raw_doc, (str, bytes)):
                    logger.warning(f"Problematic response content: {raw_doc[:200]}...")
                else:
                    logger.warning("Could not retrieve problematic response content.")
            else:
                logger.error(f"Attempt {attempt + 1}/{max_retries}: Error during JARVIS client initialization: {e}")

            if attempt < max_retries - 1:
                time.sleep(retry_delay)
            elif is_json_error:
                logger.error("Max retries reached. JARVIS client initialization failed due to JSONDecodeError.")
            else:
                logger.error("Max retries reached. JARVIS client initialization failed due to general exception.")
        else:
            logger.info("JARVIS client initialized successfully.")
            return client
    return None


def get_jarvis_client():
    """Return the shared JARVIS client, creating it on first use.

    Only one thread performs the initialization at a time. A thread that
    arrives while another is initializing waits for it to finish and then
    returns whatever that attempt produced, without retrying itself.

    The module-level ``JARVIS_INIT_LOCK`` boolean is no longer consulted here.

    Returns:
        The client object, or None if initialization failed.
    """
    global jarvis

    if jarvis is not None:
        return jarvis

    if not _JARVIS_CLIENT_LOCK.acquire(blocking=False):
        # Another thread holds the lock, i.e. is initializing right now.
        logger.info("JARVIS client initialization is already in progress, waiting...")
        with _JARVIS_CLIENT_LOCK:
            return jarvis

    try:
        # Re-check: another thread may have finished initializing between the
        # first check above and this thread obtaining the lock.
        if jarvis is None:
            jarvis = _create_jarvis_client()
        return jarvis
    finally:
        # Always release, even if a non-Exception (e.g. KeyboardInterrupt)
        # propagates, so waiting threads are never blocked forever.
        _JARVIS_CLIENT_LOCK.release()
|
| 82 |
+
|
| 83 |
@app.route("/v1/chat/completions", methods=["POST"])
|
| 84 |
def chat_completions():
|
| 85 |
if not authenticate_request(request):
|
| 86 |
return jsonify({"error": {"message": "Invalid API key", "code": "invalid_api_key"}}), 401
|
| 87 |
|
| 88 |
+
current_jarvis = get_jarvis_client() # Get the client (initialize if needed)
|
| 89 |
+
if current_jarvis is None:
|
| 90 |
return jsonify({"error": {"message": "JARVIS client failed to initialize. API not available.", "code": "jarvis_not_initialized"}}), 500
|
| 91 |
|
| 92 |
|
|
|
|
| 95 |
model = data.get("model", "JARVIS: 2.1.2")
|
| 96 |
stream = data.get("stream", False)
|
| 97 |
|
| 98 |
+
# Validate messages (same as before)
|
| 99 |
if not isinstance(messages, list):
|
| 100 |
+
return jsonify({"error": ..., "code": ...}), 400
|
|
|
|
| 101 |
for message in messages:
|
| 102 |
if not isinstance(message, dict) or 'role' not in message or 'content' not in message:
|
| 103 |
+
return jsonify({"error": ..., "code": ...}), 400
|
| 104 |
|
|
|
|
| 105 |
last_message = messages[-1]["content"]
|
| 106 |
|
| 107 |
try:
|
| 108 |
+
current_jarvis.predict(new=model, api_name="/change_model")
|
| 109 |
+
result = current_jarvis.predict(multi={"text": last_message}, api_name="/api")
|
|
|
|
| 110 |
response_text = result[0][0][1]
|
| 111 |
|
| 112 |
+
response_data = { # OpenAI compatible response (same as before)
|
| 113 |
+
"id": ..., "object": ..., "created": ...,
|
| 114 |
+
"choices": [{ "index": 0, "message": { "role": "assistant", "content": response_text }, "finish_reason": "stop" }],
|
| 115 |
+
"usage": { "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0 }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
}
|
| 117 |
+
return jsonify(response_data)
|
| 118 |
|
|
|
|
| 119 |
except Exception as e:
|
| 120 |
logger.error(f"Error processing request: {str(e)}")
|
| 121 |
return jsonify({"error": {"message": str(e), "code": "jarvis_error"}}), 500
|
|
|
|
| 124 |
def list_models():
    """Return the available model identifiers as a JSON list object.

    Responds with 401 when the request fails API-key authentication and with
    500 when the JARVIS client cannot be obtained; otherwise returns
    ``{"data": [{"id": ...}, ...], "object": "list"}``.
    """
    authorized = authenticate_request(request)
    if not authorized:
        auth_error = {"error": {"message": "Invalid API key", "code": "invalid_api_key"}}
        return jsonify(auth_error), 401

    # Obtaining the client may trigger its lazy initialization.
    if get_jarvis_client() is None:
        init_error = {"error": {"message": "JARVIS client failed to initialize. API not available.", "code": "jarvis_not_initialized"}}
        return jsonify(init_error), 500

    entries = []
    for model_id in models:
        entries.append({"id": model_id})
    return jsonify({"data": entries, "object": "list"})
|
| 133 |
|
| 134 |
+
|
| 135 |
# Script entry point: when run directly, start the Flask app listening on all interfaces (0.0.0.0) at port 7860.
if __name__ == "__main__":
|
| 136 |
app.run(host='0.0.0.0', port=7860)
|