Update app.py
Build status: Build error
app.py (CHANGED)
@@ -7,7 +7,7 @@ import requests
 import tempfile
 import json
 from concurrent.futures import ThreadPoolExecutor
-import
+import threading
 
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -96,37 +96,42 @@ class LLMManager:
 
     def chat_completion(self, model_name, messages, **kwargs):
         """Generate a response with a specific model"""
+        # Try to acquire the lock (non-blocking)
         if not self.generation_lock.acquire(blocking=False):
             return {"error": "Server busy - generation in progress"}
-
+
         try:
             model_data = self.get_model(model_name)
-
+
             if not model_data or not model_data["loaded"]:
                 error_msg = f"Model {model_name} not loaded"
                 if model_data and "error" in model_data:
                     error_msg += f": {model_data['error']}"
                 return {"error": error_msg}
-
+
+            import signal
+
             def timeout_handler(signum, frame):
                 raise TimeoutError("Generation timeout (120 seconds)")
-
+
+            # Set up the timeout (120 seconds)
             signal.signal(signal.SIGALRM, timeout_handler)
             signal.alarm(120)
-
+
             try:
                 response = model_data["instance"].create_chat_completion(
                     messages=messages,
                     **kwargs
                 )
             finally:
-                signal.alarm(0)
+                signal.alarm(0)  # Cancel the timeout
 
             response["provider"] = "telechars-ai"
             response["model"] = model_name
             return response
-
+
         finally:
+            # Always release the lock
             self.generation_lock.release()
 
     def get_loaded_models(self):
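Two notes on the change, with minimal sketches. The first hunk is the actual build fix: a dangling `import` with no module name is a Python SyntaxError, so app.py could not load at all; completing it to `import threading` resolves that and supplies what is presumably the `threading.Lock` behind `self.generation_lock`. The second hunk mostly documents the existing non-blocking lock pattern, sketched below; `generation_lock` and `guarded_generate` here are hypothetical stand-ins, not the app.py code.

import threading

generation_lock = threading.Lock()  # allow one generation at a time

def guarded_generate():
    # Non-blocking acquire: returns False right away if another request
    # already holds the lock, instead of queuing behind it.
    if not generation_lock.acquire(blocking=False):
        return {"error": "Server busy - generation in progress"}
    try:
        return {"result": "ok"}  # stand-in for the real model call
    finally:
        generation_lock.release()  # always runs, even if the call raises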
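The timeout logic added around `create_chat_completion` uses SIGALRM. Below is a sketch of that pattern factored into a reusable helper, under stated assumptions: `run_with_alarm` is a hypothetical name, `signal.alarm` is Unix-only, and `signal.signal` may only be called from the main thread of the main interpreter, so this pattern raises ValueError if Flask serves the request from a worker thread.

import signal

def run_with_alarm(fn, seconds=120):
    # Hypothetical helper; works on Unix, and only in the main thread,
    # because signal.signal() rejects calls from other threads.
    def timeout_handler(signum, frame):
        raise TimeoutError(f"Generation timeout ({seconds} seconds)")

    old_handler = signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(seconds)  # deliver SIGALRM after `seconds` elapse
    try:
        return fn()
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, old_handler)  # restore the old handler

Usage would look like `response = run_with_alarm(lambda: llm.create_chat_completion(messages=messages), 120)`, with `llm` standing in for `model_data["instance"]`.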