Update app.py

app.py CHANGED
@@ -24,11 +24,11 @@ class LLMManager:
         self.models_config = models_config
         self.executor = ThreadPoolExecutor(max_workers=2)
         self.generation_lock = threading.Lock()
-        self.load_all_models()
         self.session = requests.Session()
         adapter = requests.adapters.HTTPAdapter(pool_connections=2, pool_maxsize=2)
         self.session.mount('http://', adapter)
         self.session.mount('https://', adapter)
+        self.load_all_models()

     def load_all_models(self):
         """Load all models into RAM"""
@@ -102,19 +102,19 @@ class LLMManager:
         """Generate a response with a specific model"""
         if not self.generation_lock.acquire(blocking=False):
             return {"error": "Server busy - generation in progress"}
-
+
         try:
             model_data = self.get_model(model_name)
-
+
             if not model_data or not model_data["loaded"]:
                 error_msg = f"Model {model_name} not loaded"
                 if model_data and "error" in model_data:
                     error_msg += f": {model_data['error']}"
                 return {"error": error_msg}
-
+
             result = [None]
             exception = [None]
-
+
             def generate():
                 try:
                     result[0] = model_data["instance"].create_chat_completion(
@@ -123,21 +123,21 @@ class LLMManager:
                     )
                 except Exception as e:
                     exception[0] = e
-
+
             gen_thread = threading.Thread(target=generate, daemon=True)
             gen_thread.start()
             gen_thread.join(timeout=120)
-
+
             if gen_thread.is_alive():
                 return {"error": "Generation timed out (120 seconds)"}
-
+
             if exception[0]:
                 raise exception[0]
-
+
             result[0]["provider"] = "telechars-ai"
             result[0]["model"] = model_name
             return result[0]
-
+
         finally:
             self.generation_lock.release()
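For reference, the session setup that the commit moves ahead of load_all_models() is plain requests connection pooling. Isolated from the class, the same calls look like this; the commented request at the end is an illustrative usage, not a line from app.py:

import requests

# A session with a small fixed connection pool, matching the diff:
# cache at most 2 host pools, with at most 2 connections per pool.
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=2, pool_maxsize=2)
session.mount('http://', adapter)   # pooled transport for plain HTTP
session.mount('https://', adapter)  # pooled transport for HTTPS

# Illustrative usage: requests made through the session reuse pooled connections.
# resp = session.get('https://example.com/health', timeout=10)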
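The second and third hunks appear to only strip trailing whitespace from blank lines, but the method they touch is worth a note: it pairs a non-blocking lock acquire (rejecting concurrent requests outright rather than queueing them) with a daemon thread joined under a timeout (bounding how long create_chat_completion may run). A minimal standalone sketch of that pattern, assuming a generic task callable in place of the model call; every name below is illustrative, not taken from app.py:

import threading

generation_lock = threading.Lock()

def run_with_timeout(task, timeout=120):
    # Reject concurrent callers instead of queueing them behind the lock.
    if not generation_lock.acquire(blocking=False):
        return {"error": "Server busy - generation in progress"}
    try:
        result = [None]     # one-element lists let the worker thread
        exception = [None]  # hand its outcome back to this frame

        def worker():
            try:
                result[0] = task()
            except Exception as e:
                exception[0] = e

        t = threading.Thread(target=worker, daemon=True)
        t.start()
        t.join(timeout=timeout)
        if t.is_alive():
            # The daemon thread is not killed on timeout; the caller simply
            # stops waiting for it and reports the failure.
            return {"error": f"Generation timed out ({timeout} seconds)"}
        if exception[0]:
            raise exception[0]
        return result[0]
    finally:
        generation_lock.release()

# Illustrative usage:
# print(run_with_timeout(lambda: {"text": "hello"}))

One caveat of this design, inherited from the original: a timed-out generation keeps running in the background until it finishes on its own, so the lock is released while the abandoned thread may still be using the model instance.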