Spaces: Build error
Update app.py
app.py CHANGED
@@ -1,13 +1,13 @@
 from flask import Flask, request, jsonify, Response
 import os
 import logging
+import threading
 import time
 from llama_cpp import Llama
 import requests
 import tempfile
 import json
 from concurrent.futures import ThreadPoolExecutor
-import threading
 
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -96,44 +96,47 @@ class LLMManager:
 
     def chat_completion(self, model_name, messages, **kwargs):
         """Generate a response with a specific model"""
-        # Try to acquire the lock (non-blocking)
         if not self.generation_lock.acquire(blocking=False):
             return {"error": "Server busy - generation in progress"}
 
         try:
             model_data = self.get_model(model_name)
 
             if not model_data or not model_data["loaded"]:
                 error_msg = f"Model {model_name} not loaded"
                 if model_data and "error" in model_data:
                     error_msg += f": {model_data['error']}"
                 return {"error": error_msg}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            result = [None]
+            exception = [None]
+
+            def generate():
+                try:
+                    result[0] = model_data["instance"].create_chat_completion(
+                        messages=messages,
+                        **kwargs
+                    )
+                except Exception as e:
+                    exception[0] = e
+
+            gen_thread = threading.Thread(target=generate)
+            gen_thread.start()
+            gen_thread.join(timeout=120)
+
+            if gen_thread.is_alive():
+                return {"error": "Generation timed out (120 seconds)"}
+
+            if exception[0]:
+                raise exception[0]
+
+            result[0]["provider"] = "telechars-ai"
+            result[0]["model"] = model_name
+            return result[0]
+
         finally:
-            # Always release the lock
             self.generation_lock.release()
 
     def get_loaded_models(self):
         """Get the list of loaded models"""
         loaded = []