MarcosFRGames committed on
Commit e50dba9 (verified)
1 Parent(s): 27e596e

Update app.py

Files changed (1)
  1. app.py +31 -28
app.py CHANGED
@@ -1,13 +1,13 @@
 from flask import Flask, request, jsonify, Response
 import os
 import logging
+import threading
 import time
 from llama_cpp import Llama
 import requests
 import tempfile
 import json
 from concurrent.futures import ThreadPoolExecutor
-import threading
 
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -96,44 +96,47 @@ class LLMManager:
 
     def chat_completion(self, model_name, messages, **kwargs):
         """Generate a response with a specific model"""
-        # Try to acquire the lock (non-blocking)
         if not self.generation_lock.acquire(blocking=False):
             return {"error": "Server busy - generation in progress"}
-
+
         try:
             model_data = self.get_model(model_name)
-
+
             if not model_data or not model_data["loaded"]:
                 error_msg = f"Model {model_name} not loaded"
                 if model_data and "error" in model_data:
                     error_msg += f": {model_data['error']}"
                 return {"error": error_msg}
-
-            import signal
-
-            def timeout_handler(signum, frame):
-                raise TimeoutError("Generation timeout (120 seconds)")
-
-            # Set the timeout (120 seconds)
-            signal.signal(signal.SIGALRM, timeout_handler)
-            signal.alarm(120)
-
-            try:
-                response = model_data["instance"].create_chat_completion(
-                    messages=messages,
-                    **kwargs
-                )
-            finally:
-                signal.alarm(0)  # Cancel the timeout
-
-            response["provider"] = "telechars-ai"
-            response["model"] = model_name
-            return response
-
+
+            result = [None]
+            exception = [None]
+
+            def generate():
+                try:
+                    result[0] = model_data["instance"].create_chat_completion(
+                        messages=messages,
+                        **kwargs
+                    )
+                except Exception as e:
+                    exception[0] = e
+
+            gen_thread = threading.Thread(target=generate)
+            gen_thread.start()
+            gen_thread.join(timeout=120)
+
+            if gen_thread.is_alive():
+                return {"error": "Generation timeout (120 seconds)"}
+
+            if exception[0]:
+                raise exception[0]
+
+            result[0]["provider"] = "telechars-ai"
+            result[0]["model"] = model_name
+            return result[0]
+
         finally:
-            # Always release the lock
             self.generation_lock.release()
-
+
     def get_loaded_models(self):
         """Get the list of loaded models"""
        loaded = []
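
Note on the change: signal.signal() may only be called from the main thread (it raises ValueError anywhere else), and Flask typically dispatches each request on a worker thread, so the previous SIGALRM-based timeout could not fire reliably there (SIGALRM is also unavailable on Windows). The commit swaps in a portable pattern: run the blocking call in a threading.Thread and bound the wait with join(timeout=...). Below is a minimal standalone sketch of that pattern; the run_with_timeout helper is hypothetical and not part of this commit.

import threading
import time

def run_with_timeout(fn, timeout_s, *args, **kwargs):
    # Hypothetical helper (not in the commit): run fn(*args, **kwargs) in a
    # worker thread and wait at most timeout_s seconds for it to finish.
    result = [None]        # one-element lists act as mutable result cells
    exception = [None]

    def worker():
        try:
            result[0] = fn(*args, **kwargs)
        except Exception as e:
            exception[0] = e

    t = threading.Thread(target=worker, daemon=True)
    t.start()
    t.join(timeout=timeout_s)  # returns after timeout_s even if fn is stuck

    if t.is_alive():           # timed out; note the worker keeps running
        return False, None
    if exception[0] is not None:
        raise exception[0]     # surface the worker's exception to the caller
    return True, result[0]

if __name__ == "__main__":
    ok, value = run_with_timeout(lambda: 2 + 2, 1.0)
    print(ok, value)           # True 4
    ok, value = run_with_timeout(time.sleep, 0.2, 5)
    print(ok, value)           # False None (the sleep(5) worker still runs)

One caveat visible in the diff: when join() times out, the worker thread is not cancelled, so create_chat_completion keeps running in the background while the finally block has already released generation_lock; a subsequent request could then start a second generation on the same Llama instance.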