MarcosFRGames committed on
Commit
34bd15b
verified
1 Parent(s): 7017b88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -24,11 +24,11 @@ class LLMManager:
24
  self.models_config = models_config
25
  self.executor = ThreadPoolExecutor(max_workers=2)
26
  self.generation_lock = threading.Lock()
27
- self.load_all_models()
28
  self.session = requests.Session()
29
  adapter = requests.adapters.HTTPAdapter(pool_connections=2, pool_maxsize=2)
30
  self.session.mount('http://', adapter)
31
  self.session.mount('https://', adapter)
 
32
 
33
  def load_all_models(self):
34
  """Cargar todos los modelos en RAM"""
@@ -102,19 +102,19 @@ class LLMManager:
102
  """Generar respuesta con modelo espec铆fico"""
103
  if not self.generation_lock.acquire(blocking=False):
104
  return {"error": "Servidor ocupado - Generaci贸n en progreso"}
105
-
106
  try:
107
  model_data = self.get_model(model_name)
108
-
109
  if not model_data or not model_data["loaded"]:
110
  error_msg = f"Modelo {model_name} no cargado"
111
  if model_data and "error" in model_data:
112
  error_msg += f": {model_data['error']}"
113
  return {"error": error_msg}
114
-
115
  result = [None]
116
  exception = [None]
117
-
118
  def generate():
119
  try:
120
  result[0] = model_data["instance"].create_chat_completion(
@@ -123,21 +123,21 @@ class LLMManager:
123
  )
124
  except Exception as e:
125
  exception[0] = e
126
-
127
  gen_thread = threading.Thread(target=generate, daemon=True)
128
  gen_thread.start()
129
  gen_thread.join(timeout=120)
130
-
131
  if gen_thread.is_alive():
132
  return {"error": "Timeout en generaci贸n (120 segundos)"}
133
-
134
  if exception[0]:
135
  raise exception[0]
136
-
137
  result[0]["provider"] = "telechars-ai"
138
  result[0]["model"] = model_name
139
  return result[0]
140
-
141
  finally:
142
  self.generation_lock.release()
143
 
 
24
  self.models_config = models_config
25
  self.executor = ThreadPoolExecutor(max_workers=2)
26
  self.generation_lock = threading.Lock()
 
27
  self.session = requests.Session()
28
  adapter = requests.adapters.HTTPAdapter(pool_connections=2, pool_maxsize=2)
29
  self.session.mount('http://', adapter)
30
  self.session.mount('https://', adapter)
31
+ self.load_all_models()
32
 
33
  def load_all_models(self):
34
  """Cargar todos los modelos en RAM"""
 
102
  """Generar respuesta con modelo espec铆fico"""
103
  if not self.generation_lock.acquire(blocking=False):
104
  return {"error": "Servidor ocupado - Generaci贸n en progreso"}
105
+
106
  try:
107
  model_data = self.get_model(model_name)
108
+
109
  if not model_data or not model_data["loaded"]:
110
  error_msg = f"Modelo {model_name} no cargado"
111
  if model_data and "error" in model_data:
112
  error_msg += f": {model_data['error']}"
113
  return {"error": error_msg}
114
+
115
  result = [None]
116
  exception = [None]
117
+
118
  def generate():
119
  try:
120
  result[0] = model_data["instance"].create_chat_completion(
 
123
  )
124
  except Exception as e:
125
  exception[0] = e
126
+
127
  gen_thread = threading.Thread(target=generate, daemon=True)
128
  gen_thread.start()
129
  gen_thread.join(timeout=120)
130
+
131
  if gen_thread.is_alive():
132
  return {"error": "Timeout en generaci贸n (120 segundos)"}
133
+
134
  if exception[0]:
135
  raise exception[0]
136
+
137
  result[0]["provider"] = "telechars-ai"
138
  result[0]["model"] = model_name
139
  return result[0]
140
+
141
  finally:
142
  self.generation_lock.release()
143