Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from llama_cpp import Llama
|
|
| 7 |
import requests
|
| 8 |
import tempfile
|
| 9 |
import json
|
|
|
|
| 10 |
from concurrent.futures import ThreadPoolExecutor
|
| 11 |
|
| 12 |
app = Flask(__name__)
|
|
@@ -53,7 +54,10 @@ class LLMManager:
|
|
| 53 |
use_mlock=True,
|
| 54 |
mmap=True,
|
| 55 |
low_vram=False,
|
| 56 |
-
vocab_only=False
|
|
|
|
|
|
|
|
|
|
| 57 |
)
|
| 58 |
|
| 59 |
os.remove(temp_path)
|
|
@@ -140,6 +144,7 @@ class LLMManager:
|
|
| 140 |
|
| 141 |
finally:
|
| 142 |
self.generation_lock.release()
|
|
|
|
| 143 |
|
| 144 |
def get_loaded_models(self):
|
| 145 |
"""Obtener lista de modelos cargados"""
|
|
|
|
| 7 |
import requests
|
| 8 |
import tempfile
|
| 9 |
import json
|
| 10 |
+
import gc
|
| 11 |
from concurrent.futures import ThreadPoolExecutor
|
| 12 |
|
| 13 |
app = Flask(__name__)
|
|
|
|
| 54 |
use_mlock=True,
|
| 55 |
mmap=True,
|
| 56 |
low_vram=False,
|
| 57 |
+
vocab_only=False,
|
| 58 |
+
verbose=False,
|
| 59 |
+
logits_all=False,
|
| 60 |
+
mul_mat_q=True
|
| 61 |
)
|
| 62 |
|
| 63 |
os.remove(temp_path)
|
|
|
|
| 144 |
|
| 145 |
finally:
|
| 146 |
self.generation_lock.release()
|
| 147 |
+
gc.collect()
|
| 148 |
|
| 149 |
def get_loaded_models(self):
|
| 150 |
"""Obtener lista de modelos cargados"""
|