use more threads for inference
app.py CHANGED

@@ -91,7 +91,7 @@ def update_llm(size, model_file, clip_file):
     if (model_cache['size'], model_cache['model_file'], model_cache['clip_file']) != (size, model_file, clip_file):
         mf, cf = ensure_weights(size, model_file, clip_file)
         handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=False)
-        llm = Llama(model_path=mf, chat_handler=handler, n_ctx=1024, verbose=False)
+        llm = Llama(model_path=mf, chat_handler=handler, n_ctx=1024, verbose=False, n_threads=min(2, os.cpu_count()), )
         model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'llm': llm})
     return None  # no UI output
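The only functional change is the added `n_threads` argument, which pins the number of CPU threads llama.cpp uses for generation (the hunk assumes `os` is imported elsewhere in app.py). Below is a minimal sketch of that constructor call in isolation, assuming llama-cpp-python and a hypothetical local GGUF path, and leaving out app.py's SmolVLM2ChatHandler and ensure_weights helpers; it illustrates the parameter, not the Space's full code.

```python
# Minimal sketch of the n_threads cap from the commit, exercised on its own.
# MODEL_PATH is a hypothetical placeholder, not a file from the Space.
import os

from llama_cpp import Llama

MODEL_PATH = "weights/model-Q4_K_M.gguf"  # hypothetical local GGUF file

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=1024,
    # Cap the llama.cpp CPU thread pool explicitly; os.cpu_count() may return
    # None, hence the "or 1" guard added in this sketch.
    n_threads=min(2, os.cpu_count() or 1),
    verbose=False,
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])
```

On a two-vCPU Space, min(2, os.cpu_count()) uses both cores, whereas llama-cpp-python's implicit default is typically lower (roughly half the reported cores), which is presumably what the commit message's "more threads" refers to.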