Luigi committed on
Commit bd12f6b · 1 Parent(s): bdd1478

use more threads for inference

Files changed (1): app.py (+1 −1)
app.py CHANGED
@@ -91,7 +91,7 @@ def update_llm(size, model_file, clip_file):
     if (model_cache['size'], model_cache['model_file'], model_cache['clip_file']) != (size, model_file, clip_file):
         mf, cf = ensure_weights(size, model_file, clip_file)
         handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=False)
-        llm = Llama(model_path=mf, chat_handler=handler, n_ctx=1024, verbose=False)
+        llm = Llama(model_path=mf, chat_handler=handler, n_ctx=1024, verbose=False, n_threads=min(2, os.cpu_count()))
         model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'llm': llm})
     return None  # no UI output
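
For context, here is a minimal standalone sketch of how the n_threads parameter behaves in llama-cpp-python's Llama constructor. The model path and the cpu_count() fallback are illustrative only, not taken from this commit; n_threads bounds the CPU threads used during generation, so min(2, os.cpu_count()) caps inference at two threads even on hosts with more cores.

    import os
    from llama_cpp import Llama  # pip install llama-cpp-python

    # Sketch only: "model.gguf" is a placeholder path, not a file from this repo.
    llm = Llama(
        model_path="model.gguf",
        n_ctx=1024,
        # Cap generation at 2 threads; os.cpu_count() may return None on some
        # platforms, hence the `or 1` fallback.
        n_threads=min(2, os.cpu_count() or 1),
        verbose=False,
    )

On a typical 2-vCPU host this uses both cores for token generation; recent llama-cpp-python versions also expose a separate n_threads_batch knob for prompt processing.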