rphrp1985 committed
Commit 5d3c589 · verified · 1 Parent(s): 511b1cc

Update app.py

Files changed (1)
  app.py +8 -8
app.py CHANGED
@@ -261,13 +261,13 @@ def respond(
 
 
     if model == "Q5_K_M/Qwen3-Coder-Next-Q5_K_M-00001-of-00003.gguf" :
-        # if llm_model_qwen == None:
-        llm_model_qwen = Llama(
+        if llm_model_qwen == None:
+            llm_model_qwen = Llama(
                 model_path=f"models/Q5_K_M/Qwen3-Coder-Next-Q5_K_M-00001-of-00003.gguf",
                 flash_attn=True,
                 n_gpu_layers=-1,
                 n_batch=2048, # increase
-                n_ctx= 8196, # reduce if you don’t need 8k
+                n_ctx= 4098, # reduce if you don’t need 8k
                 n_threads=16, # set to your CPU cores
                 use_mlock=True,
                 verbose=True,
@@ -284,16 +284,16 @@ def respond(
             ]
         )
         print(x)
-        delete_llama_model(llm_model_qwen)
+        # delete_llama_model(llm_model_qwen)
         yield str(x)
     if model=="GLM-4.7-Flash-Q8_0.gguf" :
-        # if llm_model_glm == None:
-        llm_model_glm = Llama(
+        if llm_model_glm == None:
+            llm_model_glm = Llama(
                 model_path=f"models/{model}",
                 flash_attn=True,
                 n_gpu_layers=-1,
                 n_batch=2048, # increase
-                n_ctx=8196, # reduce if you don’t need 8k
+                n_ctx=4098, # reduce if you don’t need 8k
                 n_threads=16, # set to your CPU cores
                 use_mlock=True,
                 verbose=True,
@@ -308,7 +308,7 @@ def respond(
                 }
             ]
         )
-        delete_llama_model(llm_model_glm)
+        # delete_llama_model(llm_model_glm)
         print(x)
         yield str(x)
 
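
In effect, this commit switches both model branches from constructing a fresh Llama instance on every request and tearing it down afterwards (delete_llama_model) to lazily creating the instance once, caching it in a module-level variable, and reusing it, while also roughly halving the context window (n_ctx 8196 → 4098), which shrinks the KV cache the model must allocate. Below is a minimal sketch of that lazy-initialization pattern with llama-cpp-python; the cache name llm_model_qwen and the constructor arguments come from the diff, while get_qwen_model and the example prompt are hypothetical illustrations, not code from app.py.

# Minimal sketch of the caching pattern adopted in this commit. `llm_model_qwen`
# mirrors the module-level cache in app.py; `get_qwen_model` and the prompt are
# hypothetical illustrations, not names from the repository.
from llama_cpp import Llama

llm_model_qwen = None  # module-level cache; starts empty

def get_qwen_model():
    global llm_model_qwen
    if llm_model_qwen is None:  # construct once, then reuse across requests
        llm_model_qwen = Llama(
            model_path="models/Q5_K_M/Qwen3-Coder-Next-Q5_K_M-00001-of-00003.gguf",
            flash_attn=True,
            n_gpu_layers=-1,   # offload all layers to the GPU
            n_batch=2048,
            n_ctx=4098,        # reduced from 8196 by this commit
            n_threads=16,
            use_mlock=True,
            verbose=True,
        )
    return llm_model_qwen

# Usage: every call reuses the cached instance; no delete_llama_model() afterwards.
x = get_qwen_model().create_chat_completion(
    messages=[{"role": "user", "content": "hello"}],  # illustrative prompt
)
print(x)

Keeping the instance alive trades steady GPU/RAM occupancy for per-request latency, since reloading a multi-shard GGUF on every call is expensive; the smaller n_ctx further caps the KV-cache allocation each resident model needs.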