Spaces:
Runtime error
Runtime error
Sean-Case
committed on
Commit
·
9aef340
1
Parent(s):
febdc08
Setting gpu_layer slider to false to prevent accidental misuse
Browse files
- .gitignore +1 -0
- app.py +4 -4
.gitignore
CHANGED
|
@@ -3,5 +3,6 @@
|
|
| 3 |
*.pdf
|
| 4 |
*.spec
|
| 5 |
*.toc
|
|
|
|
| 6 |
build/*
|
| 7 |
dist/*
|
|
|
|
| 3 |
*.pdf
|
| 4 |
*.spec
|
| 5 |
*.toc
|
| 6 |
+
bootstrapper.py
|
| 7 |
build/*
|
| 8 |
dist/*
|
app.py
CHANGED
|
@@ -83,7 +83,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
|
|
| 83 |
if model_type == "Orca Mini":
|
| 84 |
|
| 85 |
gpu_config.update_gpu(gpu_layers)
|
| 86 |
-
cpu_config.update_gpu(
|
| 87 |
|
| 88 |
print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
|
| 89 |
|
|
@@ -91,9 +91,9 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
|
|
| 91 |
print(vars(cpu_config))
|
| 92 |
|
| 93 |
try:
|
| 94 |
-
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(
|
| 95 |
except:
|
| 96 |
-
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(
|
| 97 |
|
| 98 |
tokenizer = []
|
| 99 |
|
|
@@ -220,7 +220,7 @@ with block:
|
|
| 220 |
|
| 221 |
with gr.Tab("Advanced features"):
|
| 222 |
model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
|
| 223 |
-
gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0)
|
| 224 |
|
| 225 |
gr.HTML(
|
| 226 |
"<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
|
|
|
|
| 83 |
if model_type == "Orca Mini":
|
| 84 |
|
| 85 |
gpu_config.update_gpu(gpu_layers)
|
| 86 |
+
cpu_config.update_gpu(0)
|
| 87 |
|
| 88 |
print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
|
| 89 |
|
|
|
|
| 91 |
print(vars(cpu_config))
|
| 92 |
|
| 93 |
try:
|
| 94 |
+
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
|
| 95 |
except:
|
| 96 |
+
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
|
| 97 |
|
| 98 |
tokenizer = []
|
| 99 |
|
|
|
|
| 220 |
|
| 221 |
with gr.Tab("Advanced features"):
|
| 222 |
model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
|
| 223 |
+
gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0, visible=False)
|
| 224 |
|
| 225 |
gr.HTML(
|
| 226 |
"<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
|