Command-R

Runtime error

minhdang committed on Mar 29, 2024

Commit

76de27f

verified ·

1 Parent(s): 175bb86

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,12 +24,12 @@ model_id = "Nexusflow/Starling-LM-7B-beta"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id,
                                              # load_in_8bit=True,
-                                             # quantization_config=nf4_config,
                                              torch_dtype = torch.bfloat16,
                                              # device_map="auto"
                                             )
-replace_linears_in_hf(model)
 model.to('cuda').eval()
 @spaces.GPU
 def generate_response(user_input, max_new_tokens, temperature):

 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id,
                                              # load_in_8bit=True,
+                                             quantization_config=nf4_config,
                                              torch_dtype = torch.bfloat16,
                                              # device_map="auto"
                                             )
+# replace_linears_in_hf(model)
 model.to('cuda').eval()
 @spaces.GPU
 def generate_response(user_input, max_new_tokens, temperature):