Spaces:

Linly-AI
/

Linly-ChatFlow

Runtime error

yuhaofeng-shiba committed on May 12, 2023

Commit

9f88a9b

1 Parent(s): b28b3e5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ def init_args():
     args.batch_size = 1
     args.seq_length = 512
     args.world_size = 1
-    args.use_int8 = False
     args.top_p = 0
     args.repetition_penalty_range = 1024
     args.repetition_penalty_slope = 0
@@ -40,11 +40,12 @@ def init_model():
     model = LLaMa(args)
     torch.set_default_tensor_type(torch.FloatTensor)
     model = load_model(model, args.load_model_path)
-    print('done load model.')
     model.eval()
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
     lm_generation = LmGeneration(model, args.tokenizer)

     args.batch_size = 1
     args.seq_length = 512
     args.world_size = 1
+    args.use_int8 = True
     args.top_p = 0
     args.repetition_penalty_range = 1024
     args.repetition_penalty_slope = 0
     model = LLaMa(args)
     torch.set_default_tensor_type(torch.FloatTensor)
     model = load_model(model, args.load_model_path)
+    print('load model done.')
     model.eval()
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
+    print('to cuda.')
     lm_generation = LmGeneration(model, args.tokenizer)