Spaces:
Paused
Paused
add flash-attn
Browse files
- app.py +4 -4
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -29,8 +29,6 @@ import modelscope_studio as mgr
|
|
| 29 |
os.system("pip list|grep torch")
|
| 30 |
os.system("pip list|grep trans")
|
| 31 |
os.system("pip list|grep flash")
|
| 32 |
-
os.system("nvidia-smi")
|
| 33 |
-
os.system("ll /usr/local/cuda*")
|
| 34 |
|
| 35 |
# Argparser
|
| 36 |
parser = argparse.ArgumentParser(description='demo')
|
|
@@ -46,7 +44,8 @@ if 'int4' in model_path:
|
|
| 46 |
if device == 'mps':
|
| 47 |
print('Error: running int4 model with bitsandbytes on Mac is not supported right now.')
|
| 48 |
exit()
|
| 49 |
-
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
|
|
|
|
| 50 |
else:
|
| 51 |
if args.multi_gpus:
|
| 52 |
from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
|
|
@@ -72,7 +71,8 @@ else:
|
|
| 72 |
|
| 73 |
model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
|
| 74 |
else:
|
| 75 |
-
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
|
|
|
| 76 |
model = model.to(device=device)
|
| 77 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
| 78 |
model.eval()
|
|
|
|
| 29 |
os.system("pip list|grep torch")
|
| 30 |
os.system("pip list|grep trans")
|
| 31 |
os.system("pip list|grep flash")
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# Argparser
|
| 34 |
parser = argparse.ArgumentParser(description='demo')
|
|
|
|
| 44 |
if device == 'mps':
|
| 45 |
print('Error: running int4 model with bitsandbytes on Mac is not supported right now.')
|
| 46 |
exit()
|
| 47 |
+
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
|
| 48 |
+
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
|
| 49 |
else:
|
| 50 |
if args.multi_gpus:
|
| 51 |
from accelerate import load_checkpoint_and_dispatch, init_empty_weights, infer_auto_device_map
|
|
|
|
| 71 |
|
| 72 |
model = load_checkpoint_and_dispatch(model, model_path, dtype=torch.bfloat16, device_map=device_map)
|
| 73 |
else:
|
| 74 |
+
#model = AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
|
| 75 |
+
model = AutoModel.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16)
|
| 76 |
model = model.to(device=device)
|
| 77 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
| 78 |
model.eval()
|
requirements.txt
CHANGED
|
@@ -3,6 +3,7 @@ torch==2.1.2
|
|
| 3 |
torchvision==0.16.2
|
| 4 |
transformers==4.40.2
|
| 5 |
sentencepiece==0.1.99
|
|
|
|
| 6 |
opencv-python
|
| 7 |
decord
|
| 8 |
gradio==4.22.0
|
|
|
|
| 3 |
torchvision==0.16.2
|
| 4 |
transformers==4.40.2
|
| 5 |
sentencepiece==0.1.99
|
| 6 |
+
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
|
| 7 |
opencv-python
|
| 8 |
decord
|
| 9 |
gradio==4.22.0
|