Brave1 committed on
Commit
0370e8f
·
verified ·
1 Parent(s): ee43c13

Update app.py

Browse files

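cuda device track: when CUDA is available, disable the flash and memory-efficient SDP backends and enable the math SDP backend; pass `token=` instead of `use_auth_token=` to `from_pretrained`.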

Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -4,6 +4,13 @@ from dotenv import load_dotenv
4
  import os
5
 
6
  load_dotenv()
 
 
 
 
 
 
 
7
  # Définir vos tokens
8
  HUGGINGFACE_TOKEN =os.getenv("HUGGINGFACE_TOKEN")
9
  MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
@@ -15,12 +22,12 @@ app = FastAPI()
15
  tokenizer = AutoTokenizer.from_pretrained(
16
  MODEL_NAME,
17
  trust_remote_code=True,
18
- use_auth_token=HUGGINGFACE_TOKEN
19
  )
20
  model = AutoModelForCausalLM.from_pretrained(
21
  MODEL_NAME,
22
  trust_remote_code=True,
23
- use_auth_token=HUGGINGFACE_TOKEN,
24
  device_map="auto",
25
  torch_dtype=torch.float16
26
  ).eval()
 
4
  import os
5
 
6
  load_dotenv()
7
+ if torch.cuda.is_available():
8
+ if hasattr(torch.backends.cuda, "enable_mem_efficient_sdp"):
9
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
10
+ if hasattr(torch.backends.cuda, "enable_flash_sdp"):
11
+ torch.backends.cuda.enable_flash_sdp(False)
12
+ if hasattr(torch.backends.cuda, "enable_math_sdp"):
13
+ torch.backends.cuda.enable_math_sdp(True)
14
  # Définir vos tokens
15
  HUGGINGFACE_TOKEN =os.getenv("HUGGINGFACE_TOKEN")
16
  MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
 
22
  tokenizer = AutoTokenizer.from_pretrained(
23
  MODEL_NAME,
24
  trust_remote_code=True,
25
+ token=HUGGINGFACE_TOKEN
26
  )
27
  model = AutoModelForCausalLM.from_pretrained(
28
  MODEL_NAME,
29
  trust_remote_code=True,
30
+ token=HUGGINGFACE_TOKEN,
31
  device_map="auto",
32
  torch_dtype=torch.float16
33
  ).eval()