Spaces:

Brave1
/

maiarhSLM

Sleeping

Brave1 committed on Apr 21, 2025

Commit

0370e8f

verified ·

1 Parent(s): ee43c13

Update app.py

cuda device track

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,6 +4,13 @@ from dotenv import load_dotenv
 import os
 load_dotenv()
 # Définir vos tokens
 HUGGINGFACE_TOKEN =os.getenv("HUGGINGFACE_TOKEN")
 MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
@@ -15,12 +22,12 @@ app = FastAPI()
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
-    use_auth_token=HUGGINGFACE_TOKEN
 )
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
-    use_auth_token=HUGGINGFACE_TOKEN,
     device_map="auto",
     torch_dtype=torch.float16
 ).eval()

 import os
 load_dotenv()
+if torch.cuda.is_available():
+    if hasattr(torch.backends.cuda, "enable_mem_efficient_sdp"):
+        torch.backends.cuda.enable_mem_efficient_sdp(False)
+    if hasattr(torch.backends.cuda, "enable_flash_sdp"):
+        torch.backends.cuda.enable_flash_sdp(False)
+    if hasattr(torch.backends.cuda, "enable_math_sdp"):
+        torch.backends.cuda.enable_math_sdp(True)
 # Définir vos tokens
 HUGGINGFACE_TOKEN =os.getenv("HUGGINGFACE_TOKEN")
 MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
+    token=HUGGINGFACE_TOKEN
 )
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
+    token=HUGGINGFACE_TOKEN,
     device_map="auto",
     torch_dtype=torch.float16
 ).eval()