credent007 committed on
Commit
cd25762
·
verified ·
1 Parent(s): 450c6fd

Update llm.py

Browse files
Files changed (1) hide show
  1. llm.py +11 -12
llm.py CHANGED
@@ -1,25 +1,24 @@
1
  from huggingface_hub import login
2
  import os
3
  login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
4
- import torch
5
  import asyncio
6
  from functools import partial
7
  import time
8
- from transformers import AutoProcessor, AutoModelForVision2Seq #, BitsAndBytesConfig
 
 
 
9
 
10
- # Quantization config
11
- # quant_config = BitsAndBytesConfig(load_in_8bit=True)
12
- model_name="Qwen/Qwen3.5-9B-Base"
13
- # Load processor
14
- processor = AutoProcessor.from_pretrained(model_name)
15
 
16
- # Load model (auto device mapping)
17
- model = AutoModelForVision2Seq.from_pretrained(
18
  model_name,
19
- # quantization_config=quant_config,
20
  device_map="auto",
21
- attn_implementation='sdpa'
22
-
23
  )
24
 
25
  print("CUDA available:", torch.cuda.is_available())
 
1
  from huggingface_hub import login
2
  import os
3
  login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
 
4
  import asyncio
5
  from functools import partial
6
  import time
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ import torch
9
+
10
+ model_name = "Qwen/Qwen3.5-9B-Base"
11
 
12
+ tokenizer = AutoTokenizer.from_pretrained(
13
+ model_name,
14
+ trust_remote_code=True
15
+ )
 
16
 
17
+ model = AutoModelForCausalLM.from_pretrained(
 
18
  model_name,
 
19
  device_map="auto",
20
+ torch_dtype=torch.float16,
21
+ trust_remote_code=True
22
  )
23
 
24
  print("CUDA available:", torch.cuda.is_available())