Spaces:

credent007
/

easyocr-phi3

Paused

credent007 committed on Apr 10

Commit

cd25762

verified ·

1 Parent(s): 450c6fd

Update llm.py

Files changed (1) hide show

llm.py CHANGED Viewed

@@ -1,25 +1,24 @@
 from huggingface_hub import login
 import os
 login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
-import torch
 import asyncio
 from functools import partial
 import time
-from transformers import AutoProcessor, AutoModelForVision2Seq #, BitsAndBytesConfig
-# Quantization config
-# quant_config = BitsAndBytesConfig(load_in_8bit=True)
-model_name="Qwen/Qwen3.5-9B-Base"
-# Load processor
-processor = AutoProcessor.from_pretrained(model_name)
-# Load model (auto device mapping)
-model = AutoModelForVision2Seq.from_pretrained(
     model_name,
-    # quantization_config=quant_config,
     device_map="auto",
-    attn_implementation='sdpa'
 )
 print("CUDA available:", torch.cuda.is_available())

 from huggingface_hub import login
 import os
 login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
 import asyncio
 from functools import partial
 import time
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+model_name = "Qwen/Qwen3.5-9B-Base"
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
     model_name,
     device_map="auto",
+    torch_dtype=torch.float16,
+    trust_remote_code=True
 )
 print("CUDA available:", torch.cuda.is_available())