LLDDWW Claude committed on
Commit
c5fa8ab
·
1 Parent(s): 664b899

fix: use correct Qwen2_5_VL class for Qwen2.5-VL model

Browse files

- Replace Qwen2VLForConditionalGeneration with Qwen2_5_VLForConditionalGeneration
- Install transformers from source for Qwen2.5-VL support
- Fix model loading architecture mismatch

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +4 -4
  2. requirements.txt +1 -1
app.py CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
7
  import spaces
8
  import torch
9
  from PIL import Image
10
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
11
  from qwen_vl_utils import process_vision_info
12
 
13
  # Qwen2.5-VL 모델 ID
@@ -35,10 +35,10 @@ def _extract_json_block(text: str) -> Optional[str]:
35
  def extract_text_from_image(image: Image.Image) -> str:
36
  """Qwen2-VL로 이미지에서 텍스트 추출"""
37
  try:
38
- # Qwen2-VL 모델 로드
39
- model = Qwen2VLForConditionalGeneration.from_pretrained(
40
  MODEL_ID,
41
- torch_dtype=torch.bfloat16,
42
  device_map="auto"
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
 
7
  import spaces
8
  import torch
9
  from PIL import Image
10
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
11
  from qwen_vl_utils import process_vision_info
12
 
13
  # Qwen2.5-VL 모델 ID
 
35
  def extract_text_from_image(image: Image.Image) -> str:
36
  """Qwen2-VL로 이미지에서 텍스트 추출"""
37
  try:
38
+ # Qwen2.5-VL 모델 로드
39
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
40
  MODEL_ID,
41
+ torch_dtype="auto",
42
  device_map="auto"
43
  )
44
  processor = AutoProcessor.from_pretrained(MODEL_ID)
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  gradio>=4.0.0
2
- transformers>=4.37.0
3
  torch>=2.1.0
4
  torchvision
5
  Pillow
 
1
  gradio>=4.0.0
2
+ git+https://github.com/huggingface/transformers
3
  torch>=2.1.0
4
  torchvision
5
  Pillow