fix: use correct Qwen2_5_VL class for Qwen2.5-VL model
Browse files
- Replace Qwen2VLForConditionalGeneration with Qwen2_5_VLForConditionalGeneration
- Install transformers from source for Qwen2.5-VL support
- Fix model loading architecture mismatch
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- app.py +4 -4
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import gradio as gr
|
|
| 7 |
import spaces
|
| 8 |
import torch
|
| 9 |
from PIL import Image
|
| 10 |
-
from transformers import
|
| 11 |
from qwen_vl_utils import process_vision_info
|
| 12 |
|
| 13 |
# Qwen2.5-VL 모델 ID
|
|
@@ -35,10 +35,10 @@ def _extract_json_block(text: str) -> Optional[str]:
|
|
| 35 |
def extract_text_from_image(image: Image.Image) -> str:
|
| 36 |
"""Qwen2-VL로 이미지에서 텍스트 추출"""
|
| 37 |
try:
|
| 38 |
-
# Qwen2-VL 모델 로드
|
| 39 |
-
model =
|
| 40 |
MODEL_ID,
|
| 41 |
-
torch_dtype=
|
| 42 |
device_map="auto"
|
| 43 |
)
|
| 44 |
processor = AutoProcessor.from_pretrained(MODEL_ID)
|
|
|
|
| 7 |
import spaces
|
| 8 |
import torch
|
| 9 |
from PIL import Image
|
| 10 |
+
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
|
| 11 |
from qwen_vl_utils import process_vision_info
|
| 12 |
|
| 13 |
# Qwen2.5-VL 모델 ID
|
|
|
|
| 35 |
def extract_text_from_image(image: Image.Image) -> str:
|
| 36 |
"""Qwen2-VL로 이미지에서 텍스트 추출"""
|
| 37 |
try:
|
| 38 |
+
# Qwen2.5-VL 모델 로드
|
| 39 |
+
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 40 |
MODEL_ID,
|
| 41 |
+
torch_dtype="auto",
|
| 42 |
device_map="auto"
|
| 43 |
)
|
| 44 |
processor = AutoProcessor.from_pretrained(MODEL_ID)
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
-
transformers
|
| 3 |
torch>=2.1.0
|
| 4 |
torchvision
|
| 5 |
Pillow
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
+
git+https://github.com/huggingface/transformers
|
| 3 |
torch>=2.1.0
|
| 4 |
torchvision
|
| 5 |
Pillow
|