Spaces: Running on Zero
Upgrade to Qwen3-VL-30B-A3B
Browse files
- Update transformers>=4.51.0 for Qwen3-VL support
- Use AutoModelForImageTextToText with trust_remote_code
- Qwen3-VL-30B-A3B: MoE model with only 3B active params
- Update UI to reflect new model
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- app.py +3 -3
- evaluator.py +18 -10
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -7,7 +7,7 @@ AI image quality assessment using:
|
|
| 7 |
- Multi-image comparison
|
| 8 |
- Technical metrics (sharpness, colorfulness, contrast, CLIP)
|
| 9 |
|
| 10 |
-
Powered by
|
| 11 |
"""
|
| 12 |
|
| 13 |
import gradio as gr
|
|
@@ -708,7 +708,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
|
|
| 708 |
<div style="text-align: center; padding: 20px 0 30px 0;">
|
| 709 |
<h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
|
| 710 |
<p style="color: #71717a; font-size: 1em; margin-top: 8px;">
|
| 711 |
-
AI image quality assessment powered by <span style="color: #3b82f6;">
|
| 712 |
</p>
|
| 713 |
</div>
|
| 714 |
''')
|
|
@@ -850,7 +850,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
|
|
| 850 |
|
| 851 |
gr.HTML('''
|
| 852 |
<div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
|
| 853 |
-
Powered by
|
| 854 |
</div>
|
| 855 |
''')
|
| 856 |
|
|
|
|
| 7 |
- Multi-image comparison
|
| 8 |
- Technical metrics (sharpness, colorfulness, contrast, CLIP)
|
| 9 |
|
| 10 |
+
Powered by Qwen3-VL-30B-A3B
|
| 11 |
"""
|
| 12 |
|
| 13 |
import gradio as gr
|
|
|
|
| 708 |
<div style="text-align: center; padding: 20px 0 30px 0;">
|
| 709 |
<h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
|
| 710 |
<p style="color: #71717a; font-size: 1em; margin-top: 8px;">
|
| 711 |
+
AI image quality assessment powered by <span style="color: #3b82f6;">Qwen3-VL-30B-A3B</span>
|
| 712 |
</p>
|
| 713 |
</div>
|
| 714 |
''')
|
|
|
|
| 850 |
|
| 851 |
gr.HTML('''
|
| 852 |
<div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
|
| 853 |
+
Powered by Qwen3-VL-30B-A3B | Soft-TIFA | CLIP | LPIPS
|
| 854 |
</div>
|
| 855 |
''')
|
| 856 |
|
evaluator.py
CHANGED
|
@@ -199,19 +199,23 @@ class ImageEvaluator:
|
|
| 199 |
def __init__(self, device: str = "cuda"):
|
| 200 |
"""Initialize evaluator with models."""
|
| 201 |
import torch
|
| 202 |
-
from transformers import
|
| 203 |
|
| 204 |
self.device = device if torch.cuda.is_available() else "cpu"
|
| 205 |
|
| 206 |
-
# Load
|
| 207 |
-
model_name = "Qwen/
|
| 208 |
|
| 209 |
-
self.vlm_model =
|
| 210 |
model_name,
|
| 211 |
device_map="auto",
|
| 212 |
torch_dtype=torch.bfloat16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
)
|
| 214 |
-
self.vlm_processor = AutoProcessor.from_pretrained(model_name)
|
| 215 |
|
| 216 |
# Load CLIP for text-image alignment
|
| 217 |
import open_clip
|
|
@@ -825,20 +829,24 @@ class EditEvaluator:
|
|
| 825 |
def __init__(self, device: str = "cuda"):
|
| 826 |
"""Initialize evaluator with models."""
|
| 827 |
import torch
|
| 828 |
-
from transformers import
|
| 829 |
import lpips
|
| 830 |
|
| 831 |
self.device = device if torch.cuda.is_available() else "cpu"
|
| 832 |
|
| 833 |
-
# Load
|
| 834 |
-
model_name = "Qwen/
|
| 835 |
|
| 836 |
-
self.vlm_model =
|
| 837 |
model_name,
|
| 838 |
device_map="auto",
|
| 839 |
torch_dtype=torch.bfloat16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
)
|
| 841 |
-
self.vlm_processor = AutoProcessor.from_pretrained(model_name)
|
| 842 |
|
| 843 |
# Load LPIPS
|
| 844 |
self.lpips_model = lpips.LPIPS(net='alex').to(self.device)
|
|
|
|
| 199 |
def __init__(self, device: str = "cuda"):
|
| 200 |
"""Initialize evaluator with models."""
|
| 201 |
import torch
|
| 202 |
+
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 203 |
|
| 204 |
self.device = device if torch.cuda.is_available() else "cpu"
|
| 205 |
|
| 206 |
+
# Load Qwen3-VL-30B-A3B (MoE with 3B active params)
|
| 207 |
+
model_name = "Qwen/Qwen3-VL-30B-A3B"
|
| 208 |
|
| 209 |
+
self.vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 210 |
model_name,
|
| 211 |
device_map="auto",
|
| 212 |
torch_dtype=torch.bfloat16,
|
| 213 |
+
trust_remote_code=True,
|
| 214 |
+
)
|
| 215 |
+
self.vlm_processor = AutoProcessor.from_pretrained(
|
| 216 |
+
model_name,
|
| 217 |
+
trust_remote_code=True,
|
| 218 |
)
|
|
|
|
| 219 |
|
| 220 |
# Load CLIP for text-image alignment
|
| 221 |
import open_clip
|
|
|
|
| 829 |
def __init__(self, device: str = "cuda"):
|
| 830 |
"""Initialize evaluator with models."""
|
| 831 |
import torch
|
| 832 |
+
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 833 |
import lpips
|
| 834 |
|
| 835 |
self.device = device if torch.cuda.is_available() else "cpu"
|
| 836 |
|
| 837 |
+
# Load Qwen3-VL-30B-A3B (MoE with 3B active params)
|
| 838 |
+
model_name = "Qwen/Qwen3-VL-30B-A3B"
|
| 839 |
|
| 840 |
+
self.vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 841 |
model_name,
|
| 842 |
device_map="auto",
|
| 843 |
torch_dtype=torch.bfloat16,
|
| 844 |
+
trust_remote_code=True,
|
| 845 |
+
)
|
| 846 |
+
self.vlm_processor = AutoProcessor.from_pretrained(
|
| 847 |
+
model_name,
|
| 848 |
+
trust_remote_code=True,
|
| 849 |
)
|
|
|
|
| 850 |
|
| 851 |
# Load LPIPS
|
| 852 |
self.lpips_model = lpips.LPIPS(net='alex').to(self.device)
|
requirements.txt
CHANGED
|
@@ -2,7 +2,7 @@ gradio>=4.0.0
|
|
| 2 |
spaces
|
| 3 |
torch>=2.1.0
|
| 4 |
torchvision>=0.16.0
|
| 5 |
-
transformers>=4.
|
| 6 |
accelerate>=0.25.0
|
| 7 |
qwen-vl-utils>=0.0.8
|
| 8 |
Pillow>=10.0.0
|
|
|
|
| 2 |
spaces
|
| 3 |
torch>=2.1.0
|
| 4 |
torchvision>=0.16.0
|
| 5 |
+
transformers>=4.51.0
|
| 6 |
accelerate>=0.25.0
|
| 7 |
qwen-vl-utils>=0.0.8
|
| 8 |
Pillow>=10.0.0
|