ilkerzg Claude Opus 4.5 committed on
Commit
6c06e73
·
unverified ·
1 Parent(s): 995e233

Upgrade to Qwen3-VL-30B-A3B

Browse files

- Update transformers>=4.51.0 for Qwen3-VL support
- Use AutoModelForImageTextToText with trust_remote_code
- Qwen3-VL-30B-A3B: MoE model with only 3B active params
- Update UI to reflect new model

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (3) hide show
  1. app.py +3 -3
  2. evaluator.py +18 -10
  3. requirements.txt +1 -1
app.py CHANGED
@@ -7,7 +7,7 @@ AI image quality assessment using:
7
  - Multi-image comparison
8
  - Technical metrics (sharpness, colorfulness, contrast, CLIP)
9
 
10
- Powered by Qwen2.5-VL-7B
11
  """
12
 
13
  import gradio as gr
@@ -708,7 +708,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
708
  <div style="text-align: center; padding: 20px 0 30px 0;">
709
  <h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
710
  <p style="color: #71717a; font-size: 1em; margin-top: 8px;">
711
- AI image quality assessment powered by <span style="color: #3b82f6;">Qwen2.5-VL-7B</span>
712
  </p>
713
  </div>
714
  ''')
@@ -850,7 +850,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
850
 
851
  gr.HTML('''
852
  <div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
853
- Powered by Qwen2.5-VL-7B &nbsp;|&nbsp; Soft-TIFA &nbsp;|&nbsp; CLIP &nbsp;|&nbsp; LPIPS
854
  </div>
855
  ''')
856
 
 
7
  - Multi-image comparison
8
  - Technical metrics (sharpness, colorfulness, contrast, CLIP)
9
 
10
+ Powered by Qwen3-VL-30B-A3B
11
  """
12
 
13
  import gradio as gr
 
708
  <div style="text-align: center; padding: 20px 0 30px 0;">
709
  <h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
710
  <p style="color: #71717a; font-size: 1em; margin-top: 8px;">
711
+ AI image quality assessment powered by <span style="color: #3b82f6;">Qwen3-VL-30B-A3B</span>
712
  </p>
713
  </div>
714
  ''')
 
850
 
851
  gr.HTML('''
852
  <div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
853
+ Powered by Qwen3-VL-30B-A3B &nbsp;|&nbsp; Soft-TIFA &nbsp;|&nbsp; CLIP &nbsp;|&nbsp; LPIPS
854
  </div>
855
  ''')
856
 
evaluator.py CHANGED
@@ -199,19 +199,23 @@ class ImageEvaluator:
199
  def __init__(self, device: str = "cuda"):
200
  """Initialize evaluator with models."""
201
  import torch
202
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
203
 
204
  self.device = device if torch.cuda.is_available() else "cpu"
205
 
206
- # Load Qwen2.5-VL-7B-Instruct
207
- model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
208
 
209
- self.vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
210
  model_name,
211
  device_map="auto",
212
  torch_dtype=torch.bfloat16,
 
 
 
 
 
213
  )
214
- self.vlm_processor = AutoProcessor.from_pretrained(model_name)
215
 
216
  # Load CLIP for text-image alignment
217
  import open_clip
@@ -825,20 +829,24 @@ class EditEvaluator:
825
  def __init__(self, device: str = "cuda"):
826
  """Initialize evaluator with models."""
827
  import torch
828
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
829
  import lpips
830
 
831
  self.device = device if torch.cuda.is_available() else "cpu"
832
 
833
- # Load Qwen2.5-VL-7B-Instruct
834
- model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
835
 
836
- self.vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
837
  model_name,
838
  device_map="auto",
839
  torch_dtype=torch.bfloat16,
 
 
 
 
 
840
  )
841
- self.vlm_processor = AutoProcessor.from_pretrained(model_name)
842
 
843
  # Load LPIPS
844
  self.lpips_model = lpips.LPIPS(net='alex').to(self.device)
 
199
  def __init__(self, device: str = "cuda"):
200
  """Initialize evaluator with models."""
201
  import torch
202
+ from transformers import AutoModelForImageTextToText, AutoProcessor
203
 
204
  self.device = device if torch.cuda.is_available() else "cpu"
205
 
206
+ # Load Qwen3-VL-30B-A3B (MoE with 3B active params)
207
+ model_name = "Qwen/Qwen3-VL-30B-A3B"
208
 
209
+ self.vlm_model = AutoModelForImageTextToText.from_pretrained(
210
  model_name,
211
  device_map="auto",
212
  torch_dtype=torch.bfloat16,
213
+ trust_remote_code=True,
214
+ )
215
+ self.vlm_processor = AutoProcessor.from_pretrained(
216
+ model_name,
217
+ trust_remote_code=True,
218
  )
 
219
 
220
  # Load CLIP for text-image alignment
221
  import open_clip
 
829
  def __init__(self, device: str = "cuda"):
830
  """Initialize evaluator with models."""
831
  import torch
832
+ from transformers import AutoModelForImageTextToText, AutoProcessor
833
  import lpips
834
 
835
  self.device = device if torch.cuda.is_available() else "cpu"
836
 
837
+ # Load Qwen3-VL-30B-A3B (MoE with 3B active params)
838
+ model_name = "Qwen/Qwen3-VL-30B-A3B"
839
 
840
+ self.vlm_model = AutoModelForImageTextToText.from_pretrained(
841
  model_name,
842
  device_map="auto",
843
  torch_dtype=torch.bfloat16,
844
+ trust_remote_code=True,
845
+ )
846
+ self.vlm_processor = AutoProcessor.from_pretrained(
847
+ model_name,
848
+ trust_remote_code=True,
849
  )
 
850
 
851
  # Load LPIPS
852
  self.lpips_model = lpips.LPIPS(net='alex').to(self.device)
requirements.txt CHANGED
@@ -2,7 +2,7 @@ gradio>=4.0.0
2
  spaces
3
  torch>=2.1.0
4
  torchvision>=0.16.0
5
- transformers>=4.45.0
6
  accelerate>=0.25.0
7
  qwen-vl-utils>=0.0.8
8
  Pillow>=10.0.0
 
2
  spaces
3
  torch>=2.1.0
4
  torchvision>=0.16.0
5
+ transformers>=4.51.0
6
  accelerate>=0.25.0
7
  qwen-vl-utils>=0.0.8
8
  Pillow>=10.0.0