Spaces:

ilkerzgi
/

image-evaluator

Sleeping

App Files Files Community

ilkerzg Claude Opus 4.5 committed on Dec 21, 2025

Commit

6c06e73

unverified ·

1 Parent(s): 995e233

Upgrade to Qwen3-VL-30B-A3B

Browse files

- Update transformers>=4.51.0 for Qwen3-VL support
- Use AutoModelForImageTextToText with trust_remote_code
- Qwen3-VL-30B-A3B: MoE model with only 3B active params
- Update UI to reflect new model

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (3) hide show

app.py +3 -3
evaluator.py +18 -10
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ AI image quality assessment using:
 - Multi-image comparison
 - Technical metrics (sharpness, colorfulness, contrast, CLIP)
-Powered by Qwen2.5-VL-7B
 """
 import gradio as gr
@@ -708,7 +708,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
     <div style="text-align: center; padding: 20px 0 30px 0;">
         <h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
         <p style="color: #71717a; font-size: 1em; margin-top: 8px;">
-            AI image quality assessment powered by <span style="color: #3b82f6;">Qwen2.5-VL-7B</span>
         </p>
     </div>
     ''')
@@ -850,7 +850,7 @@ with gr.Blocks(title="Image Evaluator", css=DARK_CSS, theme=gr.themes.Base()) as
     gr.HTML('''
     <div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
-        Powered by Qwen2.5-VL-7B &nbsp;|&nbsp; Soft-TIFA &nbsp;|&nbsp; CLIP &nbsp;|&nbsp; LPIPS
     </div>
     ''')

 - Multi-image comparison
 - Technical metrics (sharpness, colorfulness, contrast, CLIP)
+Powered by Qwen3-VL-30B-A3B
 """
 import gradio as gr
     <div style="text-align: center; padding: 20px 0 30px 0;">
         <h1 style="color: #fafafa; font-size: 2.2em; font-weight: 700; margin: 0;">Image Evaluator</h1>
         <p style="color: #71717a; font-size: 1em; margin-top: 8px;">
+            AI image quality assessment powered by <span style="color: #3b82f6;">Qwen3-VL-30B-A3B</span>
         </p>
     </div>
     ''')
     gr.HTML('''
     <div style="text-align: center; padding: 30px 0 20px 0; color: #52525b; font-size: 0.85em;">
+        Powered by Qwen3-VL-30B-A3B &nbsp;|&nbsp; Soft-TIFA &nbsp;|&nbsp; CLIP &nbsp;|&nbsp; LPIPS
     </div>
     ''')

evaluator.py CHANGED Viewed

@@ -199,19 +199,23 @@ class ImageEvaluator:
     def __init__(self, device: str = "cuda"):
         """Initialize evaluator with models."""
         import torch
-        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
         self.device = device if torch.cuda.is_available() else "cpu"
-        # Load Qwen2.5-VL-7B-Instruct
-        model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
-        self.vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             model_name,
             device_map="auto",
             torch_dtype=torch.bfloat16,
         )
-        self.vlm_processor = AutoProcessor.from_pretrained(model_name)
         # Load CLIP for text-image alignment
         import open_clip
@@ -825,20 +829,24 @@ class EditEvaluator:
     def __init__(self, device: str = "cuda"):
         """Initialize evaluator with models."""
         import torch
-        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
         import lpips
         self.device = device if torch.cuda.is_available() else "cpu"
-        # Load Qwen2.5-VL-7B-Instruct
-        model_name = "Qwen/Qwen2.5-VL-7B-Instruct"
-        self.vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
             model_name,
             device_map="auto",
             torch_dtype=torch.bfloat16,
         )
-        self.vlm_processor = AutoProcessor.from_pretrained(model_name)
         # Load LPIPS
         self.lpips_model = lpips.LPIPS(net='alex').to(self.device)

     def __init__(self, device: str = "cuda"):
         """Initialize evaluator with models."""
         import torch
+        from transformers import AutoModelForImageTextToText, AutoProcessor
         self.device = device if torch.cuda.is_available() else "cpu"
+        # Load Qwen3-VL-30B-A3B (MoE with 3B active params)
+        model_name = "Qwen/Qwen3-VL-30B-A3B"
+        self.vlm_model = AutoModelForImageTextToText.from_pretrained(
             model_name,
             device_map="auto",
             torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+        )
+        self.vlm_processor = AutoProcessor.from_pretrained(
+            model_name,
+            trust_remote_code=True,
         )
         # Load CLIP for text-image alignment
         import open_clip
     def __init__(self, device: str = "cuda"):
         """Initialize evaluator with models."""
         import torch
+        from transformers import AutoModelForImageTextToText, AutoProcessor
         import lpips
         self.device = device if torch.cuda.is_available() else "cpu"
+        # Load Qwen3-VL-30B-A3B (MoE with 3B active params)
+        model_name = "Qwen/Qwen3-VL-30B-A3B"
+        self.vlm_model = AutoModelForImageTextToText.from_pretrained(
             model_name,
             device_map="auto",
             torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+        )
+        self.vlm_processor = AutoProcessor.from_pretrained(
+            model_name,
+            trust_remote_code=True,
         )
         # Load LPIPS
         self.lpips_model = lpips.LPIPS(net='alex').to(self.device)

requirements.txt CHANGED Viewed

@@ -2,7 +2,7 @@ gradio>=4.0.0
 spaces
 torch>=2.1.0
 torchvision>=0.16.0
-transformers>=4.45.0
 accelerate>=0.25.0
 qwen-vl-utils>=0.0.8
 Pillow>=10.0.0

 spaces
 torch>=2.1.0
 torchvision>=0.16.0
+transformers>=4.51.0
 accelerate>=0.25.0
 qwen-vl-utils>=0.0.8
 Pillow>=10.0.0