Spaces:

VOIDER
/

VisualQuality-R1-7B

Build error

App Files Files Community

VOIDER committed on Jan 8

Commit

9520f85

verified ·

1 Parent(s): c37360a

Update app.py

Browse files

Files changed (1) hide show

app.py +326 -178

app.py CHANGED Viewed

@@ -1,13 +1,21 @@
 import gradio as gr
-import torch
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig, TextIteratorStreamer
-from qwen_vl_utils import process_vision_info
-from threading import Thread
 import re
-import spaces
 # Константы
-MODEL_PATH = "TianheWu/VisualQuality-R1-7B"
 # Промпты
 PROMPT = (
@@ -19,40 +27,85 @@ PROMPT = (
 QUESTION_TEMPLATE_THINKING = "{Question} First output the thinking process in <think> </think> tags and then output the final answer with only one score in <answer> </answer> tags."
 QUESTION_TEMPLATE_NO_THINKING = "{Question} Please only output the final answer with only one score in <answer> </answer> tags."
-# Глобальные переменные для модели
-model = None
-processor = None
 def load_model():
-    """Загрузка модели с 8-bit квантизацией"""
-    global model, processor
-    if model is not None:
         return
-    print("Loading model...")
-    quantization_config = BitsAndBytesConfig(
-        load_in_8bit=True,
-        llm_int8_threshold=6.0,
-    )
-    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        MODEL_PATH,
-        quantization_config=quantization_config,
-        device_map="auto",
-        trust_remote_code=True,
-        torch_dtype=torch.float16,
     )
-    model.eval()
-    processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
-    processor.tokenizer.padding_side = "left"
     print("Model loaded successfully!")
 def extract_score(text):
     """Извлечение оценки из текста"""
     try:
@@ -65,25 +118,23 @@ def extract_score(text):
         if score_match:
             score = float(score_match.group())
             return min(max(score, 1.0), 5.0)
-    except Exception as e:
-        print(f"Error extracting score: {e}")
     return None
 def extract_thinking(text):
-    """Извлечение процесса мышления из текста"""
     thinking_matches = re.findall(r'<think>(.*?)</think>', text, re.DOTALL)
     if thinking_matches:
         return thinking_matches[-1].strip()
-    return None
-@spaces.GPU(duration=180)
-def score_image_streaming(image, use_thinking=True):
-    """Оценка качества изображения со стримингом"""
-    global model, processor
-    # Загрузка модели при первом вызове
     load_model()
     if image is None:
@@ -91,189 +142,287 @@ def score_image_streaming(image, use_thinking=True):
         return
     # Выбор шаблона
-    if use_thinking:
-        question_template = QUESTION_TEMPLATE_THINKING
-    else:
-        question_template = QUESTION_TEMPLATE_NO_THINKING
-    # Формирование сообщения
-    message = [
         {
             "role": "user",
             "content": [
-                {'type': 'image', 'image': image},
-                {"type": "text", "text": question_template.format(Question=PROMPT)}
-            ],
         }
     ]
-    batch_messages = [message]
-    # Подготовка входных данных
-    text = [processor.apply_chat_template(
-        msg, tokenize=False, add_generation_prompt=True, add_vision_id=True
-    ) for msg in batch_messages]
-    image_inputs, video_inputs = process_vision_info(batch_messages)
-    inputs = processor(
-        text=text,
-        images=image_inputs,
-        videos=video_inputs,
-        padding=True,
-        return_tensors="pt",
-    )
-    inputs = inputs.to(model.device)
-    # Настройка стриминга
-    streamer = TextIteratorStreamer(
-        processor.tokenizer,
-        skip_prompt=True,
-        skip_special_tokens=True
-    )
-    generation_kwargs = dict(
-        **inputs,
-        streamer=streamer,
-        max_new_tokens=2048 if use_thinking else 256,
-        do_sample=True,
-        top_k=50,
-        top_p=0.95,
-        temperature=0.7,
-        use_cache=True,
-    )
-    # Запуск генерации в отдельном потоке
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    # Стриминг вывода
-    generated_text = ""
-    current_thinking = ""
-    current_score = "*Analyzing...*"
-    for new_text in streamer:
-        generated_text += new_text
-        # Извлечение мышления (если есть)
-        thinking = extract_thinking(generated_text)
-        if thinking:
-            current_thinking = thinking
-        # Извлечение оценки
-        score = extract_score(generated_text)
-        if score is not None:
-            current_score = f"⭐ **Quality Score: {score:.2f} / 5.00**"
-        yield generated_text, current_thinking, current_score
-    thread.join()
-    # Финальное извлечение
-    final_score = extract_score(generated_text)
-    final_thinking = extract_thinking(generated_text) if use_thinking else ""
-    if final_score is not None:
-        score_display = f"⭐ **Quality Score: {final_score:.2f} / 5.00**\n\n📊 **For Leaderboard:** `{final_score:.2f}`"
-    else:
-        score_display = "❌ Could not extract score. Please try again."
-    yield generated_text, final_thinking or "", score_display
 def create_interface():
     """Создание интерфейса Gradio"""
-    # Убрали theme из gr.Blocks() - теперь он передаётся в launch()
-    with gr.Blocks(
-        title="VisualQuality-R1: Image Quality Assessment",
-    ) as demo:
         gr.Markdown("""
-        # 🎨 VisualQuality-R1: Image Quality Assessment
-        **Reasoning-Induced Image Quality Assessment via Reinforcement Learning to Rank**
-        Upload an image to get a quality score (1-5) with detailed reasoning.
         [![Paper](https://img.shields.io/badge/arXiv-Paper-red)](https://arxiv.org/abs/2505.14460)
         [![Model](https://img.shields.io/badge/🤗-Model-yellow)](https://huggingface.co/TianheWu/VisualQuality-R1-7B)
         """)
-        with gr.Row():
-            with gr.Column(scale=1):
-                image_input = gr.Image(
-                    label="📷 Upload Image",
-                    type="pil",
-                    height=400
-                )
-                thinking_checkbox = gr.Checkbox(
-                    label="🧠 Enable Thinking Mode (detailed reasoning)",
-                    value=True
-                )
-                submit_btn = gr.Button(
-                    "🔍 Analyze Image Quality",
-                    variant="primary",
-                    size="lg"
                 )
                 gr.Markdown("""
-                ### 📖 Instructions:
-                1. Upload an image
-                2. Enable/disable thinking mode
-                3. Click "Analyze Image Quality"
-                4. Wait for the score and reasoning
-                ### 📊 Score Scale:
-                - **1.0**: Very poor quality
-                - **2.0**: Poor quality
-                - **3.0**: Fair quality
-                - **4.0**: Good quality
-                - **5.0**: Excellent quality
                 """)
-            with gr.Column(scale=1):
-                score_output = gr.Markdown(
-                    label="Quality Score",
-                    value="*Upload an image to see the score*"
-                )
-                thinking_output = gr.Textbox(
-                    label="🧠 Thinking Process",
-                    lines=8,
-                    max_lines=15,
-                    placeholder="Reasoning will appear here when thinking mode is enabled...",
-                    interactive=False
-                )
-                raw_output = gr.Textbox(
-                    label="📝 Full Model Output",
-                    lines=10,
-                    max_lines=20,
-                    placeholder="Full model response will appear here...",
-                    interactive=False
                 )
-        # Обработка события
-        submit_btn.click(
-            fn=score_image_streaming,
-            inputs=[image_input, thinking_checkbox],
-            outputs=[raw_output, thinking_output, score_output],
-        )
         gr.Markdown("""
         ---
-        ### 📚 Citation
-        ```bibtex
-        @article{wu2025visualquality,
-          title={{VisualQuality-R1}: Reasoning-Induced Image Quality Assessment via Reinforcement Learning to Rank},
-          author={Wu, Tianhe and Zou, Jian and Liang, Jie and Zhang, Lei and Ma, Kede},
-          journal={arXiv preprint arXiv:2505.14460},
-          year={2025}
-        }
-        ```
         """)
     return demo
@@ -281,9 +430,8 @@ def create_interface():
 if __name__ == "__main__":
     demo = create_interface()
-    demo.queue(max_size=10)
-    # Добавлены параметры для Gradio 6.0
     demo.launch(
-        ssr_mode=False,  # Отключаем SSR для стабильности
         show_error=True,
     )

 import gradio as gr
+import os
 import re
+import json
+import tempfile
+import zipfile
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from llama_cpp.llama_chat_format import Qwen2VLChatHandler
+import base64
+from PIL import Image
+from io import BytesIO
 # Constants
+REPO_ID = "mradermacher/VisualQuality-R1-7B-GGUF"
+MODEL_FILE = "VisualQuality-R1-7B.Q4_K_M.gguf"  # 4.68 GB - balance of quality and size
+MMPROJ_FILE = "VisualQuality-R1-7B.mmproj-Q8_0.gguf"  # 853 MB
 # Промпты
 PROMPT = (
 QUESTION_TEMPLATE_THINKING = "{Question} First output the thinking process in <think> </think> tags and then output the final answer with only one score in <answer> </answer> tags."
 QUESTION_TEMPLATE_NO_THINKING = "{Question} Please only output the final answer with only one score in <answer> </answer> tags."
+# Global variables; both stay None until load_model() assigns them
+llm = None
+chat_handler = None
+def download_models():
+    """Download the model files from the Hugging Face Hub.
+    Fetches MODEL_FILE and MMPROJ_FILE from REPO_ID, printing each local
+    path as it becomes available.
+    Returns:
+        Tuple ``(model_path, mmproj_path)`` with the local paths returned by
+        ``hf_hub_download``.
+    """
+    print("Downloading model files...")
+    # ``resume_download`` is deprecated in huggingface_hub (interrupted
+    # downloads are resumed automatically), so it is no longer passed.
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
+    print(f"Model downloaded: {model_path}")
+    mmproj_path = hf_hub_download(repo_id=REPO_ID, filename=MMPROJ_FILE)
+    print(f"MMProj downloaded: {mmproj_path}")
+    return model_path, mmproj_path
 def load_model():
+    """Load the model into the module-level ``llm``/``chat_handler``; no-op if ``llm`` is already set."""
+    global llm, chat_handler
+    if llm is not None:
         return
+    model_path, mmproj_path = download_models()
+    print("Loading model into memory...")
+    # Create the chat handler for Qwen2-VL
+    chat_handler = Qwen2VLChatHandler(
+        clip_model_path=mmproj_path,
+        verbose=False
     )
+    # Load the main model
+    llm = Llama(
+        model_path=model_path,
+        chat_handler=chat_handler,
+        n_ctx=4096,  # Context
+        n_threads=4,  # CPU threads
+        n_gpu_layers=0,  # CPU only
+        verbose=False,
+    )
     print("Model loaded successfully!")
+def image_to_base64_uri(image):
+    """Convert a PIL image into a JPEG ``data:`` URI.
+    Args:
+        image: PIL image, or None.
+    Returns:
+        ``"data:image/jpeg;base64,..."`` string, or None when ``image`` is None.
+        The input image object itself is not modified.
+    """
+    if image is None:
+        return None
+    # Convert to RGB if needed (the image is saved as JPEG below)
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    # Downscale to speed things up: longest side at most max_size pixels
+    max_size = 1024
+    longest_side = max(image.size)
+    if longest_side > max_size:
+        ratio = max_size / longest_side
+        # Clamp each side to at least 1 px: for a very elongated image the
+        # short side would otherwise truncate to 0, which resize() rejects.
+        new_size = tuple(max(1, int(side * ratio)) for side in image.size)
+        image = image.resize(new_size, Image.LANCZOS)
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG", quality=85)
+    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return f"data:image/jpeg;base64,{img_base64}"
 def extract_score(text):
     """Извлечение оценки из текста"""
     try:
         if score_match:
             score = float(score_match.group())
             return min(max(score, 1.0), 5.0)
+    except:
+        pass
     return None
 def extract_thinking(text):
+    """Return the stripped content of the last <think>...</think> block in text, or "" if there is none."""
     thinking_matches = re.findall(r'<think>(.*?)</think>', text, re.DOTALL)
     if thinking_matches:
         return thinking_matches[-1].strip()
+    return ""
+def score_single_image(image, use_thinking=True):
+    """Оценка одного изображения"""
+    global llm
     load_model()
     if image is None:
         return
     # Выбор шаблона
+    template = QUESTION_TEMPLATE_THINKING if use_thinking else QUESTION_TEMPLATE_NO_THINKING
+    prompt_text = template.format(Question=PROMPT)
+    # Конвертируем изображение
+    image_uri = image_to_base64_uri(image)
+    # Формируем сообщение
+    messages = [
         {
             "role": "user",
             "content": [
+                {"type": "image_url", "image_url": {"url": image_uri}},
+                {"type": "text", "text": prompt_text}
+            ]
         }
     ]
+    # Генерация со стримингом
+    generated_text = ""
+    yield "⏳ Processing...", "", "*Analyzing image...*"
+    try:
+        response = llm.create_chat_completion(
+            messages=messages,
+            max_tokens=2048 if use_thinking else 256,
+            temperature=0.7,
+            top_p=0.95,
+            stream=True,
+        )
+        for chunk in response:
+            delta = chunk.get("choices", [{}])[0].get("delta", {})
+            content = delta.get("content", "")
+            if content:
+                generated_text += content
+                thinking = extract_thinking(generated_text)
+                score = extract_score(generated_text)
+                if score is not None:
+                    score_display = f"⭐ **Quality Score: {score:.2f} / 5.00**"
+                else:
+                    score_display = "*Analyzing...*"
+                yield generated_text, thinking, score_display
+        # Финальный результат
+        final_score = extract_score(generated_text)
+        final_thinking = extract_thinking(generated_text) if use_thinking else ""
+        if final_score is not None:
+            score_display = f"⭐ **Quality Score: {final_score:.2f} / 5.00**\n\n📊 **For Leaderboard:** `{final_score:.2f}`"
+        else:
+            score_display = "❌ Could not extract score. Please try again."
+        yield generated_text, final_thinking, score_display
+    except Exception as e:
+        yield f"❌ Error: {str(e)}", "", ""
+def _format_score(score):
+    """Render a score for output: floats with two decimals, markers such as "N/A"/"ERROR" unchanged."""
+    return f"{score:.2f}" if isinstance(score, float) else str(score)
+def process_batch(files, use_thinking=True, progress=gr.Progress()):
+    """Score every uploaded image and bundle the results into a ZIP archive.
+    Images are processed one at a time. A failure on one image is recorded
+    as an "ERROR" row (with the exception text as raw output) and does not
+    stop the batch.
+    Args:
+        files: Uploaded items. Each is a path, an object exposing its path
+            as ``.name``, or anything else ``Image.open`` accepts.
+        use_thinking: Use the thinking prompt and keep the extracted
+            ``<think>`` text; otherwise use the answer-only prompt.
+        progress: Gradio progress tracker, updated after every image.
+    Returns:
+        ``(summary_markdown, zip_path)``. The archive holds
+        ``leaderboard_scores.txt`` (tab-separated), ``full_results.json``
+        and ``scores.csv``. ``zip_path`` is None when nothing was uploaded.
+    """
+    import csv
+    import io
+    load_model()
+    if not files:
+        return "❌ No files uploaded", None
+    template = QUESTION_TEMPLATE_THINKING if use_thinking else QUESTION_TEMPLATE_NO_THINKING
+    prompt_text = template.format(Question=PROMPT)
+    total = len(files)
+    results = []
+    progress(0, desc="Starting batch processing...")
+    for i, file in enumerate(files):
+        # Resolve the source and display name before anything can fail, so
+        # an error row can never inherit the previous image's filename.
+        if isinstance(file, (str, os.PathLike)):
+            source = file
+        else:
+            source = getattr(file, "name", file)
+        if isinstance(source, (str, os.PathLike)):
+            filename = os.path.basename(os.fspath(source))
+        else:
+            filename = f"image_{i+1}.jpg"
+        try:
+            # Close the file handle as soon as the image has been encoded
+            with Image.open(source) as image:
+                image_uri = image_to_base64_uri(image)
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": image_uri}},
+                        {"type": "text", "text": prompt_text}
+                    ]
+                }
+            ]
+            response = llm.create_chat_completion(
+                messages=messages,
+                max_tokens=2048 if use_thinking else 256,
+                temperature=0.7,
+                top_p=0.95,
+            )
+            generated_text = response["choices"][0]["message"]["content"]
+            score = extract_score(generated_text)
+            results.append({
+                "filename": filename,
+                "score": score if score is not None else "N/A",
+                "thinking": extract_thinking(generated_text) if use_thinking else "",
+                "raw_output": generated_text,
+            })
+        except Exception as e:
+            # Best effort: report the failure in the results and keep going
+            results.append({
+                "filename": filename,
+                "score": "ERROR",
+                "thinking": "",
+                "raw_output": str(e),
+            })
+        progress((i + 1) / total, desc=f"Processed {i+1}/{total}: {filename}")
+    # Build the three result documents in memory
+    leaderboard_text = "".join(
+        f"{r['filename']}\t{_format_score(r['score'])}\n" for r in results
+    )
+    json_text = json.dumps(results, indent=2, ensure_ascii=False)
+    # csv.writer quotes filenames that contain commas or quotes
+    csv_buffer = io.StringIO()
+    writer = csv.writer(csv_buffer, lineterminator="\n")
+    writer.writerow(["filename", "score"])
+    writer.writerows((r["filename"], _format_score(r["score"])) for r in results)
+    # delete=False: the archive has to outlive this function so it can be
+    # offered for download. writestr() stores text as UTF-8 regardless of
+    # the platform's default encoding.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as final_zip:
+        with zipfile.ZipFile(final_zip, "w") as zipf:
+            zipf.writestr("leaderboard_scores.txt", leaderboard_text)
+            zipf.writestr("full_results.json", json_text)
+            zipf.writestr("scores.csv", csv_buffer.getvalue())
+    # Build the summary; statistics fall back to N/A when nothing was scored
+    valid_scores = [r["score"] for r in results if isinstance(r["score"], float)]
+    if valid_scores:
+        average = f"{sum(valid_scores) / len(valid_scores):.2f}"
+        lowest = f"{min(valid_scores):.2f}"
+        highest = f"{max(valid_scores):.2f}"
+    else:
+        average = lowest = highest = "N/A"
+    summary_lines = [
+        "",
+        "## ✅ Batch Processing Complete!",
+        f"**Processed:** {len(results)} images",
+        f"**Successful:** {len(valid_scores)} images",
+        f"**Failed:** {len(results) - len(valid_scores)} images",
+        "### Statistics:",
+        f"- **Average Score:** {average}",
+        f"- **Min Score:** {lowest}",
+        f"- **Max Score:** {highest}",
+        "### Preview (first 10):",
+        "| Filename | Score |",
+        "|----------|-------|",
+    ]
+    summary_lines.extend(
+        f"| {r['filename']} | {_format_score(r['score'])} |" for r in results[:10]
+    )
+    return "\n".join(summary_lines), final_zip.name
 def create_interface():
     """Build the Gradio UI (single-image tab and batch tab) and return the Blocks app."""
+    with gr.Blocks(title="VisualQuality-R1 GGUF") as demo:
         gr.Markdown("""
+        # 🎨 VisualQuality-R1: Image Quality Assessment (GGUF/CPU)
+        **Reasoning-Induced Image Quality Assessment** | Running on CPU with GGUF quantization
         [![Paper](https://img.shields.io/badge/arXiv-Paper-red)](https://arxiv.org/abs/2505.14460)
         [![Model](https://img.shields.io/badge/🤗-Model-yellow)](https://huggingface.co/TianheWu/VisualQuality-R1-7B)
+        > ⚠️ **CPU Mode**: Processing is slower but works without GPU. ~30-60 sec per image.
         """)
+        with gr.Tabs():
+            # Tab for scoring a single image
+            with gr.TabItem("📷 Single Image"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        image_input = gr.Image(
+                            label="Upload Image",
+                            type="pil",
+                            height=350
+                        )
+                        thinking_checkbox = gr.Checkbox(
+                            label="🧠 Enable Thinking Mode",
+                            value=True
+                        )
+                        submit_btn = gr.Button(
+                            "🔍 Analyze Quality",
+                            variant="primary",
+                            size="lg"
+                        )
+                    with gr.Column(scale=1):
+                        score_output = gr.Markdown(value="*Upload an image to see the score*")
+                        thinking_output = gr.Textbox(label="🧠 Thinking", lines=6, interactive=False)
+                        raw_output = gr.Textbox(label="📝 Full Output", lines=8, interactive=False)
+                # Output order matches the (raw text, thinking, score) tuples yielded by score_single_image
+                submit_btn.click(
+                    fn=score_single_image,
+                    inputs=[image_input, thinking_checkbox],
+                    outputs=[raw_output, thinking_output, score_output],
                 )
+            # Tab for batch processing
+            with gr.TabItem("📁 Batch Processing (1000+ images)"):
                 gr.Markdown("""
+                ### Batch Processing for Leaderboard
+                Upload multiple images (ZIP or individual files) to process them all at once.
+                Results will be saved in a format ready for leaderboard submission.
                 """)
+                with gr.Row():
+                    with gr.Column():
+                        # NOTE(review): the text above offers ZIP uploads, but file_types
+                        # below only allows images, and process_batch opens every file
+                        # with Image.open — confirm whether ZIP support is intended.
+                        batch_files = gr.File(
+                            label="Upload Images",
+                            file_count="multiple",
+                            file_types=["image"],
+                        )
+                        batch_thinking = gr.Checkbox(
+                            label="🧠 Enable Thinking Mode (slower but more detailed)",
+                            value=False  # Off by default for speed
+                        )
+                        batch_btn = gr.Button(
+                            "🚀 Process All Images",
+                            variant="primary",
+                            size="lg"
+                        )
+                    with gr.Column():
+                        batch_summary = gr.Markdown(value="*Upload images and click Process*")
+                        batch_download = gr.File(label="📥 Download Results")
+                # process_batch returns (summary markdown, path of the results ZIP)
+                batch_btn.click(
+                    fn=process_batch,
+                    inputs=[batch_files, batch_thinking],
+                    outputs=[batch_summary, batch_download],
                 )
         gr.Markdown("""
         ---
+        ### 📊 Score Scale
+        | Score | Quality |
+        |-------|---------|
+        | 1.0 | Very poor |
+        | 2.0 | Poor |
+        | 3.0 | Fair |
+        | 4.0 | Good |
+        | 5.0 | Excellent |
         """)
     return demo
 if __name__ == "__main__":
     demo = create_interface()
+    # Enable Gradio's request queue (max_size=5) before launching
+    demo.queue(max_size=5)
     demo.launch(
         show_error=True,
+        ssr_mode=False,  # SSR disabled for stability
     )