Spaces:

VOIDER
/

VisualQuality-R1-7B

Build error

App Files Files Community

VOIDER committed on Jan 7

Commit

1be11fc

verified ·

1 Parent(s): b161f01

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -59

app.py CHANGED Viewed

@@ -2,27 +2,19 @@ import os
 import sys
 import subprocess
-# --- LLAMA-CPP-PYTHON INSTALLATION (runtime) ---
-# Install a build with Vision support (CPU)
 try:
-    from llama_cpp import Llama
-    from llama_cpp.llama_chat_format import Qwen2VLChatHandler
-    print("Библиотека llama-cpp-python и Qwen2VLChatHandler найдены.")
 except ImportError:  # either import failed: install the package with pip at startup
-    print("Установка свежей версии llama-cpp-python...")
     subprocess.check_call([
         sys.executable, "-m", "pip", "install",
-        "llama-cpp-python>=0.3.2",
         "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
     ])
-    print("Установка завершена! Импортируем...")
-    from llama_cpp import Llama
-    # Try to import the handler after installation
-    try:
-        from llama_cpp.llama_chat_format import Qwen2VLChatHandler
-    except ImportError:
-        print("ВАЖНО: Qwen2VLChatHandler не найден. Возможно, версия библиотеки старая.")
-        Qwen2VLChatHandler = None  # falsy placeholder so later code can test for the handler
 import gradio as gr
 from huggingface_hub import hf_hub_download
@@ -31,45 +23,96 @@ import io
 import re
 from PIL import Image
-# Model settings
 REPO_ID = "mradermacher/VisualQuality-R1-7B-GGUF"  # repo_id passed to hf_hub_download
 MODEL_FILENAME = "VisualQuality-R1-7B.Q8_0.gguf"  # filename passed to hf_hub_download
 llm = None  # module-level cache; filled on the first load_model() call
 def load_model():  # returns the cached Llama instance, creating it on first use
     global llm
     if llm is None:
         print(f"Загрузка модели {MODEL_FILENAME}...")
-        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
-        # Configure the chat handler SPECIFICALLY for Qwen2-VL
-        # Per the original author, this addresses the "Invalid chat handler" problem and token errors
-        chat_handler = None
-        if Qwen2VLChatHandler:
-            print("Активация режима Qwen2-VL Vision...")
-            # Pass the model path as clip_model_path because, in a unified GGUF,
-            # the vision encoder is assumed to live inside the main file (TODO confirm)
-            chat_handler = Qwen2VLChatHandler(clip_model_path=model_path)
-        llm = Llama(
-            model_path=model_path,
-            n_ctx=12288,          # Context size (images take up many tokens)
-            n_gpu_layers=0,       # CPU
-            verbose=True,
-            chat_handler=chat_handler, # Attach the manually built handler
-            n_batch=512           # Batch size for CPU
-        )
-        print("Модель успешно загружена!")
     return llm
 def process_image(image):
-    # Ресайз обязателен для Qwen2-VL на CPU, иначе вылетит контекст 32k+
-    max_size = 1024
-    if max(image.size) > max_size:
-        ratio = max_size / max(image.size)
-        new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
-        image = image.resize(new_size, Image.Resampling.LANCZOS)
     buffered = io.BytesIO()
     image = image.convert("RGB")
@@ -81,15 +124,15 @@ def evaluate_image(image, progress=gr.Progress()):
         return "Пожалуйста, загрузите изображение.", ""
     try:
-        progress(0.1, desc="Инициализация модели...")
         model = load_model()
-        progress(0.3, desc="Обработка изображения...")
-        base64_image = process_image(image)
-        image_url = f"data:image/jpeg;base64,{base64_image}"
         system_prompt = "You are doing the image quality assessment task."
-        user_prompt_text = (
             "What is your overall rating on the quality of this picture? "
             "The rating should be a float between 1 and 5, rounded to two decimal places, "
             "with 1 representing very poor quality and 5 representing excellent quality. "
@@ -101,15 +144,16 @@ def evaluate_image(image, progress=gr.Progress()):
             {
                 "role": "user",
                 "content": [
-                    {"type": "image_url", "image_url": {"url": image_url}},
-                    {"type": "text", "text": user_prompt_text}
                 ]
             }
         ]
         full_response = ""
-        print("Генерация ответа...")
         stream = model.create_chat_completion(
             messages=messages,
             max_tokens=1024,
@@ -123,22 +167,23 @@ def evaluate_image(image, progress=gr.Progress()):
                 if "content" in delta and delta["content"]:
                     content = delta["content"]
                     full_response += content
-                    yield full_response, "Вычисляется..."
-        # Парсинг оценки
         score_match = re.search(r'<answer>\s*([\d\.]+)\s*</answer>', full_response)
-        final_score = score_match.group(1) if score_match else "Не найдено"
         yield full_response, final_score
     except Exception as e:
-        error_msg = f"Ошибка: {str(e)}"
-        print(error_msg)
-        yield error_msg, "Error"
-with gr.Blocks(title="VisualQuality-R1") as demo:
     gr.Markdown("# 👁️ VisualQuality-R1 (Qwen2-VL)")
-    gr.Markdown("Оценка качества изображений. Запуск на CPU (может быть медленно).")
     with gr.Row():
         with gr.Column():
@@ -147,7 +192,7 @@ with gr.Blocks(title="VisualQuality-R1") as demo:
         with gr.Column():
             output_score = gr.Label(label="Оценка")
-            output_text = gr.Textbox(label="CoT (Мысли модели)", lines=15)
     run_btn.click(evaluate_image, inputs=[input_img], outputs=[output_text, output_score])

 import sys
 import subprocess
+# --- LIBRARY CHECK AND INSTALLATION ---
 try:
+    from llama_cpp import Llama, LlamaChatCompletionHandler  # NOTE(review): confirm LlamaChatCompletionHandler is exported from the top-level llama_cpp package
+    print("Библиотека llama-cpp-python найдена.")
 except ImportError:  # import failed: install the package with pip at startup, then retry
+    print("Установка llama-cpp-python (CPU)...")
+    # Force installation of 0.3.16 or newer from the CPU wheel index
     subprocess.check_call([
         sys.executable, "-m", "pip", "install",
+        "llama-cpp-python>=0.3.16",
         "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
     ])
+    from llama_cpp import Llama, LlamaChatCompletionHandler  # a second ImportError here is not caught
 import gradio as gr
 from huggingface_hub import hf_hub_download
 import re
 from PIL import Image
+# Configuration
 REPO_ID = "mradermacher/VisualQuality-R1-7B-GGUF"  # repo_id passed to hf_hub_download
 MODEL_FILENAME = "VisualQuality-R1-7B.Q8_0.gguf"  # filename passed to hf_hub_download
+# === MAIN FIX: CUSTOM HANDLER FOR QWEN2-VL ===
+# We do not depend on the built-in classes; we write our own.
+class CustomQwen2VLHandler(LlamaChatCompletionHandler):  # builds a prompt string and collects image bytes from chat messages
+    def __init__(self, clip_model_path=None, verbose=False):
+        self.clip_model_path = clip_model_path  # stored only; never read inside this class
+        self.verbose = verbose  # when true, __call__ prints a prompt preview and the image count
+    def __call__(self, llama: Llama, messages, functions=None, function_call=None, tools=None, tool_choice=None, **kwargs):  # llama, functions, function_call, tools, tool_choice and kwargs are accepted but unused
+        # 1. Build the prompt manually with the correct tags
+        prompt = ""
+        images = []
+        for message in messages:
+            role = message["role"]
+            content = message["content"]
+            # Start of the message
+            prompt += f"<|im_start|>{role}\n"
+            if isinstance(content, str):
+                prompt += content
+            elif isinstance(content, list):
+                for part in content:  # parts whose type is neither "text" nor "image_url" are ignored
+                    if part["type"] == "text":
+                        prompt += part["text"]
+                    elif part["type"] == "image_url":
+                        # Tags for Qwen2-VL: vision start -> pad -> vision end
+                        prompt += "<|vision_start|><|image_pad|><|vision_end|>"
+                        # Extract the raw bytes from the base64 payload (intended for the C++ layer, per the original author)
+                        try:
+                            image_url = part["image_url"]["url"]
+                            if "base64," in image_url:  # URLs without an inline base64 payload add no image, but the tag above stays in the prompt
+                                base64_data = image_url.split("base64,")[1]
+                                image_bytes = base64.b64decode(base64_data)  # NOTE(review): base64 is not imported in the lines visible here; confirm it is imported elsewhere in app.py
+                                images.append(image_bytes)
+                        except Exception as e:  # decoding failures are printed and swallowed; processing continues
+                            print(f"Ошибка декодирования картинки: {e}")
+            # End of the message
+            prompt += "<|im_end|>\n"
+        # Append the trigger that opens the assistant's reply
+        prompt += "<|im_start|>assistant\n"
+        if self.verbose:
+            print(f"=== SENDED PROMPT ({len(prompt)} chars) ===")
+            print(prompt[:200] + "..." if len(prompt) > 200 else prompt)  # preview: first 200 chars plus "..." when the prompt is longer
+            print(f"=== IMAGES: {len(images)} ===")
+        # Return the (prompt, images) tuple. NOTE(review): confirm llama-cpp-python accepts this return shape from a chat handler
+        return prompt, images
 llm = None  # module-level cache; filled on the first successful load_model() call
 def load_model():  # returns the cached Llama instance, creating it on first use
     global llm
     if llm is None:
         print(f"Загрузка модели {MODEL_FILENAME}...")
+        try:
+            model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+            # Initialize OUR custom handler
+            # clip_model_path points at the same file (the author treats this GGUF as all-in-one; TODO confirm)
+            chat_handler = CustomQwen2VLHandler(clip_model_path=model_path, verbose=True)
+            llm = Llama(
+                model_path=model_path,
+                n_ctx=8192,           # Context size (images are large and need room)
+                n_gpu_layers=0,       # CPU
+                verbose=True,
+                chat_handler=chat_handler, # <-- IMPORTANT: use our own class
+                n_batch=512,
+                logits_all=True
+            )
+            print("Модель успешно загружена с CustomQwen2VLHandler!")
+        except Exception as e:  # any download or initialization failure
+            print(f"Ошибка загрузки: {e}")
+            raise e  # re-raised after logging; llm stays None so the next call retries
     return llm
 def process_image(image):
+    # Ресайз до 1024px макс, чтобы не перегружать CPU память и контекст
+    max_dim = 1024
+    if max(image.size) > max_dim:
+        image.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)
     buffered = io.BytesIO()
     image = image.convert("RGB")
         return "Пожалуйста, загрузите изображение.", ""
     try:
+        progress(0.1, desc="Загрузка модели...")
         model = load_model()
+        progress(0.2, desc="Обработка...")
+        base64_img = process_image(image)
+        img_url = f"data:image/jpeg;base64,{base64_img}"
         system_prompt = "You are doing the image quality assessment task."
+        user_prompt = (
             "What is your overall rating on the quality of this picture? "
             "The rating should be a float between 1 and 5, rounded to two decimal places, "
             "with 1 representing very poor quality and 5 representing excellent quality. "
             {
                 "role": "user",
                 "content": [
+                    {"type": "image_url", "image_url": {"url": img_url}},
+                    {"type": "text", "text": user_prompt}
                 ]
             }
         ]
         full_response = ""
+        print("Начинаю генерацию...")
+        # Запуск стриминга
         stream = model.create_chat_completion(
             messages=messages,
             max_tokens=1024,
                 if "content" in delta and delta["content"]:
                     content = delta["content"]
                     full_response += content
+                    yield full_response, "Думаю..."
+        # Поиск оценки
         score_match = re.search(r'<answer>\s*([\d\.]+)\s*</answer>', full_response)
+        final_score = score_match.group(1) if score_match else "Оценка не найдена"
         yield full_response, final_score
     except Exception as e:
+        err_msg = f"Произошла ошибка: {str(e)}"
+        print(err_msg)
+        yield err_msg, "Error"
+# Интерфейс
+with gr.Blocks(title="VisualQuality-R1 (Custom Handler)") as demo:
     gr.Markdown("# 👁️ VisualQuality-R1 (Qwen2-VL)")
+    gr.Markdown("Оценка качества изображений на CPU с кастомным обработчиком.")
     with gr.Row():
         with gr.Column():
         with gr.Column():
             output_score = gr.Label(label="Оценка")
+            output_text = gr.Textbox(label="CoT (Рассуждения)", lines=15)
     run_btn.click(evaluate_image, inputs=[input_img], outputs=[output_text, output_score])