Spaces:

VOIDER
/

VisualQuality-R1-7B

Build error

App Files Community

VOIDER committed on Jan 7

Commit

b161f01

verified ·

1 Parent(s): 821cf80

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -87

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ import sys
 import subprocess
 # --- УСТАНОВКА LLAMA-CPP-PYTHON (Runtime) ---
 try:
     from llama_cpp import Llama
-    from llama_cpp.llama_chat_format import Llava15ChatHandler
-    print("Библиотека llama-cpp-python проверена.")
 except ImportError:
-    print("Установка llama-cpp-python с поддержкой CPU...")
-    # Устанавливаем версию с официального индекса разработчика
     subprocess.check_call([
         sys.executable, "-m", "pip", "install",
         "llama-cpp-python>=0.3.2",
@@ -17,6 +17,12 @@ except ImportError:
     ])
     print("Установка завершена! Импортируем...")
     from llama_cpp import Llama
 import gradio as gr
 from huggingface_hub import hf_hub_download
@@ -35,35 +41,36 @@ def load_model():
     global llm
     if llm is None:
         print(f"Загрузка модели {MODEL_FILENAME}...")
-        try:
-            model_path = hf_hub_download(
-                repo_id=REPO_ID,
-                filename=MODEL_FILENAME
-            )
-            # ВАЖНО: chat_format="qwen2vl" включает встроенный хендлер для картинок
-            llm = Llama(
-                model_path=model_path,
-                n_ctx=12288,          # 12k контекста (хватит для HD картинки)
-                n_gpu_layers=0,       # CPU
-                verbose=True,
-                chat_format="qwen2vl" # Явно включаем режим Qwen2-VL
-            )
-            print("Модель успешно загружена!")
-        except Exception as e:
-            print(f"Критическая ошибка загрузки: {e}")
-            raise e
     return llm
 def process_image(image):
-    # Ресайз слишком больших изображений для экономии памяти и контекста
-    max_size = 1024
     if max(image.size) > max_size:
         ratio = max_size / max(image.size)
         new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
         image = image.resize(new_size, Image.Resampling.LANCZOS)
-    # Конвертация в Base64
     buffered = io.BytesIO()
     image = image.convert("RGB")
     image.save(buffered, format="JPEG", quality=90)
@@ -73,42 +80,39 @@ def evaluate_image(image, progress=gr.Progress()):
     if image is None:
         return "Пожалуйста, загрузите изображение.", ""
-    progress(0, desc="Инициализация...")
     try:
         model = load_model()
-    except Exception as e:
-        return f"Ошибка инициализации модели: {str(e)}", "Ошибка"
-    system_prompt = "You are doing the image quality assessment task."
-    user_prompt_text = (
-        "What is your overall rating on the quality of this picture? "
-        "The rating should be a float between 1 and 5, rounded to two decimal places, "
-        "with 1 representing very poor quality and 5 representing excellent quality. "
-        "Please only output the final answer with only one score in <answer> </answer> tags."
-    )
-    progress(0.1, desc="Обработка изображения...")
-    base64_image = process_image(image)
-    image_url = f"data:image/jpeg;base64,{base64_image}"
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": image_url}},
-                {"type": "text", "text": user_prompt_text}
-            ]
-        }
-    ]
-    full_response = ""
-    print("Отправка запроса в модель...")
-    try:
         stream = model.create_chat_completion(
             messages=messages,
-            max_tokens=1500,
             temperature=0.6,
             stream=True
         )
@@ -120,31 +124,21 @@ def evaluate_image(image, progress=gr.Progress()):
                     content = delta["content"]
                     full_response += content
                     yield full_response, "Вычисляется..."
-    except ValueError as e:
-        # Если формат чата не сработал
-        err = f"Ошибка формата: {e}. Попробуйте перезагрузить Space."
-        print(err)
-        yield err, "Error"
-        return
     except Exception as e:
-        err = f"Внутренняя ошибка: {e}"
-        print(err)
-        yield err, "Error"
-        return
-    # Извлечение оценки
-    score_match = re.search(r'<answer>\s*([\d\.]+)\s*</answer>', full_response)
-    final_score = score_match.group(1) if score_match else "Не найдено"
-    yield full_response, final_score
-with gr.Blocks(title="VisualQuality-R1 (Q8 GGUF)") as demo:
-    gr.Markdown("# 👁️ VisualQuality-R1 (7B Q8)")
-    gr.Markdown(
-        "Оценка качества (IQA) с CoT. Работает на CPU (медленно!).\n"
-        "Если видите ошибку 'context window', попробуйте картинку меньшего разрешения."
-    )
     with gr.Row():
         with gr.Column():
@@ -153,13 +147,9 @@ with gr.Blocks(title="VisualQuality-R1 (Q8 GGUF)") as demo:
         with gr.Column():
             output_score = gr.Label(label="Оценка")
-            output_text = gr.Textbox(label="Рассуждения (CoT)", lines=15)
-    run_btn.click(
-        fn=evaluate_image,
-        inputs=[input_img],
-        outputs=[output_text, output_score]
-    )
 if __name__ == "__main__":
     demo.queue().launch()

 import subprocess
 # --- УСТАНОВКА LLAMA-CPP-PYTHON (Runtime) ---
+# Устанавливаем версию с поддержкой Vision (CPU)
 try:
     from llama_cpp import Llama
+    from llama_cpp.llama_chat_format import Qwen2VLChatHandler
+    print("Библиотека llama-cpp-python и Qwen2VLChatHandler найдены.")
 except ImportError:
+    print("Установка свежей версии llama-cpp-python...")
     subprocess.check_call([
         sys.executable, "-m", "pip", "install",
         "llama-cpp-python>=0.3.2",
     ])
     print("Установка завершена! Импортируем...")
     from llama_cpp import Llama
+    # Пытаемся импортировать хендлер после установки
+    try:
+        from llama_cpp.llama_chat_format import Qwen2VLChatHandler
+    except ImportError:
+        print("ВАЖНО: Qwen2VLChatHandler не найден. Возможно, версия библиотеки старая.")
+        Qwen2VLChatHandler = None
 import gradio as gr
 from huggingface_hub import hf_hub_download
     global llm
     if llm is None:
         print(f"Загрузка модели {MODEL_FILENAME}...")
+        model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+        # Настраиваем обработчик диалога СПЕЦИАЛЬНО для Qwen2-VL
+        # Это решает проблему "Invalid chat handler" и ошибки с токенами
+        chat_handler = None
+        if Qwen2VLChatHandler:
+            print("Активация режима Qwen2-VL Vision...")
+            # Передаем путь к модели как clip_model_path, так как в unified GGUF
+            # визуальный энкодер находится внутри основного файла
+            chat_handler = Qwen2VLChatHandler(clip_model_path=model_path)
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=12288,          # Контекст (картинки занимают много токенов)
+            n_gpu_layers=0,       # CPU
+            verbose=True,
+            chat_handler=chat_handler, # Подключаем ручной обработчик
+            n_batch=512           # Размер батча для CPU
+        )
+        print("Модель успешно загружена!")
     return llm
 def process_image(image):
+    # Ресайз обязателен для Qwen2-VL на CPU, иначе вылетит контекст 32k+
+    max_size = 1024
     if max(image.size) > max_size:
         ratio = max_size / max(image.size)
         new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
         image = image.resize(new_size, Image.Resampling.LANCZOS)
     buffered = io.BytesIO()
     image = image.convert("RGB")
     image.save(buffered, format="JPEG", quality=90)
     if image is None:
         return "Пожалуйста, загрузите изображение.", ""
     try:
+        progress(0.1, desc="Инициализация модели...")
         model = load_model()
+        progress(0.3, desc="Обработка изображения...")
+        base64_image = process_image(image)
+        image_url = f"data:image/jpeg;base64,{base64_image}"
+        system_prompt = "You are doing the image quality assessment task."
+        user_prompt_text = (
+            "What is your overall rating on the quality of this picture? "
+            "The rating should be a float between 1 and 5, rounded to two decimal places, "
+            "with 1 representing very poor quality and 5 representing excellent quality. "
+            "Please only output the final answer with only one score in <answer> </answer> tags."
+        )
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": user_prompt_text}
+                ]
+            }
+        ]
+        full_response = ""
+        print("Генерация ответа...")
         stream = model.create_chat_completion(
             messages=messages,
+            max_tokens=1024,
             temperature=0.6,
             stream=True
         )
                     content = delta["content"]
                     full_response += content
                     yield full_response, "Вычисляется..."
+        # Парсинг оценки
+        score_match = re.search(r'<answer>\s*([\d\.]+)\s*</answer>', full_response)
+        final_score = score_match.group(1) if score_match else "Не найдено"
+        yield full_response, final_score
     except Exception as e:
+        error_msg = f"Ошибка: {str(e)}"
+        print(error_msg)
+        yield error_msg, "Error"
+with gr.Blocks(title="VisualQuality-R1") as demo:
+    gr.Markdown("# 👁️ VisualQuality-R1 (Qwen2-VL)")
+    gr.Markdown("Оценка качества изображений. Запуск на CPU (может быть медленно).")
     with gr.Row():
         with gr.Column():
         with gr.Column():
             output_score = gr.Label(label="Оценка")
+            output_text = gr.Textbox(label="CoT (Мысли модели)", lines=15)
+    run_btn.click(evaluate_image, inputs=[input_img], outputs=[output_text, output_score])
 if __name__ == "__main__":
     demo.queue().launch()