File size: 4,683 Bytes
578ee8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591e59b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import sys
import subprocess

# --- WORKAROUND FOR INSTALLING LLAMA-CPP-PYTHON ---
# Install the library at startup (first run) so the Space build step does not
# have to compile llama.cpp from source.
try:
    import llama_cpp
    print("llama-cpp-python уже установлен.")
except ImportError:
    print("Установка llama-cpp-python из пресобранного wheel (CPU)...")
    # Pull a pre-built CPU wheel for Linux x86_64 from the upstream index
    # (avoids a long native compilation on the Space's CPU).
    subprocess.check_call([
        sys.executable, "-m", "pip", "install", 
        "llama-cpp-python", 
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
    ])
    print("Установка завершена!")
    import llama_cpp
# ------------------------------------------

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import base64
import io
import re

# Model settings: quantized (Q8_0) GGUF build of VisualQuality-R1-7B on the HF Hub
REPO_ID = "mradermacher/VisualQuality-R1-7B-GGUF"
MODEL_FILENAME = "VisualQuality-R1-7B.Q8_0.gguf" 

# Lazily-initialized singleton model instance; populated by load_model()
llm = None

def load_model():
    """Lazily download and initialize the GGUF model (module-level singleton).

    On first call, downloads the weights from the Hugging Face Hub (or reuses
    the local cache) and constructs a CPU-only ``Llama`` instance; subsequent
    calls return the cached instance.

    Returns:
        Llama: the loaded llama-cpp-python model.

    Raises:
        Exception: any download or initialization error is logged and re-raised.
    """
    global llm
    if llm is None:
        print(f"Загрузка модели {MODEL_FILENAME}...")
        try:
            # Resolves to a path in the local HF cache; downloads only if missing.
            model_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=MODEL_FILENAME
            )
            llm = Llama(
                model_path=model_path,
                n_ctx=8192,
                n_gpu_layers=0,       # CPU-only inference
                verbose=True,
                # NOTE(review): "chatml-function-calling" is a text chat format;
                # the app sends "image_url" content parts (see evaluate_image).
                # llama-cpp-python normally needs a multimodal chat handler with
                # a clip/mmproj model to actually encode images — confirm the
                # image is not being silently dropped.
                chat_format="chatml-function-calling"
            )
            print("Модель успешно загружена!")
        except Exception as e:
            print(f"Ошибка загрузки: {e}")
            raise  # bare raise preserves the original traceback (not `raise e`)
    return llm

def image_to_base64(image):
    """Encode a PIL image as a base64 JPEG string.

    Args:
        image: PIL.Image-like object exposing ``.mode``, ``.convert()`` and
            ``.save()``.

    Returns:
        str: base64-encoded JPEG bytes as an ASCII string.
    """
    # BUG FIX: JPEG cannot store an alpha channel / palette; saving an RGBA or
    # P-mode image (common for PNG uploads) raises
    # OSError("cannot write mode RGBA as JPEG"). Convert to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

def evaluate_image(image, progress=gr.Progress()):
    """Stream a chain-of-thought quality assessment for *image*.

    Generator handler for Gradio: yields ``(accumulated_text, score_label)``
    tuples as tokens arrive, then a final tuple with the parsed score.

    Args:
        image: PIL image uploaded by the user, or None.
        progress: Gradio progress tracker (enables the progress UI; not used
            directly in the body).

    Yields:
        tuple[str, str]: the model's accumulated response text and the current
        score label ("Вычисляется..." while streaming, the parsed score or
        "Не найдено" at the end).
    """
    if image is None:
        # BUG FIX: this function is a generator (it contains `yield`), so a
        # plain `return value` only sets StopIteration.value and Gradio would
        # render nothing. The message must be yielded, then we stop.
        yield "Пожалуйста, загрузите изображение.", ""
        return
    
    # Lazy model load on the first request (avoids blocking app startup)
    model = load_model()
    
    system_prompt = "You are doing the image quality assessment task."
    user_prompt_text = (
        "What is your overall rating on the quality of this picture? "
        "The rating should be a float between 1 and 5, rounded to two decimal places, "
        "with 1 representing very poor quality and 5 representing excellent quality. "
        "Please only output the final answer with only one score in <answer> </answer> tags."
    )
    
    # Embed the image as a base64 data URL in the OpenAI-style message payload
    base64_image = image_to_base64(image)
    image_url = f"data:image/jpeg;base64,{base64_image}"

    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user", 
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": user_prompt_text}
            ]
        }
    ]

    full_response = ""
    print("Начало генерации...")
    
    stream = model.create_chat_completion(
        messages=messages,
        max_tokens=1024,
        temperature=0.6,
        stream=True
    )

    # Accumulate streamed deltas and re-yield the growing text after each chunk
    for chunk in stream:
        if "choices" in chunk:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta and delta["content"]:
                content = delta["content"]
                full_response += content
                yield full_response, "Вычисляется..."

    # Extract the numeric score the prompt asked the model to wrap in <answer> tags
    score_match = re.search(r'<answer>\s*([\d\.]+)\s*</answer>', full_response)
    final_score = score_match.group(1) if score_match else "Не найдено"
    
    yield full_response, final_score

# --- Gradio UI: two-column layout (input+button | score+streamed CoT text) ---
with gr.Blocks(title="VisualQuality-R1 (Q8 GGUF)") as demo:
    gr.Markdown("# 👁️ VisualQuality-R1 (7B Q8)")
    gr.Markdown("Оценка качества изображений (Chain of Thought). Работает на CPU.")
    
    with gr.Row():
        with gr.Column():
            # Left column: image upload and the evaluation trigger
            input_img = gr.Image(type="pil", label="Загрузите изображение")
            run_btn = gr.Button("Оценить качество", variant="primary")
        
        with gr.Column():
            # Right column: parsed score label and the streamed reasoning text
            output_score = gr.Label(label="Итоговая оценка")
            output_text = gr.Textbox(label="Ход мыслей (CoT)", lines=15, show_copy_button=True)

    # evaluate_image is a generator, so Gradio streams its yields into the outputs
    run_btn.click(evaluate_image, inputs=[input_img], outputs=[output_text, output_score])

if __name__ == "__main__":
    # queue() is required for generator (streaming) event handlers
    demo.queue().launch()