Spaces:

VOIDER
/

VisualQuality-R1-7B

Build error

App Files Files Community

VisualQuality-R1-7B / app.py

VOIDER

Update app.py

1be11fc verified 3 months ago

raw

history blame

8.2 kB

	import os
	import sys
	import subprocess

	# --- Check for llama-cpp-python and install it on demand ---
	# Llama is taken from the package root and the chat-handler base class from
	# llama_cpp.llama_chat_format.
	# NOTE(review): LlamaChatCompletionHandler appears to be defined in
	# llama_cpp.llama_chat_format and not re-exported by the package root, in
	# which case importing it from `llama_cpp` fails even when the library is
	# installed -- confirm against the installed version.
	try:
	    from llama_cpp import Llama
	    from llama_cpp.llama_chat_format import LlamaChatCompletionHandler
	    print("Библиотека llama-cpp-python найдена.")
	except ImportError:
	    import importlib

	    print("Установка llama-cpp-python (CPU)...")
	    # Force version 0.3.16 or newer, adding the CPU wheel index as an
	    # extra package source.
	    subprocess.check_call([
	        sys.executable, "-m", "pip", "install",
	        "llama-cpp-python>=0.3.16",
	        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
	    ])
	    # The package appeared on disk after the interpreter started, so the
	    # import system's cached directory listings must be dropped before the
	    # retry can see it.
	    importlib.invalidate_caches()
	    from llama_cpp import Llama
	    from llama_cpp.llama_chat_format import LlamaChatCompletionHandler

	import gradio as gr
	from huggingface_hub import hf_hub_download
	import base64
	import io
	import re
	from PIL import Image

	# Configuration: the Hugging Face Hub repository and the GGUF file inside it
	# that load_model() downloads via hf_hub_download().
	REPO_ID = "mradermacher/VisualQuality-R1-7B-GGUF"
	MODEL_FILENAME = "VisualQuality-R1-7B.Q8_0.gguf"

	# === Custom chat handler for Qwen2-VL ===
	# The prompt is assembled by hand here instead of relying on the chat
	# handlers bundled with the library.
	class CustomQwen2VLHandler(LlamaChatCompletionHandler):
	    """Render chat messages into a Qwen2-VL style prompt plus image bytes.

	    Each message becomes ``<|im_start|>{role}\\n{content}<|im_end|>\\n`` and
	    every successfully decoded image contributes one vision placeholder to
	    the prompt and one ``bytes`` object to the returned image list.

	    NOTE(review): llama-cpp-python chat handlers are normally expected to
	    return a completion response (or a stream of chunks). Whether the
	    library accepts the ``(prompt, images)`` tuple returned here must be
	    confirmed; the return contract is kept unchanged.
	    """

	    # Special tokens written verbatim into the prompt.
	    _IM_START = "<|im_start|>"
	    _IM_END = "<|im_end|>"
	    # Vision start -> pad -> vision end, one group per attached image.
	    _IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"

	    def __init__(self, clip_model_path=None, verbose=False):
	        """Store the settings.

	        Args:
	            clip_model_path: Path kept on the instance; this class itself
	                never reads it.
	            verbose: When true, ``__call__`` prints a preview of the prompt
	                and the number of images.
	        """
	        self.clip_model_path = clip_model_path
	        self.verbose = verbose

	    @staticmethod
	    def _decode_data_url(image_part):
	        """Return the bytes of a base64 data URL in an ``image_url`` part.

	        Returns ``None`` (after printing a message) when the part has no
	        usable URL, the URL carries no ``base64,`` payload, or the payload
	        cannot be decoded.
	        """
	        try:
	            image_url = image_part["image_url"]["url"]
	            _, marker, payload = image_url.partition("base64,")
	            if not marker:
	                print("Пропущено изображение: ожидался data URL с base64.")
	                return None
	            return base64.b64decode(payload)
	        except (KeyError, TypeError, AttributeError, ValueError) as e:
	            # binascii.Error (malformed base64) is a ValueError subclass.
	            print(f"Ошибка декодирования картинки: {e}")
	            return None

	    def __call__(self, llama: Llama, messages, functions=None, function_call=None, tools=None, tool_choice=None, **kwargs):
	        """Build the prompt and collect the images from ``messages``.

	        Args:
	            llama: The model instance; not used by this method.
	            messages: Iterable of dicts with ``"role"`` and ``"content"``.
	                Content is either a string or a list of parts whose
	                ``"type"`` is ``"text"`` or ``"image_url"``.
	            functions, function_call, tools, tool_choice, **kwargs:
	                Accepted for signature compatibility and ignored.

	        Returns:
	            A ``(prompt, images)`` tuple: the prompt string ending with the
	            assistant start marker, and a list of decoded image bytes.
	        """
	        pieces = []
	        images = []

	        for message in messages:
	            content = message["content"]

	            # Message header.
	            pieces.append(f"{self._IM_START}{message['role']}\n")

	            if isinstance(content, str):
	                pieces.append(content)
	            elif isinstance(content, list):
	                for part in content:
	                    if part["type"] == "text":
	                        pieces.append(part["text"])
	                    elif part["type"] == "image_url":
	                        image_bytes = self._decode_data_url(part)
	                        # Emit the placeholder only when bytes were actually
	                        # decoded, so placeholders and images stay in sync.
	                        if image_bytes is not None:
	                            pieces.append(self._IMAGE_PLACEHOLDER)
	                            images.append(image_bytes)

	            # Message terminator.
	            pieces.append(f"{self._IM_END}\n")

	        # Cue for the assistant's reply.
	        pieces.append(f"{self._IM_START}assistant\n")
	        prompt = "".join(pieces)

	        if self.verbose:
	            print(f"=== SENDED PROMPT ({len(prompt)} chars) ===")
	            print(prompt[:200] + "..." if len(prompt) > 200 else prompt)
	            print(f"=== IMAGES: {len(images)} ===")

	        return prompt, images

	# Lazily created model instance shared by all requests; see load_model().
	llm = None


	def load_model():
	    """Return the shared ``Llama`` instance, creating it on first use.

	    On the first call the GGUF file is fetched with ``hf_hub_download`` and
	    a ``Llama`` object is built around a ``CustomQwen2VLHandler``; later
	    calls return the cached object.

	    Raises:
	        Exception: Any error from the download or model construction is
	            printed and re-raised.
	    """
	    global llm
	    if llm is not None:
	        return llm

	    print(f"Загрузка модели {MODEL_FILENAME}...")
	    try:
	        gguf_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)

	        # The handler's clip_model_path is set to the very same file.
	        # NOTE(review): this assumes the GGUF bundles the vision encoder --
	        # TODO confirm.
	        handler = CustomQwen2VLHandler(clip_model_path=gguf_path, verbose=True)

	        llm = Llama(
	            model_path=gguf_path,
	            n_ctx=8192,  # context size; images need room
	            n_gpu_layers=0,  # CPU only
	            verbose=True,
	            chat_handler=handler,  # use the custom handler class
	            n_batch=512,
	            logits_all=True,
	        )
	        print("Модель успешно загружена с CustomQwen2VLHandler!")
	    except Exception as e:
	        print(f"Ошибка загрузки: {e}")
	        raise e
	    return llm

	def process_image(image, max_dim=1024):
	    """Encode ``image`` as a base64 JPEG string, downscaling it if needed.

	    Args:
	        image: A PIL image (anything offering ``convert``, ``size``,
	            ``thumbnail`` and ``save`` in the PIL sense).
	        max_dim: Largest allowed width/height in pixels. Bigger images are
	            shrunk, keeping aspect ratio, to limit memory and context use.

	    Returns:
	        The JPEG bytes (quality 90) encoded as a base64 ``str``.
	    """
	    # Convert first: convert() returns a new image, so the caller's object
	    # is never resized in place, and palette/1-bit images are resampled as
	    # RGB (Pillow falls back to nearest-neighbour for those modes).
	    rgb = image.convert("RGB")
	    if max(rgb.size) > max_dim:
	        rgb.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)

	    buffered = io.BytesIO()
	    rgb.save(buffered, format="JPEG", quality=90)
	    return base64.b64encode(buffered.getvalue()).decode("utf-8")

	# Score the prompt asks the model to wrap in <answer> tags; whitespace
	# around the number is optional.
	_ANSWER_RE = re.compile(r"<answer>\s*([\d.]+)\s*</answer>")


	def _extract_score(text):
	    """Return the number inside ``<answer>...</answer>`` or a fallback text."""
	    found = _ANSWER_RE.search(text)
	    return found.group(1) if found else "Оценка не найдена"


	def evaluate_image(image, progress=gr.Progress()):
	    """Stream the model's quality assessment of ``image``.

	    This is a generator: every item is a ``(text, score)`` pair. While the
	    model is generating, ``text`` is the response so far and ``score`` is a
	    placeholder; the last item carries the full response and the extracted
	    score.

	    Args:
	        image: PIL image to assess, or ``None`` when nothing was uploaded.
	        progress: Gradio progress tracker used to report the stages.

	    Yields:
	        ``(text, score)`` tuples. Errors are reported as a final
	        ``(message, "Error")`` item instead of being raised.
	    """
	    if image is None:
	        # A generator's `return value` never reaches the consumer, so the
	        # hint has to be yielded.
	        yield "Пожалуйста, загрузите изображение.", ""
	        return

	    try:
	        progress(0.1, desc="Загрузка модели...")
	        model = load_model()

	        progress(0.2, desc="Обработка...")
	        base64_img = process_image(image)
	        img_url = f"data:image/jpeg;base64,{base64_img}"

	        system_prompt = "You are doing the image quality assessment task."
	        user_prompt = (
	            "What is your overall rating on the quality of this picture? "
	            "The rating should be a float between 1 and 5, rounded to two decimal places, "
	            "with 1 representing very poor quality and 5 representing excellent quality. "
	            "Please only output the final answer with only one score in <answer> </answer> tags."
	        )

	        messages = [
	            {"role": "system", "content": system_prompt},
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image_url", "image_url": {"url": img_url}},
	                    {"type": "text", "text": user_prompt},
	                ],
	            },
	        ]

	        full_response = ""
	        print("Начинаю генерацию...")

	        # Start streaming.
	        stream = model.create_chat_completion(
	            messages=messages,
	            max_tokens=1024,
	            temperature=0.6,
	            stream=True,
	        )

	        for chunk in stream:
	            if "choices" not in chunk:
	                continue
	            piece = chunk["choices"][0]["delta"].get("content")
	            if piece:
	                full_response += piece
	                yield full_response, "Думаю..."

	        # Extract the final score from the completed response.
	        yield full_response, _extract_score(full_response)

	    except Exception as e:
	        # Top-level UI boundary: show the failure instead of crashing.
	        err_msg = f"Произошла ошибка: {str(e)}"
	        print(err_msg)
	        yield err_msg, "Error"

	# User interface: image input and button on the left, score label and
	# streamed model text on the right.
	with gr.Blocks(title="VisualQuality-R1 (Custom Handler)") as demo:
	    gr.Markdown("# 👁️ VisualQuality-R1 (Qwen2-VL)")
	    gr.Markdown("Оценка качества изображений на CPU с кастомным обработчиком.")

	    with gr.Row():
	        with gr.Column():
	            input_img = gr.Image(label="Изображение", type="pil")
	            run_btn = gr.Button("Оценить", variant="primary")

	        with gr.Column():
	            output_score = gr.Label(label="Оценка")
	            output_text = gr.Textbox(lines=15, label="CoT (Рассуждения)")

	    # evaluate_image yields (text, score) pairs, hence this output order.
	    run_btn.click(
	        fn=evaluate_image,
	        inputs=[input_img],
	        outputs=[output_text, output_score],
	    )

	if __name__ == "__main__":
	    # Enable the queue, then start the app.
	    demo.queue().launch()