Spaces:
Running on T4
Running on T4
Commit ·
39b31b5
1
Parent(s): 16ed97d
revert: to Florence2 model
Browse files
- Dockerfile +6 -5
- logic.py +81 -73
- requirements.txt +5 -7
Dockerfile
CHANGED
|
@@ -29,6 +29,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 29 |
COPY . .
|
| 30 |
|
| 31 |
# 6. Создаем пользователя и директории, меняем владельца
|
|
|
|
| 32 |
RUN useradd -m -u 1000 appuser && \
|
| 33 |
mkdir -p /data/.cache && \
|
| 34 |
chown -R appuser:appuser /app /data
|
|
@@ -37,13 +38,13 @@ RUN useradd -m -u 1000 appuser && \
|
|
| 37 |
USER appuser
|
| 38 |
|
| 39 |
# 8. Скачиваем модели от имени этого пользователя.
|
| 40 |
-
#
|
| 41 |
RUN iopaint download --model lama && \
|
| 42 |
python -c "\
|
| 43 |
-
from transformers import
|
| 44 |
-
model_id = '
|
| 45 |
-
|
| 46 |
-
|
| 47 |
|
| 48 |
# 9. Открываем порт и запускаем приложение
|
| 49 |
EXPOSE 7860
|
|
|
|
| 29 |
COPY . .
|
| 30 |
|
| 31 |
# 6. Создаем пользователя и директории, меняем владельца
|
| 32 |
+
# Это гарантирует, что все последующие операции будут иметь правильные права
|
| 33 |
RUN useradd -m -u 1000 appuser && \
|
| 34 |
mkdir -p /data/.cache && \
|
| 35 |
chown -R appuser:appuser /app /data
|
|
|
|
| 38 |
USER appuser
|
| 39 |
|
| 40 |
# 8. Скачиваем модели от имени этого пользователя.
|
| 41 |
+
# Теперь они попадут в правильный кэш, доступный приложению.
|
| 42 |
RUN iopaint download --model lama && \
|
| 43 |
python -c "\
|
| 44 |
+
from transformers import AutoModelForCausalLM, AutoProcessor; \
|
| 45 |
+
model_id = 'microsoft/Florence-2-large'; \
|
| 46 |
+
AutoProcessor.from_pretrained(model_id, trust_remote_code=True); \
|
| 47 |
+
AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)"
|
| 48 |
|
| 49 |
# 9. Открываем порт и запускаем приложение
|
| 50 |
EXPOSE 7860
|
logic.py
CHANGED
|
@@ -1,108 +1,116 @@
|
|
| 1 |
import cv2
|
| 2 |
import numpy as np
|
| 3 |
-
from PIL import Image
|
| 4 |
-
from transformers import GroundingDinoProcessor, GroundingDinoForObjectDetection
|
| 5 |
from iopaint.model_manager import ModelManager
|
| 6 |
from iopaint.schema import HDStrategy, LDMSampler, InpaintRequest
|
| 7 |
import torch
|
| 8 |
-
import supervision as sv
|
| 9 |
from loguru import logger
|
|
|
|
| 10 |
import time
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
class WatermarkRemover:
|
| 13 |
def __init__(self, device="cpu"):
|
| 14 |
self.device = device
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
logger.info("Первый вызов: Загрузка модели детектора Grounding DINO...")
|
| 23 |
-
dino_model_id = "IDEA-Research/grounding-dino-base"
|
| 24 |
-
self.dino_processor = GroundingDinoProcessor.from_pretrained(dino_model_id)
|
| 25 |
-
self.dino_model = GroundingDinoForObjectDetection.from_pretrained(dino_model_id).to(self.device)
|
| 26 |
-
logger.info("Модель Grounding DINO загружена.")
|
| 27 |
-
return self.dino_processor, self.dino_model
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
self.inpainting_model = ModelManager(name="lama", device=self.device)
|
| 33 |
-
logger.info("Модель LaMA загружена.")
|
| 34 |
-
return self.inpainting_model
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
outputs = model(**inputs)
|
| 42 |
-
|
| 43 |
-
results = processor.post_process_grounded_object_detection(
|
| 44 |
-
outputs,
|
| 45 |
input_ids=inputs["input_ids"],
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def _inpaint_image(self, image_np: np.ndarray, mask_np: np.ndarray) -> np.ndarray:
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
result = inpainting_model(image_np, mask_np, config)
|
| 60 |
if result.dtype in [np.float64, np.float32]:
|
| 61 |
result = np.clip(result, 0, 255).astype(np.uint8)
|
| 62 |
return result
|
| 63 |
|
| 64 |
-
def run(self, image: Image.Image) -> Image.Image:
|
| 65 |
start_time = time.time()
|
| 66 |
logger.info("Начало процесса удаления вотермарок...")
|
| 67 |
-
|
| 68 |
-
# --- Этап 1: Поиск высококонтрастных частей (текст) ---
|
| 69 |
-
logger.info("--- ЭТАП 1: Поиск текста с высокой уверенностью ---")
|
| 70 |
-
detections_text = self._detect_watermarks(
|
| 71 |
-
image, text_prompt="text . watermark", box_threshold=0.4, text_threshold=0.4
|
| 72 |
-
)
|
| 73 |
-
logger.info(f"Найдено {len(detections_text)} текстовых фрагментов.")
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
detections_bg = self._detect_watermarks(
|
| 78 |
-
image, text_prompt="semi-transparent background . transparent overlay . watermark background", box_threshold=0.25, text_threshold=0.25
|
| 79 |
-
)
|
| 80 |
-
logger.info(f"Найдено {len(detections_bg)} фоновых фрагментов.")
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
if
|
| 88 |
-
logger.info("Ни од
|
| 89 |
return image
|
| 90 |
-
|
| 91 |
-
logger.info(f"Всего найдено {len(detections)} фрагментов. Запускаю NMS для слияния...")
|
| 92 |
-
# Применяем NMS для слияния всех пересекающихся рамок в одну
|
| 93 |
-
merged_detections = detections.with_nms(class_agnostic=True, threshold=0.6)
|
| 94 |
-
logger.success(f"После слияния осталось {len(merged_detections)} уникальных областей.")
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
kernel = np.ones((9, 9), np.uint8)
|
| 102 |
-
processed_mask = cv2.dilate(mask, kernel, iterations=1)
|
| 103 |
-
|
| 104 |
image_np = np.array(image.convert("RGB"))
|
|
|
|
|
|
|
| 105 |
result_np_bgr = self._inpaint_image(image_np, processed_mask)
|
|
|
|
| 106 |
result_np_rgb = cv2.cvtColor(result_np_bgr, cv2.COLOR_BGR2RGB)
|
| 107 |
|
| 108 |
end_time = time.time()
|
|
|
|
| 1 |
import cv2
|
| 2 |
import numpy as np
|
| 3 |
+
from PIL import Image, ImageDraw
|
| 4 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 5 |
from iopaint.model_manager import ModelManager
|
| 6 |
from iopaint.schema import HDStrategy, LDMSampler, InpaintRequest
|
| 7 |
import torch
|
|
|
|
| 8 |
from loguru import logger
|
| 9 |
+
from enum import Enum
|
| 10 |
import time
|
| 11 |
|
| 12 |
+
class TaskType(str, Enum):
|
| 13 |
+
OPEN_VOCAB_DETECTION = "<OPEN_VOCABULARY_DETECTION>"
|
| 14 |
+
|
| 15 |
class WatermarkRemover:
|
| 16 |
def __init__(self, device="cpu"):
|
| 17 |
self.device = device
|
| 18 |
+
logger.info(f"Используемое устройство: {self.device}")
|
| 19 |
+
|
| 20 |
+
logger.info("Загрузка модели Florence-2...")
|
| 21 |
+
model_id = "microsoft/Florence-2-large"
|
| 22 |
+
self.florence_model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(self.device).eval()
|
| 23 |
+
self.florence_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
|
| 24 |
+
logger.info("Модель Florence-2 загружена.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
logger.info("Загрузка модели LaMA для закрашивания...")
|
| 27 |
+
self.inpainting_model = ModelManager(name="lama", device=self.device)
|
| 28 |
+
logger.info("Модель LaMA загружена.")
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
def _identify(self, image: Image.Image, text_input: str):
|
| 31 |
+
task_prompt = TaskType.OPEN_VOCAB_DETECTION
|
| 32 |
+
prompt = task_prompt.value + text_input
|
| 33 |
+
inputs = self.florence_processor(text=prompt, images=image, return_tensors="pt")
|
| 34 |
+
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 35 |
|
| 36 |
+
# Возвращаем проверенное значение num_beams
|
| 37 |
+
generated_ids = self.florence_model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
input_ids=inputs["input_ids"],
|
| 39 |
+
pixel_values=inputs["pixel_values"],
|
| 40 |
+
max_new_tokens=1024,
|
| 41 |
+
do_sample=False,
|
| 42 |
+
num_beams=5
|
| 43 |
+
)
|
| 44 |
|
| 45 |
+
generated_text = self.florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
|
| 46 |
+
return self.florence_processor.post_process_generation(
|
| 47 |
+
generated_text, task=task_prompt.value, image_size=image.size
|
| 48 |
)
|
| 49 |
+
|
| 50 |
+
def _get_mask_for_prompt(self, image: Image.Image, text_input: str, max_bbox_percent: float) -> Image.Image:
|
| 51 |
+
logger.info(f"Поиск по промпту: '{text_input}'...")
|
| 52 |
+
parsed_answer = self._identify(image, text_input)
|
| 53 |
|
| 54 |
+
mask = Image.new("L", image.size, 0)
|
| 55 |
+
draw = ImageDraw.Draw(mask)
|
| 56 |
+
|
| 57 |
+
detection_key = TaskType.OPEN_VOCAB_DETECTION.value
|
| 58 |
+
if detection_key in parsed_answer and "bboxes" in parsed_answer[detection_key]:
|
| 59 |
+
bboxes = parsed_answer[detection_key]["bboxes"]
|
| 60 |
+
logger.success(f"Найдено {len(bboxes)} bbox по промпту '{text_input}'.")
|
| 61 |
+
image_area = image.width * image.height
|
| 62 |
+
|
| 63 |
+
for bbox in bboxes:
|
| 64 |
+
x1, y1, x2, y2 = map(int, bbox)
|
| 65 |
+
bbox_area = (x2 - x1) * (y2 - y1)
|
| 66 |
+
|
| 67 |
+
if (bbox_area / image_area) * 100 <= max_bbox_percent:
|
| 68 |
+
draw.rectangle([x1, y1, x2, y2], fill=255)
|
| 69 |
+
else:
|
| 70 |
+
logger.warning(f"Пропущен bbox >{max_bbox_percent}%: {bbox}")
|
| 71 |
+
else:
|
| 72 |
+
logger.warning(f"Промпт '{text_input}' не дал результатов.")
|
| 73 |
+
|
| 74 |
+
return mask
|
| 75 |
+
|
| 76 |
def _inpaint_image(self, image_np: np.ndarray, mask_np: np.ndarray) -> np.ndarray:
|
| 77 |
+
config = InpaintRequest(ldm_steps=50, ldm_sampler=LDMSampler.ddim, hd_strategy=HDStrategy.CROP, hd_strategy_crop_margin=64, hd_strategy_crop_trigger_size=800, hd_strategy_resize_limit=1600)
|
| 78 |
+
result = self.inpainting_model(image_np, mask_np, config)
|
|
|
|
| 79 |
if result.dtype in [np.float64, np.float32]:
|
| 80 |
result = np.clip(result, 0, 255).astype(np.uint8)
|
| 81 |
return result
|
| 82 |
|
| 83 |
+
def run(self, image: Image.Image, max_bbox_percent: float = 27.0) -> Image.Image:
|
| 84 |
start_time = time.time()
|
| 85 |
logger.info("Начало процесса удаления вотермарок...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
+
mask_image = self._get_mask_for_prompt(image, "watermark", max_bbox_percent)
|
| 88 |
+
mask_np = np.array(mask_image)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
+
if not np.any(mask_np):
|
| 91 |
+
logger.warning("Основной промпт не нашел вотермарок. Запускаю резервные попытки...")
|
| 92 |
+
fallback_prompts = ["logo", "text overlay"] # Убрали слишком общие промпты
|
| 93 |
+
|
| 94 |
+
for prompt in fallback_prompts:
|
| 95 |
+
mask_image = self._get_mask_for_prompt(image, prompt, max_bbox_percent)
|
| 96 |
+
mask_np = np.array(mask_image)
|
| 97 |
+
if np.any(mask_np):
|
| 98 |
+
logger.success(f"Резервный промпт '{prompt}' нашел вотермарку!")
|
| 99 |
+
break
|
| 100 |
|
| 101 |
+
if not np.any(mask_np):
|
| 102 |
+
logger.info("Ни одна из попыток не нашла вотермарок. Возвращаем оригинальное изображение.")
|
| 103 |
return image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
kernel_size = 7
|
| 106 |
+
kernel = np.ones((kernel_size, kernel_size), np.uint8)
|
| 107 |
+
processed_mask = cv2.dilate(mask_np, kernel, iterations=1)
|
| 108 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
image_np = np.array(image.convert("RGB"))
|
| 110 |
+
|
| 111 |
+
logger.info("Закрашивание области по обработанной маске...")
|
| 112 |
result_np_bgr = self._inpaint_image(image_np, processed_mask)
|
| 113 |
+
|
| 114 |
result_np_rgb = cv2.cvtColor(result_np_bgr, cv2.COLOR_BGR2RGB)
|
| 115 |
|
| 116 |
end_time = time.time()
|
requirements.txt
CHANGED
|
@@ -4,15 +4,13 @@ uvicorn[standard]
|
|
| 4 |
python-multipart
|
| 5 |
requests
|
| 6 |
|
| 7 |
-
# Основная ML-библиотека (
|
| 8 |
iopaint
|
| 9 |
|
| 10 |
-
# --- Grounding DINO ---
|
| 11 |
-
timm
|
| 12 |
-
supervision
|
| 13 |
-
|
| 14 |
# Утилиты
|
| 15 |
loguru
|
|
|
|
| 16 |
|
| 17 |
-
# Оптимизация
|
| 18 |
-
|
|
|
|
|
|
| 4 |
python-multipart
|
| 5 |
requests
|
| 6 |
|
| 7 |
+
# Основная ML-библиотека (она сама подтянет transformers, diffusers и т.д.)
|
| 8 |
iopaint
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Утилиты
|
| 11 |
loguru
|
| 12 |
+
opencv-python-headless
|
| 13 |
|
| 14 |
+
# Оптимизация для Transformer (устанавливаем отдельно для контроля)
|
| 15 |
+
# Убедитесь, что эта версия совместима с вашим кодом и версией CUDA в Docker-образе
|
| 16 |
+
flash-attn==2.5.8
|