# Hugging Face Spaces page residue (duplicated "Running on T4" badge) — this
# module was scraped from a Space that runs on an NVIDIA T4 GPU.
# controlnet_facefix.py - PURE QUALITY ENHANCEMENT WITH MINIMAL CHANGE
import time

import cv2
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL
from PIL import Image, ImageFilter, ImageEnhance
from torchvision import transforms

# Startup banner.
print("=" * 60)
print("FACE-FIX: REINE QUALITÄTSVERBESSERUNG - MINIMALE ÄNDERUNG")
print("=" * 60)

# Lazily-populated module state (filled in by _initialize_components()).
_components_loaded = False  # True once the ControlNet weights are loaded
_controlnet_depth = None    # intentionally never loaded — depth alters the background too much
_controlnet_pose = None     # lllyasviel/sd-controlnet-openpose model
_pipeline = None            # cached StableDiffusionControlNetPipeline, built on first AI pass
def _initialize_components():
    """Lazily load the required ControlNet components (OpenPose only).

    Populates the module-level ``_controlnet_pose``; ``_controlnet_depth``
    is deliberately left as None because depth guidance changes the image
    (especially the background) far too much for a "minimal change" fix.

    Returns:
        bool: True if components are loaded (or already were), False on failure.
    """
    global _components_loaded, _controlnet_depth, _controlnet_pose

    # Fast path: everything already loaded.
    if _components_loaded:
        return True

    print("⚠️ Lade nur OpenPose (Depth wird deaktiviert)...")
    try:
        # OpenPose is the only control model we use.
        _controlnet_pose = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-openpose",
            torch_dtype=torch.float16,
        )
    except Exception as e:
        print(f"❌ Fehler: {e}")
        return False

    print("✅ OpenPose geladen")
    _controlnet_depth = None  # skipped on purpose — it alters too much
    _components_loaded = True
    return True
def _extract_precise_pose(image):
    """Build a minimal edge/pose control map focused on the face.

    Fine Canny edges are kept only inside detected face rectangles (and
    only the strongest ~10% of them); if no face is found, globally
    attenuated edges are used instead. On any failure a very weak global
    edge map is returned as a fallback.

    Args:
        image: input PIL image (converted to RGB internally).

    Returns:
        PIL.Image.Image: an RGB control map the same size as *image*.
    """
    try:
        img_array = np.array(image.convert("RGB"))
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        # Very low thresholds: keep only the finest edges.
        edges = cv2.Canny(gray, 15, 45)
        # Face detection to restrict edges to the face region.
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        # Start from an empty (black) pose map.
        pose_map = np.zeros_like(img_array)
        if len(faces) > 0:
            for (x, y, w, h) in faces:
                face_region = edges[y:y+h, x:x+w]
                nonzero = face_region[face_region > 0]
                # Guard: np.percentile raises on an empty array (a detected
                # face with no edges would otherwise crash into the fallback).
                if nonzero.size:
                    # Keep only the strongest ~10% of edges.
                    threshold = np.percentile(nonzero, 90)
                    face_region[face_region < threshold] = 0
                pose_map[y:y+h, x:x+w, 0] = face_region
        else:
            # No face detected: use strongly attenuated edges everywhere.
            pose_map[..., 0] = (edges * 0.3).astype(np.uint8)
        return Image.fromarray(pose_map)
    except Exception:
        # Fallback: extremely weak edges. Cast back to uint8 — multiplying
        # by 0.2 yields float64, which Image.fromarray cannot handle.
        gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
        edges = (cv2.Canny(gray, 10, 30) * 0.2).astype(np.uint8)
        return Image.fromarray(edges).convert("RGB")
def _apply_face_enhancement(image):
    """Simple non-AI quality enhancement: sharpen, denoise, boost contrast.

    Args:
        image: input PIL image (converted to RGB internally).

    Returns:
        PIL.Image.Image: the enhanced RGB image; on any failure the input
        is returned unchanged (best-effort, never raises to the caller).
    """
    try:
        img_array = np.array(image.convert("RGB"))
        # 1. Mild sharpening. The kernel weights sum to exactly 1.0 so mean
        #    brightness is preserved — the previous "/ 3.0" divisor scaled
        #    the sum down to 1/3 and darkened the whole image.
        kernel = np.array([[-0.5, -0.5, -0.5],
                           [-0.5,  5.0, -0.5],
                           [-0.5, -0.5, -0.5]])
        sharpened = cv2.filter2D(img_array, -1, kernel)
        # 2. Light denoise.
        denoised = cv2.fastNlMeansDenoisingColored(sharpened, None, 3, 3, 7, 21)
        # 3. Slight contrast boost: CLAHE on the L channel only, so colors
        #    (a/b channels) are untouched.
        lab = cv2.cvtColor(denoised, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        enhanced = cv2.cvtColor(cv2.merge([l, a, b]), cv2.COLOR_LAB2RGB)
        return Image.fromarray(enhanced)
    except Exception:
        # Best-effort: never fail the caller over a cosmetic step.
        return image
def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str,
                  seed: int, model_id: str, *,
                  use_ai_enhancement: bool = False):
    """Super-subtle quality enhancement with minimal visual change.

    Strategy:
      1. Always run the cheap, deterministic non-AI pass (_apply_face_enhancement).
      2. Optionally (``use_ai_enhancement=True``) run a very gentle ControlNet
         pass — OpenPose only (no depth), very low conditioning strength,
         almost no CFG, identical prompt — then blend 30% of the AI result
         over 70% of the original.

    Args:
        image: input PIL image (any size).
        prompt: the ORIGINAL generation prompt, reused unchanged on purpose.
        negative_prompt: original negative prompt; ", deformed, blurry" is appended.
        seed: original seed; the AI pass uses ``seed + 100``.
        model_id: Hugging Face model id for the Stable Diffusion base pipeline.
        use_ai_enhancement: keyword-only switch for the AI pass. Was a
            hard-coded local (False) before; parameterized with the same
            default, so existing callers are unaffected.

    Returns:
        PIL.Image.Image: enhanced image, same size as *image*. Falls back to
        the non-AI result whenever the AI path fails.
    """
    print("\n" + "🎯"*50)
    print("SUBTILE QUALITÄTSVERBESSERUNG")
    print(f" Größe: {image.size}")
    print("🎯"*50)
    start_time = time.time()

    # OPTION 1: simple non-AI enhancement (recommended default).
    print("\n⚡ OPTION 1: Einfache non-AI Verbesserung...")
    enhanced = _apply_face_enhancement(image)

    if not use_ai_enhancement:
        duration = time.time() - start_time
        print(f"✅ Non-AI Verbesserung in {duration:.1f}s")
        return enhanced

    # OPTION 2: minimal AI enhancement (only when explicitly requested).
    print("⚠️ Starte MINIMALE AI-Verbesserung...")
    if not _initialize_components():
        return enhanced

    # Prepare the control map at the SD-native 512x512 resolution.
    original_size = image.size
    control_size = (512, 512)
    resized_image = image.resize(control_size, Image.Resampling.LANCZOS)
    pose_img = _extract_precise_pose(resized_image)
    pose_img.save("debug_minimal_pose.png")  # debug artifact for inspection

    # Build the pipeline once and cache it at module level.
    global _pipeline
    if _pipeline is None:
        try:
            print("🔄 Lade Pipeline...")
            _pipeline = StableDiffusionControlNetPipeline.from_pretrained(
                model_id,
                controlnet=[_controlnet_pose],  # OpenPose only — depth changes too much
                torch_dtype=torch.float16,
                safety_checker=None,
                requires_safety_checker=False,
            )
            _pipeline.enable_attention_slicing()
            _pipeline.enable_vae_slicing()
            print("✅ Pipeline geladen")
        except Exception as e:
            print(f"❌ Pipeline Fehler: {e}")
            return enhanced

    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Device: {device}")
        pipeline = _pipeline.to(device)
        # Deliberately conservative parameters: identical prompt, very low
        # ControlNet strength, nearly no guidance.
        print("\n⚙️ EXTREM SUBTILE PARAMETER:")
        print(" • OpenPose Strength: 0.3 (SEHR NIEDRIG)")
        print(" • Steps: 15 (wenig)")
        print(" • CFG: 2.0 (fast kein Guidance)")
        print(" • Gleicher Seed")
        result = pipeline(
            prompt=prompt,  # IMPORTANT: same prompt as the original render
            negative_prompt=f"{negative_prompt}, deformed, blurry",
            image=[pose_img],  # pose map only
            controlnet_conditioning_scale=[0.3],  # extremely low influence
            num_inference_steps=15,
            guidance_scale=2.0,
            generator=torch.Generator(device).manual_seed(seed + 100),  # slightly shifted seed
            height=512,
            width=512,
        ).images[0]

        # Back to the original resolution.
        if original_size != (512, 512):
            result = result.resize(original_size, Image.Resampling.LANCZOS)

        # Blend 70% original / 30% AI result so the change stays minimal.
        # Convert the original to RGB first so an RGBA or grayscale input
        # cannot break the elementwise array math.
        result_array = np.array(result).astype(float)
        original_array = np.array(image.convert("RGB")).astype(float)
        blended = (original_array * 0.7 + result_array * 0.3).astype(np.uint8)
        final_result = Image.fromarray(blended)

        duration = time.time() - start_time
        print(f"\n✅ SUBTILE VERBESSERUNG in {duration:.1f}s")
        print(f" • 70% Original, 30% AI")
        print(f" • OpenPose: 0.3")
        print(f" • CFG: 2.0")
        return final_result
    except Exception as e:
        print(f"\n❌ AI-Verbesserung fehlgeschlagen: {e}")
        return enhanced
# Closing banner: confirms the module finished importing.
print("\n".join(("=" * 60, "FACE-FIX: REINE QUALITÄTSVERBESSERUNG", "=" * 60)))