Spaces:

Seniordev22
/

OldServer

Running

App Files Community

Seniordev22 committed on Apr 15

Commit

c7400ce

verified ·

1 Parent(s): 257adde

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -47

app.py CHANGED Viewed

@@ -27,17 +27,11 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logger.info(f"Using Device: {DEVICE}")
 logger.info(f"CUDA Available: {torch.cuda.is_available()}")
-# Strong CPU Optimization
 if DEVICE.type == "cpu":
     torch.set_num_threads(4)
     torch.set_num_interop_threads(1)
     cv2.setNumThreads(4)
-    # Enable better CPU backend if available
-    try:
-        import torch.backends.mkldnn
-        torch.backends.mkldnn.enabled = True
-    except:
-        pass
 else:
     torch.set_num_threads(1)
@@ -49,7 +43,6 @@ executor = ThreadPoolExecutor(max_workers=2)
 face_processor = None
 face_parser = None
 beard_model = None
-face_parser_compiled = None
 # ====================== TIMED DECORATOR ======================
 def timed(name: str):
@@ -65,7 +58,7 @@ def timed(name: str):
 # ====================== MODEL LOADING ======================
 def load_face_parser():
-    global face_processor, face_parser, face_parser_compiled
     if face_parser is not None:
         return
     logger.info("Loading Segformer Face Parser...")
@@ -73,18 +66,6 @@ def load_face_parser():
     face_parser = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
     face_parser.to(DEVICE)
     face_parser.eval()
-    # Try torch.compile for speedup (CPU pe bhi kaam karta hai PyTorch 2.0+)
-    try:
-        if DEVICE.type == "cpu":
-            face_parser_compiled = torch.compile(face_parser, mode="default", fullgraph=False)
-            logger.info("✅ Face parser compiled with torch.compile")
-        else:
-            face_parser_compiled = face_parser
-    except Exception as e:
-        logger.warning(f"torch.compile failed: {e}, using normal mode")
-        face_parser_compiled = face_parser
     logger.info("✅ Face parser loaded")
 def load_beard_model():
@@ -99,26 +80,21 @@ def load_beard_model():
 def get_hair_and_exclude_masks(pil_image: Image.Image):
     load_face_parser()
     orig_w, orig_h = pil_image.size
-    # Use 128x128 (already fast)
     img_small = pil_image.resize((128, 128), Image.BILINEAR)
     inputs = face_processor(images=img_small, return_tensors="pt").to(DEVICE)
     with torch.inference_mode():
-        if face_parser_compiled is not None:
-            out = face_parser_compiled(**inputs)
-        else:
-            out = face_parser(**inputs)
         logits = out.logits
         up = torch.nn.functional.interpolate(logits, size=(128, 128), mode="bilinear", align_corners=False)
         probs = torch.softmax(up, dim=1)[0]
-    # Hair mask (optimized threshold)
-    hair = (probs[13].cpu().numpy() > 0.04).astype(np.float32)  # thoda loose for speed
-    hair = cv2.GaussianBlur(hair, (3,3), 0.8)
-    # Face exclude (simplified a bit)
     parsing = up.argmax(dim=1).squeeze(0).cpu().numpy()
     face_cls = list(range(1,6)) + list(range(8,13)) + [17,18]
     face_m = np.isin(parsing, face_cls).astype(np.float32)
@@ -128,17 +104,16 @@ def get_hair_and_exclude_masks(pil_image: Image.Image):
     h, w = face_m.shape
     forehead = np.zeros_like(face_m, dtype=np.float32)
-    forehead[:int(h*0.32)] = 1.0
     face_m = face_m * (1 - forehead * 0.45)
     hair = hair * (1 - face_m)
     hair = cv2.resize(hair, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
-    # Exclude mask (nose+lips) - fast path
-    exclude = np.zeros((128,128), dtype=np.float32)
-    exclude = np.maximum(exclude, (probs[2] > 0.5).cpu().numpy())
-    exclude = np.maximum(exclude, (probs[11] > 0.5).cpu().numpy())
-    exclude = np.maximum(exclude, (probs[12] > 0.5).cpu().numpy())
     exclude = cv2.resize(exclude, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST)
     exclude = cv2.dilate(exclude, kernel, iterations=1)
@@ -149,21 +124,18 @@ def get_hair_and_exclude_masks(pil_image: Image.Image):
 def get_beard_mask_fast(pil_image: Image.Image, exclude_mask: np.ndarray):
     model = load_beard_model()
     orig_w, orig_h = pil_image.size
-    # Use 128x128 with streaming for slight speedup
     img_small = pil_image.resize((128, 128), Image.BILINEAR)
     img_array = np.array(img_small)
     results = model.predict(
         img_array,
         device=DEVICE.type,
-        conf=0.30,          # thoda tight for speed
         iou=0.50,
         imgsz=128,
         half=(DEVICE.type == "cuda"),
         verbose=False,
-        max_det=8,
-        stream=True         # helps in some cases
     )
     mask = np.zeros((orig_h, orig_w), dtype=np.float32)
@@ -184,11 +156,8 @@ def get_beard_mask_fast(pil_image: Image.Image, exclude_mask: np.ndarray):
     return mask
-# Color Transfer part same rakha hai (already fast hai, sirf minor clean)
 @timed("Color Transfer")
 def apply_strong_grey_hair(image: Image.Image, hair_mask: np.ndarray, beard_mask: np.ndarray):
-    # (same as previous optimized version - no major change needed here)
     comb = np.maximum(hair_mask, beard_mask)
     if comb.sum() < 100:
         comb = cv2.GaussianBlur(comb, (5,5), 1.5)
@@ -237,7 +206,7 @@ def apply_strong_grey_hair(image: Image.Image, hair_mask: np.ndarray, beard_mask
     return result
-# ====================== MAIN ======================
 @timed("Total Processing")
 def process_face_whitening(input_image: Image.Image):
     orig = input_image.convert("RGB")

 logger.info(f"Using Device: {DEVICE}")
 logger.info(f"CUDA Available: {torch.cuda.is_available()}")
+# CPU Optimization (stable settings)
 if DEVICE.type == "cpu":
     torch.set_num_threads(4)
     torch.set_num_interop_threads(1)
     cv2.setNumThreads(4)
 else:
     torch.set_num_threads(1)
 face_processor = None
 face_parser = None
 beard_model = None
 # ====================== TIMED DECORATOR ======================
 def timed(name: str):
 # ====================== MODEL LOADING ======================
 def load_face_parser():
+    global face_processor, face_parser
     if face_parser is not None:
         return
     logger.info("Loading Segformer Face Parser...")
     face_parser = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
     face_parser.to(DEVICE)
     face_parser.eval()
     logger.info("✅ Face parser loaded")
 def load_beard_model():
 def get_hair_and_exclude_masks(pil_image: Image.Image):
     load_face_parser()
     orig_w, orig_h = pil_image.size
     img_small = pil_image.resize((128, 128), Image.BILINEAR)
     inputs = face_processor(images=img_small, return_tensors="pt").to(DEVICE)
     with torch.inference_mode():
+        out = face_parser(**inputs)
         logits = out.logits
         up = torch.nn.functional.interpolate(logits, size=(128, 128), mode="bilinear", align_corners=False)
         probs = torch.softmax(up, dim=1)[0]
+    # Optimized hair mask
+    hair = (probs[13].cpu().numpy() > 0.04).astype(np.float32)
+    hair = cv2.GaussianBlur(hair, (3, 3), 0.8)
+    # Face exclude
     parsing = up.argmax(dim=1).squeeze(0).cpu().numpy()
     face_cls = list(range(1,6)) + list(range(8,13)) + [17,18]
     face_m = np.isin(parsing, face_cls).astype(np.float32)
     h, w = face_m.shape
     forehead = np.zeros_like(face_m, dtype=np.float32)
+    forehead[:int(h * 0.32)] = 1.0
     face_m = face_m * (1 - forehead * 0.45)
     hair = hair * (1 - face_m)
     hair = cv2.resize(hair, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
+    # Exclude mask (nose + lips)
+    exclude = np.zeros((128, 128), dtype=np.float32)
+    exclude = np.maximum(exclude, (probs[2] > 0.5).cpu().numpy().astype(np.float32))
+    exclude = np.maximum(exclude, (probs[11] > 0.5).cpu().numpy().astype(np.float32))
+    exclude = np.maximum(exclude, (probs[12] > 0.5).cpu().numpy().astype(np.float32))
     exclude = cv2.resize(exclude, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST)
     exclude = cv2.dilate(exclude, kernel, iterations=1)
 def get_beard_mask_fast(pil_image: Image.Image, exclude_mask: np.ndarray):
     model = load_beard_model()
     orig_w, orig_h = pil_image.size
     img_small = pil_image.resize((128, 128), Image.BILINEAR)
     img_array = np.array(img_small)
     results = model.predict(
         img_array,
         device=DEVICE.type,
+        conf=0.30,
         iou=0.50,
         imgsz=128,
         half=(DEVICE.type == "cuda"),
         verbose=False,
+        max_det=8
     )
     mask = np.zeros((orig_h, orig_w), dtype=np.float32)
     return mask
 @timed("Color Transfer")
 def apply_strong_grey_hair(image: Image.Image, hair_mask: np.ndarray, beard_mask: np.ndarray):
     comb = np.maximum(hair_mask, beard_mask)
     if comb.sum() < 100:
         comb = cv2.GaussianBlur(comb, (5,5), 1.5)
     return result
+# ====================== MAIN PROCESSING ======================
 @timed("Total Processing")
 def process_face_whitening(input_image: Image.Image):
     orig = input_image.convert("RGB")