kayte0342 committed on
Commit
acef9fe
·
verified ·
1 Parent(s): 26f0d8b

Upload 2 files

Browse files
Files changed (2) hide show
  1. llm_captions.py +85 -0
  2. maskgen.py +159 -0
llm_captions.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from pathlib import Path
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import base64
6
+ from tqdm import tqdm
7
+ import torch
8
+
9
+ OLLAMA_URL = "http://127.0.0.1:11434/api/generate"
10
+ OLLAMA_MODEL = "gemma3"
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+
13
def image_to_base64(image_path):
    """Return the image at *image_path* as a base64-encoded JPEG string.

    The image is converted to RGB first so palette/alpha sources can be
    re-encoded as JPEG.
    """
    # Use the context manager on the object returned by Image.open() itself.
    # The original wrapped the *converted* copy, so the file handle opened
    # by Image.open() was never closed (a per-image resource leak).
    with Image.open(image_path) as img:
        rgb = img.convert("RGB")  # convert() loads pixels, so closing is safe
    buffered = BytesIO()
    rgb.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
18
+
19
def generate_caption_vlm(image_path, base_tags=None):
    """Ask the local Ollama VLM for a single factual caption of *image_path*.

    Parameters:
        image_path: path to the image file to caption.
        base_tags: optional list of tags appended verbatim to the prompt.

    Returns:
        The caption text from the model, or the fallback string
        "Auto-tagged" if the request fails for any reason.
    """
    prompt = (
        "Write one concise and factual caption for this image for machine learning training; write only the caption without extra text. "
        " Describe ONLY the image contents. Describe the characters, background, and color, style, coverage, and relevant characteristics of the clothes. "
        " NO style words or lighting descriptions. NO mention of camera, lens, quality or mood. One simple descriptive paragraph."
        "Avoid creative or poetic language. Avoid context setting language. Use 50 words or less. "
        f"Include these tags at the end: {', '.join(base_tags) if base_tags else ''}."
    )
    image_base64 = image_to_base64(image_path)
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "images": [image_base64],
        "options": {"temperature": 0.2},
        "stream": False
    }
    try:
        # Timeout added: without one, a wedged Ollama server blocks the
        # whole tagging run forever. 300 s is generous for slow local VLMs.
        response = requests.post(OLLAMA_URL, json=payload, timeout=300)
        response.raise_for_status()
        result = response.json()
        return result.get("response", "").strip()
    except Exception as e:
        # Deliberate best-effort: log and fall back so batch tagging
        # continues past a single bad image or transient server error.
        print(f"⚠️ Ollama VLM request failed: {e}")
        return "Auto-tagged"
43
+
44
def create_tags_for_images(image_dir, base_tags):
    """Write a .txt caption sidecar next to every supported image in *image_dir*."""
    directory = Path(image_dir)
    patterns = ("*.jpg", "*.jpeg", "*.png", "*.webp")
    # Gather every file matching any of the supported extensions.
    image_files = [p for pattern in patterns for p in directory.glob(pattern)]

    for image_path in tqdm(image_files, desc=f"Tagging images in {image_dir}"):
        caption = generate_caption_vlm(image_path, base_tags)
        # Sidecar shares the image's stem: foo.jpg -> foo.txt
        image_path.with_suffix(".txt").write_text(caption, encoding="utf-8")
        print(f"[✓] Tagged {image_path.name}")
56
+
57
if __name__ == "__main__":
    # === CONFIGURATION OPTIONS ===
    # Option 1: Single folder (set to None to fall through to Option 2).
    #single_folder = None
    #OR
    single_folder = r"G:\My Drive\AI\training_data\kawaii_goth"

    # Option 2: Process all subfolders in parent_dir except excluded ones.
    parent_dir = r"G:\My Drive\AI\images\tbd\N1na"
    exclude_folders = {"video"}

    if single_folder is not None:
        # Tags for the single-folder run are hard-coded; removed a dead
        # `folder_tag` assignment that was never referenced in this branch.
        base_tags = ["{kawaii goth}"]
        print(f"\n🏷️ Processing single folder: {single_folder} | Base tags: {base_tags}")
        create_tags_for_images(single_folder, base_tags)
    else:
        # Every immediate subdirectory of parent_dir, minus the exclusions.
        folders_to_process = [
            str(folder)
            for folder in Path(parent_dir).iterdir()
            if folder.is_dir() and folder.name not in exclude_folders
        ]
        for folder in folders_to_process:
            # Derive a human-readable tag from the folder name
            # ("my_subject" -> "my subject").
            folder_tag = Path(folder).name.replace("_", " ").lower()
            base_tags = [folder_tag, "Mature woman, realistic, detailed"]
            print(f"\n🏷️ Processing folder: {folder} | Base tags: {base_tags}")
            create_tags_for_images(folder, base_tags)

    print("✅ All images tagged via VLM.")
maskgen.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ import argparse
5
+ from pathlib import Path
6
+ import urllib.request
7
+
8
+ import cv2
9
+ import numpy as np
10
+ import os
11
+ import argparse
12
+ from pathlib import Path
13
+ import urllib.request
14
+ import mediapipe
15
+ import cv2
16
+ import mediapipe as mp
17
+
18
def download_face_model():
    """Ensure the OpenCV SSD face-detector files exist locally.

    Downloads the Caffe prototxt/weights pair from Hugging Face on first
    use; later runs reuse the cached files.

    Returns:
        Tuple of local filenames (prototxt, caffemodel).
    """
    prototxt = "deploy.prototxt"
    prototxt_url = "https://huggingface.co/Durraiya/deploy.prototxt/resolve/main/deploy.prototxt"
    caffemodel = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
    caffemodel_url = "https://huggingface.co/Durraiya/res10_300x300_ssd_iter_140000_fp16.caffemodel/resolve/main/res10_300x300_ssd_iter_140000_fp16.caffemodel"

    if not Path(prototxt).exists():
        print("Downloading prototxt...")
        urllib.request.urlretrieve(prototxt_url, prototxt)
    if not Path(caffemodel).exists():
        print("Downloading caffemodel...")
        urllib.request.urlretrieve(caffemodel_url, caffemodel)

    return prototxt, caffemodel
30
+
31
+
32
def download_body_model():
    """Ensure the YOLOv8 person-segmentation checkpoint exists locally.

    Returns:
        The local filename of the checkpoint.
    """
    checkpoint = "person_yolov8s-seg.pt"
    url = "https://huggingface.co/Bingsu/adetailer/resolve/main/person_yolov8s-seg.pt"
    # Fetch once; subsequent runs reuse the cached file on disk.
    if not Path(checkpoint).exists():
        print("Downloading YOLOv8 segmentation model...")
        urllib.request.urlretrieve(url, checkpoint)
    return checkpoint
39
+
40
+
41
+
42
+
43
+
44
+ from ultralytics import YOLO
45
+
46
# Module-level cache: loading the YOLO checkpoint is expensive, so keep one
# instance across calls instead of re-reading it for every image.
_YOLO_SEG_MODEL = None

def segment_person_mask_yolov8(image, action="ignore"):
    """Build a binary person mask for *image* via YOLOv8 segmentation.

    Parameters:
        image: BGR numpy array.
        action: "ignore" -> 255 everywhere EXCEPT detected persons;
                any other value -> 255 ON detected persons.

    Returns:
        uint8 mask with the same height/width as *image*.
    """
    global _YOLO_SEG_MODEL
    if _YOLO_SEG_MODEL is None:
        _YOLO_SEG_MODEL = YOLO("person_yolov8s-seg.pt")
    results = _YOLO_SEG_MODEL(image)

    # Union of all detected person masks, resized to the image grid.
    person = np.zeros(image.shape[:2], dtype=np.uint8)
    for r in results:
        if r.masks is None:
            # No detections in this result; the old code crashed here with
            # AttributeError on r.masks.data.
            continue
        for m in r.masks.data:
            m = m.cpu().numpy().astype(np.uint8) * 255
            m = cv2.resize(m, (image.shape[1], image.shape[0]),
                           interpolation=cv2.INTER_NEAREST)
            person = cv2.bitwise_or(person, m)

    if action == "ignore":
        # Invert the UNION once. The old per-instance OR of inverted masks
        # computed ~(m1 & m2) with 2+ people, i.e. it un-masked everyone
        # wherever the instances did not overlap.
        return 255 - person
    return person
59
+
60
+
61
def detect_face_bbox(image, net):
    """Run the OpenCV SSD face detector and return face bounding boxes.

    Parameters:
        image: BGR numpy array (None is tolerated and yields []).
        net: cv2.dnn network loaded from the res10 SSD Caffe files.

    Returns:
        List of (startX, startY, endX, endY) int tuples, clipped to the
        image bounds, for detections with confidence > 0.6.
    """
    if image is None:
        print("Warning: Image is None in detect_face_bbox")
        return []
    h, w = image.shape[:2]
    # SSD input is a fixed 300x300 blob with the model's BGR mean subtracted.
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.6:
            # Detections are normalized [0,1]; scale back to pixels.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # SSD boxes can fall slightly outside the frame; clamp so later
            # mask slicing never sees negative (wrap-around) indices.
            startX, endX = max(0, startX), min(w, endX)
            startY, endY = max(0, startY), min(h, endY)
            bboxes.append((startX, startY, endX, endY))
    return bboxes
78
+
79
+
80
def create_face_bbox_mask(image, bboxes, action="ignore"):
    """Build a binary mask from face bounding boxes.

    Parameters:
        image: array whose first two dims give the mask size.
        bboxes: iterable of (x1, y1, x2, y2) boxes.
        action: "ignore" -> start all-255 and zero out each box;
                "keep"   -> start all-0 and set each box to 255.

    Returns:
        uint8 mask with the same height/width as *image*.
    """
    h, w = image.shape[:2]
    if action == "ignore":
        mask = np.full((h, w), 255, dtype=np.uint8)
    else:  # "keep"
        mask = np.zeros((h, w), dtype=np.uint8)
    fill = 0 if action == "ignore" else 255
    for (x1, y1, x2, y2) in bboxes:
        # Clamp so negative detector coordinates cannot wrap around
        # (a Python slice start of -5 means "5 from the end") and paint
        # the wrong region or silently paint nothing.
        x1, x2 = max(0, x1), min(w, x2)
        y1, y2 = max(0, y1), min(h, y2)
        mask[y1:y2, x1:x2] = fill
    return mask
89
+
90
+
91
# Python
# CLI entry point: for every image in input_dir, build a binary mask from the
# face detector and (optionally) the YOLOv8 person segmenter, and write it as
# a PNG into a sibling "mask_<input_dir>" directory.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate face/body region masks using detectors/segmentation.")
    parser.add_argument("input_dir", type=str, help="Path to the input image directory")
    parser.add_argument("--face_action", choices=["ignore", "keep"], default="ignore",
                        help="Masking action for face: 'ignore' to mask face region, 'keep' to keep face region")
    parser.add_argument("--body_action", choices=["ignore", "keep", "none"], default="none",
                        help="Masking action for body: 'ignore' to mask body region, 'keep' to keep body region, 'none' to skip body masking")
    parser.add_argument("--include_face_in_body_keep", action="store_true",
                        help="If set and body_action=keep, include face region in the mask")
    args = parser.parse_args()

    # Output masks go next to (not inside) the input directory.
    INPUT_DIR = Path(args.input_dir)
    MASK_DIR = INPUT_DIR.parent / f"mask_{INPUT_DIR.name}"
    MASK_DIR.mkdir(parents=True, exist_ok=True)

    # Face model
    face_prototxt, face_caffemodel = download_face_model()
    face_net = cv2.dnn.readNetFromCaffe(face_prototxt, face_caffemodel)

    # Body segmentation model (YOLOv8) — only fetched when body masking is on.
    if args.body_action != "none":
        yolov8_model = download_body_model()
    else:
        yolov8_model = None

    for image_file in INPUT_DIR.glob("*.*"):
        # IMREAD_UNCHANGED preserves grayscale/alpha so we can normalize below.
        img = cv2.imread(str(image_file), cv2.IMREAD_UNCHANGED)
        if img is None:
            print(f"Warning: Failed to read image {image_file}, skipping.")
            continue
        # Normalize everything to 3-channel BGR before detection.
        if len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[-1] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

        # Only body mask if face+body, no background
        if yolov8_model is not None and args.body_action == "keep" and args.include_face_in_body_keep:
            mask = segment_person_mask_yolov8(img, action="keep")
        else:
            # Face mask
            face_bboxes = detect_face_bbox(img, face_net)
            mask = create_face_bbox_mask(img, face_bboxes, action=args.face_action)
            # Body mask (combine with face mask)
            if yolov8_model is not None:
                body_mask = segment_person_mask_yolov8(img, action=args.body_action)
                # NOTE(review): this OR branch looks unreachable — the
                # keep+include_face combination (with a loaded model) is
                # already consumed by the outer `if` above; verify intent.
                if args.body_action == "keep" and args.include_face_in_body_keep:
                    mask = cv2.bitwise_or(mask, body_mask)
                else:
                    # Intersection: a pixel survives only if both the face
                    # mask and the body mask allow it.
                    mask = cv2.bitwise_and(mask, body_mask)

        # Make mask same number of channels as input image
        # NOTE(review): img was converted to 3-channel BGR above, so the
        # 4-channel branch appears dead here — confirm before relying on it.
        if img.ndim == 3:
            if img.shape[2] == 4:
                mask_out = cv2.merge([mask, mask, mask, mask])
            elif img.shape[2] == 3:
                mask_out = cv2.merge([mask, mask, mask])
            else:
                mask_out = mask
        else:
            mask_out = mask
        # Safety check: never write a mask whose geometry disagrees with
        # the source image.
        if mask_out.shape[:2] != img.shape[:2]:
            print(f"Error: Mask shape {mask_out.shape[:2]} does not match image shape {img.shape[:2]} for {image_file}")
            continue
        # Always save as PNG, keyed by the source image's stem.
        out_path = MASK_DIR / (image_file.stem + ".png")
        cv2.imwrite(str(out_path), mask_out)

    print(
        f"Masks generated in {MASK_DIR.resolve()} using face/body detection with actions face='{args.face_action}', body='{args.body_action}'")