DEVAN CHAUHAN committed on
Commit
80e1925
·
1 Parent(s): c60ae80

[add] image-op

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
__pycache__/image_embedding.cpython-310.pyc ADDED
Binary file (426 Bytes). View file
 
app.py CHANGED
@@ -1,17 +1,66 @@
1
- from sentence_transformers import SentenceTransformer
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- # Load once
5
  image_model = SentenceTransformer("clip-ViT-B-32")
6
- text_model = SentenceTransformer("clip-ViT-B-32-multilingual-v1")
7
 
8
  def get_image_embedding(image):
9
  emb = image_model.encode(image)
10
  return {"embedding": emb.tolist()}
11
 
12
- def get_text_embedding(text):
13
- emb = text_model.encode(text)
14
- return {"embedding": emb.tolist()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  with gr.Blocks() as demo:
17
  with gr.Tab("Image Embedding"):
@@ -19,11 +68,25 @@ with gr.Blocks() as demo:
19
  img_output = gr.JSON()
20
  img_btn = gr.Button("Generate")
21
  img_btn.click(get_image_embedding, img_input, img_output)
 
 
 
 
 
 
 
22
 
23
- with gr.Tab("Text Embedding"):
24
- text_input = gr.Textbox()
25
- text_output = gr.JSON()
26
- text_btn = gr.Button("Generate")
27
- text_btn.click(get_text_embedding, text_input, text_output)
 
 
 
 
 
 
28
 
 
29
  demo.launch()
 
 
1
  import gradio as gr
2
+ print("Loading models...")
3
+ from retinaface import RetinaFace
4
+ print("retinaface loaded")
5
+ import cv2
6
+ print("opencv loaded")
7
+ import numpy as np
8
+ print("numpy loaded")
9
+ from PIL import Image
10
+ print("PIL loaded")
11
+ from rembg import remove
12
+ print("rembg loaded")
13
+ from sentence_transformers import SentenceTransformer
14
+ print("sentence_transformers loaded")
15
 
 
16
  image_model = SentenceTransformer("clip-ViT-B-32")
17
+ print("CLIP loaded")
18
 
19
def get_image_embedding(image):
    """Encode *image* with the CLIP image model.

    Returns a JSON-serialisable dict: {"embedding": [float, ...]}.
    """
    return {"embedding": image_model.encode(image).tolist()}
22
 
23
def process_image(input_image):
    """Crop the first detected face (with margin for hair/chin), strip the
    background, and resize to CLIP's 224x224 input size.

    Parameters
    ----------
    input_image : PIL.Image.Image
        Image from the Gradio widget (any mode; converted to RGB below).

    Returns
    -------
    tuple[str, PIL.Image.Image | None]
        ("Success ✅", processed_image) on success, or
        ("No face detected", None) when RetinaFace finds no face.
    """
    # Force 3-channel RGB first: gr.Image(type="pil") may deliver RGBA or
    # grayscale, and cv2.COLOR_RGB2BGR requires exactly three channels.
    img = cv2.cvtColor(np.array(input_image.convert("RGB")), cv2.COLOR_RGB2BGR)

    # Detect faces. RetinaFace returns a dict keyed "face_1", "face_2", ...
    # on success; guard on the type as well, since a non-dict sentinel can
    # come back when nothing is detected.
    faces = RetinaFace.detect_faces(img)
    if not isinstance(faces, dict) or not faces:
        return "No face detected", None

    # Only the first detected face is used.
    face = next(iter(faces.values()))
    x1, y1, x2, y2 = face["facial_area"]

    h, w, _ = img.shape
    box_width = x2 - x1
    box_height = y2 - y1

    # Expand the box asymmetrically: most headroom on top (hair), some on
    # the sides, least below the chin. Clamped to the image bounds.
    top_expand = 0.5
    side_expand = 0.3
    bottom_expand = 0.2

    x1_new = int(max(0, x1 - box_width * side_expand))
    x2_new = int(min(w, x2 + box_width * side_expand))
    y1_new = int(max(0, y1 - box_height * top_expand))
    y2_new = int(min(h, y2 + box_height * bottom_expand))

    cropped = img[y1_new:y2_new, x1_new:x2_new]

    # Back to PIL (RGB) for rembg and CLIP.
    pil_image = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))

    # Background removal (rembg returns an RGBA image).
    output = remove(pil_image)

    # Resize to CLIP's expected input resolution.
    output = output.resize((224, 224))

    return "Success ✅", output
64
 
65
# Gradio UI: three tabs sharing the CLIP model loaded at module level.
# NOTE(review): the diff rendering below omits new-file line 67, which
# presumably creates img_input (gr.Image) — verify against the full file.
  with gr.Blocks() as demo:
66
# Tab 1: raw CLIP embedding of an uploaded image.
  with gr.Tab("Image Embedding"):

68
  img_output = gr.JSON()
69
  img_btn = gr.Button("Generate")
70
  img_btn.click(get_image_embedding, img_input, img_output)
71
+
72
# Tab 2: face crop + background removal, returning a status string and
# the processed image.
+ with gr.Tab("Face Crop & Background Removal"):
73
+ face_input = gr.Image(type="pil")
74
+ face_output = gr.Image()
75
+ face_status = gr.Text()
76
+ face_btn = gr.Button("Process")
77
+ face_btn.click(process_image, face_input, [face_status, face_output])
78

79
# Tab 3: full pipeline (crop/clean, then embed) wired to run_pipe below.
+ with gr.Tab("Pipe"):
80
+ pipe_input = gr.Image(type="pil")
81
+ pipe_output = gr.JSON()
82
+ pipe_btn = gr.Button("Run Pipe")
83
def run_pipe(img):
    """Full pipeline: crop/clean the face, then embed it with CLIP.

    Returns the embedding dict on success, otherwise a dict carrying the
    failure status and a null embedding.
    """
    status, processed_img = process_image(img)
    if status == "Success ✅":
        return get_image_embedding(processed_img)
    return {"status": status, "embedding": None}
88
+
89
# Wire tab 3's button to the combined pipeline defined above.
+ pipe_btn.click(run_pipe, pipe_input, pipe_output)
90

91
+ print("Launching demo...")
92
# Start the Gradio server (blocking call).
  demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,8 @@
1
  sentence_transformers
2
- Pillow
 
 
 
 
 
 
 
1
  sentence_transformers
2
+ retina-face
3
+ opencv-python
4
+ pillow
5
+ rembg
6
+ numpy
7
+ tf-keras
8
+ onnxruntime