hbatali2020 committed on
Commit
cb453e9
·
verified ·
1 Parent(s): b9ca46d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -61
app.py CHANGED
@@ -1,40 +1,77 @@
1
  import io
2
  import time
 
3
  import torch
 
4
  from PIL import Image
5
- from ultralytics import YOLO
 
6
  from fastapi import FastAPI, HTTPException, UploadFile, File
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from contextlib import asynccontextmanager
9
 
10
- # โ”€โ”€โ”€ ุฃุณู…ุงุก ุงู„ู€ 17 Keypoints โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
11
- KEYPOINT_NAMES = [
12
- "nose", "left_eye", "right_eye", "left_ear", "right_ear",
13
- "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
14
- "left_wrist", "right_wrist", "left_hip", "right_hip",
15
- "left_knee", "right_knee", "left_ankle", "right_ankle"
 
 
 
16
  ]
17
 
 
 
 
18
  MODEL_DATA = {}
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @asynccontextmanager
21
  async def lifespan(app: FastAPI):
22
- print("๐Ÿ“ฅ Loading YOLO11x-pose...")
23
  start = time.time()
24
- # ูŠุชุญู…ู„ ุชู„ู‚ุงุฆูŠุงู‹ ู…ู† ultralytics
25
- MODEL_DATA["model"] = YOLO("yolo11x-pose.pt")
26
- print(f"โœ… YOLO11x-pose ready in {time.time()-start:.1f}s")
 
 
 
 
 
 
 
 
 
 
27
  yield
28
  MODEL_DATA.clear()
29
 
30
  app = FastAPI(
31
- title="Human Body Detection - YOLO11x-pose",
32
- description="Detects human body and keypoints using YOLO11x-pose",
33
  version="1.0.0",
34
  lifespan=lifespan
35
  )
36
 
37
- # โ”€โ”€โ”€ CORS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
38
  app.add_middleware(
39
  CORSMiddleware,
40
  allow_origins=["*"],
@@ -60,58 +97,48 @@ async def analyze_image(file: UploadFile = File(...)):
60
 
61
  try:
62
  model = MODEL_DATA["model"]
63
- start_time = time.time()
64
-
65
- # โ”€โ”€โ”€ ุชุดุบูŠู„ YOLO โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
66
- results = model(image, verbose=False)
67
- elapsed = round(time.time() - start_time, 2)
68
-
69
- persons = []
70
 
71
- for r in results:
72
- num_persons = len(r.boxes) if r.boxes is not None else 0
73
 
74
- for i in range(num_persons):
75
- person = {"id": i + 1}
76
-
77
- # โ”€โ”€โ”€ Bounding Box โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
78
- if r.boxes is not None and i < len(r.boxes):
79
- box = r.boxes.xyxy[i].tolist()
80
- conf = float(r.boxes.conf[i])
81
- person["bbox"] = [round(x, 1) for x in box]
82
- person["confidence"] = round(conf, 3)
83
-
84
- # โ”€โ”€โ”€ Keypoints โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
85
- if r.keypoints is not None and i < len(r.keypoints.xy):
86
- kp_xy = r.keypoints.xy[i].tolist()
87
- kp_conf = r.keypoints.conf[i].tolist() if r.keypoints.conf is not None else [1.0] * 17
88
-
89
- visible_keypoints = {}
90
- for name, (x, y), c in zip(KEYPOINT_NAMES, kp_xy, kp_conf):
91
- if x > 0 and y > 0 and c > 0.3:
92
- visible_keypoints[name] = {
93
- "x": round(x, 1),
94
- "y": round(y, 1),
95
- "confidence": round(c, 3)
96
- }
97
-
98
- person["visible_keypoints"] = visible_keypoints
99
- person["visible_keypoints_count"] = len(visible_keypoints)
100
- person["visible_keypoints_names"] = list(visible_keypoints.keys())
101
-
102
- persons.append(person)
103
 
104
- # โ”€โ”€โ”€ ุงู„ู‚ุฑุงุฑ ุงู„ู†ู‡ุงุฆูŠ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
105
- human_detected = len(persons) > 0
106
- summary = f"yes detected human: {len(persons)} person(s)" if human_detected else "no detected human body"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  return {
109
- "summary": summary,
110
- "detected": human_detected,
111
- "persons_count": len(persons),
112
- "persons": persons,
113
- "execution_time": elapsed,
114
- "status": "success"
115
  }
116
 
117
  except Exception as e:
 
1
  import io
2
  import time
3
+ import numpy as np
4
  import torch
5
+ import torch.nn.functional as F
6
  from PIL import Image
7
+ from huggingface_hub import hf_hub_download
8
+ from torchvision import transforms
9
  from fastapi import FastAPI, HTTPException, UploadFile, File
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from contextlib import asynccontextmanager
12
 
13
# ─── The 28 class labels emitted by the Sapiens segmentation head ─────────
# Index in this list corresponds to the channel/class id in the model output.
SEG_CLASSES = [
    "Background", "Apparel", "Face Neck", "Hair", "Left Foot",
    "Left Hand", "Left Lower Arm", "Left Lower Leg", "Left Shoe",
    "Left Sock", "Left Upper Arm", "Left Upper Leg", "Lower Clothing",
    "Right Foot", "Right Hand", "Right Lower Arm", "Right Lower Leg",
    "Right Shoe", "Right Sock", "Right Upper Arm", "Right Upper Leg",
    "Torso", "Upper Clothing", "Lower Lip", "Upper Lip", "Lower Teeth",
    "Upper Teeth", "Tongue"
]

# ─── Class ids whose presence indicates a human in the image ──────────────
HUMAN_CLASSES = set(range(1, 28))  # every class except Background (0)

# Filled by the FastAPI lifespan handler with the loaded TorchScript model;
# cleared again on shutdown.
MODEL_DATA = {}
28
 
29
# Built once at import time: the original code reconstructed this torchvision
# Compose pipeline on every request, which is pure per-call overhead.
# Mean/std are the ImageNet statistics; 1024x768 is the Sapiens input size.
_PREPROCESS = transforms.Compose([
    transforms.Resize((1024, 768)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])


def preprocess(image: Image.Image) -> torch.Tensor:
    """Convert a PIL image into a normalized model input batch.

    Resizes to 1024x768, converts to a float tensor, applies ImageNet
    normalization, and adds a leading batch dimension.

    Args:
        image: RGB PIL image of any size.

    Returns:
        A float tensor of shape [1, 3, 1024, 768].
    """
    return _PREPROCESS(image).unsqueeze(0)
39
+
40
def postprocess(output: torch.Tensor, original_size: tuple) -> np.ndarray:
    """Turn raw model logits into a per-pixel class-index map.

    Args:
        output: logits of shape [1, 28, H, W].
        original_size: (height, width) of the source image.

    Returns:
        A uint8 array of shape `original_size` holding class ids (0-27).
    """
    # Upsample the logits back to the source resolution before taking argmax.
    upsampled = F.interpolate(
        output, size=original_size, mode="bilinear", align_corners=False
    )
    class_map = upsampled.argmax(dim=1)
    return class_map.squeeze(0).cpu().numpy().astype(np.uint8)
47
+
48
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: fetch and load the Sapiens model at startup.

    Downloads the TorchScript checkpoint from the Hugging Face Hub (cached
    on subsequent starts), loads it on CPU in eval mode, and publishes it
    via MODEL_DATA. The dict is emptied on shutdown.
    """
    print("๐Ÿ“ฅ Downloading sapiens-seg-1b TorchScript model (~4.72 GB)...")
    download_started = time.time()
    checkpoint_path = hf_hub_download(
        repo_id="facebook/sapiens-seg-1b-torchscript",
        filename="sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2"
    )
    print(f"โœ… Downloaded in {time.time()-download_started:.1f}s")

    print("๐Ÿ“ฆ Loading model into memory...")
    load_started = time.time()
    net = torch.jit.load(checkpoint_path, map_location="cpu")
    net.eval()
    MODEL_DATA["model"] = net
    print(f"โœ… Model ready in {time.time()-load_started:.1f}s")
    yield
    # Shutdown: drop the reference so the model can be garbage-collected.
    MODEL_DATA.clear()
67
 
68
# FastAPI application; `lifespan` performs the one-time model download/load.
app = FastAPI(
    title="Human Body Segmentation - Sapiens-1B",
    description="Meta Sapiens-seg-1b | 28 class body part segmentation",
    version="1.0.0",
    lifespan=lifespan
)
74
 
 
75
  app.add_middleware(
76
  CORSMiddleware,
77
  allow_origins=["*"],
 
97
 
98
  try:
99
  model = MODEL_DATA["model"]
100
+ original_size = (image.height, image.width)
 
 
 
 
 
 
101
 
102
+ # โ”€โ”€โ”€ Preprocessing โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
103
+ tensor = preprocess(image)
104
 
105
+ # โ”€โ”€โ”€ Inference โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
106
+ start_time = time.time()
107
+ with torch.inference_mode():
108
+ output = model(tensor)
109
+ elapsed = round(time.time() - start_time, 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
+ # โ”€โ”€โ”€ Postprocessing โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
112
+ seg_map = postprocess(output, original_size)
113
+
114
+ # โ”€โ”€โ”€ ุงุณุชุฎุฑุงุฌ ุงู„ู€ classes ุงู„ู…ูˆุฌูˆุฏุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
115
+ unique_classes = set(np.unique(seg_map).tolist())
116
+ detected_parts = [
117
+ SEG_CLASSES[c] for c in sorted(unique_classes)
118
+ if c in HUMAN_CLASSES
119
+ ]
120
+
121
+ # โ”€โ”€โ”€ ุญุณุงุจ ู†ุณุจุฉ ูƒู„ ุฌุฒุก ู…ู† ุงู„ุตูˆุฑุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
122
+ total_pixels = seg_map.size
123
+ parts_coverage = {}
124
+ for c in sorted(unique_classes):
125
+ if c in HUMAN_CLASSES:
126
+ count = int(np.sum(seg_map == c))
127
+ percentage = round((count / total_pixels) * 100, 2)
128
+ if percentage > 0.1: # ุชุฌุงู‡ู„ ุฃู‚ู„ ู…ู† 0.1%
129
+ parts_coverage[SEG_CLASSES[c]] = f"{percentage}%"
130
+
131
+ human_detected = len(detected_parts) > 0
132
+ summary = f"yes detected human body parts: {', '.join(detected_parts)}" \
133
+ if human_detected else "no detected human body"
134
 
135
  return {
136
+ "summary": summary,
137
+ "detected": human_detected,
138
+ "parts_found": detected_parts,
139
+ "parts_coverage": parts_coverage,
140
+ "execution_time": elapsed,
141
+ "status": "success"
142
  }
143
 
144
  except Exception as e: