hbatali2020 committed on
Commit
5262916
·
verified ·
1 Parent(s): 3d2e5b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -88
app.py CHANGED
@@ -1,73 +1,46 @@
1
  import io
2
  import time
3
- import numpy as np
4
  import torch
5
- import torch.nn.functional as F
6
  from PIL import Image
7
- from huggingface_hub import hf_hub_download
8
- from torchvision import transforms
9
  from fastapi import FastAPI, HTTPException, UploadFile, File
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from contextlib import asynccontextmanager
12
 
13
# ─── 28 class labels for the Sapiens segmentation head ───────────────
# Index in this list == class id in the model's output channel dimension.
SEG_CLASSES = [
    "Background", "Apparel", "Face Neck", "Hair", "Left Foot",
    "Left Hand", "Left Lower Arm", "Left Lower Leg", "Left Shoe",
    "Left Sock", "Left Upper Arm", "Left Upper Leg", "Lower Clothing",
    "Right Foot", "Right Hand", "Right Lower Arm", "Right Lower Leg",
    "Right Shoe", "Right Sock", "Right Upper Arm", "Right Upper Leg",
    "Torso", "Upper Clothing", "Lower Lip", "Upper Lip", "Lower Teeth",
    "Upper Teeth", "Tongue"
]

# ─── Class ids that indicate the presence of a human ─────────────────
HUMAN_CLASSES = set(range(1, 28))  # every class except Background (0)
 
 
26
 
27
  MODEL_DATA = {}
28
 
29
def preprocess(image: Image.Image) -> torch.Tensor:
    """Resize and normalize a PIL image into a batched float tensor.

    Returns a [1, C, 1024, 768] tensor (C == 3 for an RGB input;
    assumes the caller already converted the image to RGB — confirm).
    Mean/std are the standard ImageNet statistics.
    """
    transform = transforms.Compose([
        transforms.Resize((1024, 768)),  # (height, width)
        transforms.ToTensor(),           # HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    return transform(image).unsqueeze(0)  # add the batch dimension
39
-
40
def postprocess(output: torch.Tensor, original_size: tuple) -> np.ndarray:
    """Upsample raw logits to the source resolution and take a per-pixel argmax.

    output is [1, C, H, W] (C class channels); original_size is
    (height, width). Returns a (height, width) uint8 class-index map.
    """
    upsampled = F.interpolate(
        output,
        size=original_size,
        mode="bilinear",
        align_corners=False,
    )
    # Collapse the class dimension to the winning class id per pixel,
    # then drop the batch dimension and move to host memory.
    label_map = upsampled.argmax(dim=1).squeeze(0)
    return label_map.cpu().numpy().astype(np.uint8)
47
-
48
  @asynccontextmanager
49
  async def lifespan(app: FastAPI):
50
- print("๐Ÿ“ฅ Downloading sapiens-seg-1b TorchScript model (~4.72 GB)...")
51
  start = time.time()
52
 
53
- model_path = hf_hub_download(
54
- repo_id="facebook/sapiens-seg-1b-torchscript",
55
- filename="sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2"
56
  )
57
- print(f"โœ… Downloaded in {time.time()-start:.1f}s")
58
-
59
- print("๐Ÿ“ฆ Loading model into memory...")
60
- load_start = time.time()
61
- model = torch.jit.load(model_path, map_location="cpu")
62
- model.eval()
63
- MODEL_DATA["model"] = model
64
- print(f"โœ… Model ready in {time.time()-load_start:.1f}s")
 
65
  yield
66
  MODEL_DATA.clear()
67
 
68
  app = FastAPI(
69
- title="Human Body Segmentation - Sapiens-1B",
70
- description="Meta Sapiens-seg-1b | 28 class body part segmentation",
71
  version="1.0.0",
72
  lifespan=lifespan
73
  )
@@ -84,6 +57,15 @@ app.add_middleware(
84
  def health():
85
  return {"status": "ok", "model_loaded": "model" in MODEL_DATA}
86
 
 
 
 
 
 
 
 
 
 
87
  @app.post("/analyze")
88
  async def analyze_image(file: UploadFile = File(...)):
89
 
@@ -91,54 +73,67 @@ async def analyze_image(file: UploadFile = File(...)):
91
  raise HTTPException(status_code=400, detail="ุงู„ู…ู„ู ู„ูŠุณ ุตูˆุฑุฉ")
92
 
93
  try:
94
- image = Image.open(io.BytesIO(await file.read())).convert("RGB")
 
95
  except Exception as e:
96
  raise HTTPException(status_code=400, detail=f"ุฎุทุฃ ููŠ ู‚ุฑุงุกุฉ ุงู„ุตูˆุฑุฉ: {str(e)}")
97
 
98
  try:
99
- model = MODEL_DATA["model"]
100
- original_size = (image.height, image.width)
101
-
102
- # โ”€โ”€โ”€ Preprocessing โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
103
- tensor = preprocess(image)
104
-
105
- # โ”€โ”€โ”€ Inference โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
106
- start_time = time.time()
107
- with torch.inference_mode():
108
- output = model(tensor)
109
- elapsed = round(time.time() - start_time, 2)
110
-
111
- # โ”€โ”€โ”€ Postprocessing โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
112
- seg_map = postprocess(output, original_size)
113
-
114
- # โ”€โ”€โ”€ ุงุณุชุฎุฑุงุฌ ุงู„ู€ classes ุงู„ู…ูˆุฌูˆุฏุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
115
- unique_classes = set(np.unique(seg_map).tolist())
116
- detected_parts = [
117
- SEG_CLASSES[c] for c in sorted(unique_classes)
118
- if c in HUMAN_CLASSES
119
  ]
120
 
121
- # โ”€โ”€โ”€ ุญุณุงุจ ู†ุณุจุฉ ูƒู„ ุฌุฒุก ู…ู† ุงู„ุตูˆุฑุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
122
- total_pixels = seg_map.size
123
- parts_coverage = {}
124
- for c in sorted(unique_classes):
125
- if c in HUMAN_CLASSES:
126
- count = int(np.sum(seg_map == c))
127
- percentage = round((count / total_pixels) * 100, 2)
128
- if percentage > 0.1: # ุชุฌุงู‡ู„ ุฃู‚ู„ ู…ู† 0.1%
129
- parts_coverage[SEG_CLASSES[c]] = f"{percentage}%"
130
 
131
- human_detected = len(detected_parts) > 0
132
- summary = f"yes detected human body parts: {', '.join(detected_parts)}" \
133
- if human_detected else "no detected human body"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  return {
136
- "summary": summary,
137
- "detected": human_detected,
138
- "parts_found": detected_parts,
139
- "parts_coverage": parts_coverage,
140
- "execution_time": elapsed,
141
- "status": "success"
142
  }
143
 
144
  except Exception as e:
 
1
  import io
2
  import time
3
+ import base64
4
  import torch
 
5
  from PIL import Image
6
+ from transformers import AutoProcessor, AutoModelForCausalLM
 
7
  from fastapi import FastAPI, HTTPException, UploadFile, File
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from contextlib import asynccontextmanager
10
 
11
# Hugging Face Hub repo id of the model loaded at startup.
# NOTE(review): loaded via AutoModelForCausalLM but fed images in
# /analyze — confirm this checkpoint is actually multimodal.
MODEL_ID = "Qwen/Qwen3.5-0.8B"

# Fixed yes/no VQA prompt sent with every uploaded image.
# The trailing "/no_think" presumably disables the Qwen "thinking"
# mode so the model answers directly — TODO confirm for this checkpoint.
VQA_QUESTION = (
    "Is there a woman or any part of a woman's body in this image? "
    "Answer yes or no only. /no_think"
)

# Populated by lifespan() at startup with "processor" and "model";
# cleared again at shutdown.
MODEL_DATA = {}
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook.

    Downloads/loads the processor and model into the module-level
    MODEL_DATA dict once at startup, yields for the app's lifetime,
    and clears MODEL_DATA at shutdown.
    """
    print(f"๐Ÿ“ฅ Loading {MODEL_ID}...")
    start = time.time()

    # trust_remote_code executes code shipped with the checkpoint —
    # acceptable only because MODEL_ID is a fixed, trusted repo.
    MODEL_DATA["processor"] = AutoProcessor.from_pretrained(
        MODEL_ID,
        trust_remote_code=True
    )
    # CPU-only, full precision; eager attention since flash-attention
    # kernels are unavailable on CPU.
    MODEL_DATA["model"] = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        trust_remote_code=True,
        attn_implementation="eager",
        device_map="cpu"
    ).eval()

    print(f"โœ… Model ready in {time.time()-start:.1f}s")
    yield
    MODEL_DATA.clear()
40
 
41
# Application object; model load/unload is tied to the lifespan hook above.
app = FastAPI(
    title="Female Detection API - Qwen3.5-0.8B",
    description="Qwen3.5-0.8B VLM | VQA",
    version="1.0.0",
    lifespan=lifespan
)
 
57
def health():
    """Liveness probe: the service is up; reports whether the model is loaded."""
    model_ready = "model" in MODEL_DATA
    return {"status": "ok", "model_loaded": model_ready}
59
 
60
def decide(answer: str) -> tuple[str, str]:
    """Map the VLM's free-text answer to a moderation decision.

    Returns a (decision, reason) pair where decision is "allow" or
    "block". Fails closed: anything that is not an unambiguous "no"
    is blocked.

    Fixes over the original:
    - "yes" is checked first, so a contradictory answer containing
      both "no" and "yes" blocks instead of allowing.
    - "no" must be the whole first word; the original startswith("no")
      also matched "not sure", "none", "nothing" and allowed them.
    - drops the redundant `a == "no"` test (subsumed by the prefix check).
    """
    normalized = answer.strip().lower()
    if "yes" in normalized:
        return "block", "model_answered_yes"
    # Strip trailing punctuation so "no." / "no," still count.
    first_word = normalized.split(maxsplit=1)[0].rstrip(".,!?:;") if normalized else ""
    if first_word == "no":
        return "allow", "model_answered_no"
    return "block", "unexpected_answer_blocked_for_safety"
68
+
69
  @app.post("/analyze")
70
  async def analyze_image(file: UploadFile = File(...)):
71
 
 
73
  raise HTTPException(status_code=400, detail="ุงู„ู…ู„ู ู„ูŠุณ ุตูˆุฑุฉ")
74
 
75
  try:
76
+ image_bytes = await file.read()
77
+ image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
78
  except Exception as e:
79
  raise HTTPException(status_code=400, detail=f"ุฎุทุฃ ููŠ ู‚ุฑุงุกุฉ ุงู„ุตูˆุฑุฉ: {str(e)}")
80
 
81
  try:
82
+ processor = MODEL_DATA["processor"]
83
+ model = MODEL_DATA["model"]
84
+
85
+ # โ”€โ”€โ”€ Qwen3.5 ูŠุณุชุฎุฏู… ู†ูุณ ุทุฑูŠู‚ุฉ Qwen3-VL โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
86
+ # ุงู„ุตูˆุฑุฉ ุชุฑุณู„ ูƒู€ type: "image" ู…ุจุงุดุฑุฉ ููŠ content
87
+ messages = [
88
+ {
89
+ "role": "user",
90
+ "content": [
91
+ {
92
+ "type": "image",
93
+ "image": image # PIL Image ู…ุจุงุดุฑุฉ
94
+ },
95
+ {
96
+ "type": "text",
97
+ "text": VQA_QUESTION
98
+ }
99
+ ]
100
+ }
 
101
  ]
102
 
103
+ # โ”€โ”€โ”€ apply_chat_template โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
104
+ inputs = processor.apply_chat_template(
105
+ messages,
106
+ tokenize=True,
107
+ add_generation_prompt=True,
108
+ return_dict=True,
109
+ return_tensors="pt"
110
+ )
 
111
 
112
+ start_time = time.time()
113
+ with torch.no_grad():
114
+ generated_ids = model.generate(
115
+ **inputs,
116
+ max_new_tokens=20,
117
+ do_sample=False,
118
+ temperature=None,
119
+ top_p=None,
120
+ )
121
+
122
+ # โ”€โ”€โ”€ ุงุณุชุฎุฑุงุฌ ุงู„ุฅุฌุงุจุฉ ุงู„ุฌุฏูŠุฏุฉ ูู‚ุท โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
123
+ input_len = inputs["input_ids"].shape[1]
124
+ new_tokens = generated_ids[:, input_len:]
125
+ answer = processor.decode(new_tokens[0], skip_special_tokens=True).strip()
126
+
127
+ elapsed = round(time.time() - start_time, 2)
128
+ decision, reason = decide(answer)
129
 
130
  return {
131
+ "decision": decision,
132
+ "reason": reason,
133
+ "vqa_answer": answer,
134
+ "question": VQA_QUESTION,
135
+ "execution_time": elapsed,
136
+ "status": "success"
137
  }
138
 
139
  except Exception as e: