"""Gradio app: segment objects with the SAM Hugging Face Inference API and crop them."""

import base64
import os

import cv2
import gradio as gr
import numpy as np
import requests

# Hugging Face SAM Inference API configuration.
HF_API_URL = "https://api-inference.huggingface.co/models/facebook/sam-vit-base"
# Read the token from the environment; the placeholder fallback keeps the old
# behavior but credentials should never be hard-coded in source.
HF_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_TOKEN")
headers = {"Authorization": f"Bearer {HF_TOKEN}"}


def segment_and_crop(image):
    """Segment *image* with the remote SAM model and crop each detected object.

    Parameters
    ----------
    image : np.ndarray
        RGB image as delivered by ``gr.Image(type="numpy")``.

    Returns
    -------
    tuple[list[np.ndarray] | None, str]
        A list of cropped RGB images (or ``None`` on failure) and a
        human-readable status message for the UI.
    """
    # Encode the RGB input as PNG bytes (OpenCV expects BGR channel order).
    _, buffer = cv2.imencode(".png", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    img_bytes = buffer.tobytes()

    # Call the Hugging Face SAM Inference API.
    # NOTE(review): the serverless Inference API commonly expects the raw image
    # bytes as the request body (``data=``); confirm ``files=`` works for this
    # deployment before relying on it.
    try:
        response = requests.post(
            HF_API_URL,
            headers=headers,
            files={"image": img_bytes},
            timeout=60,  # never block the UI indefinitely on a hung request
        )
    except requests.RequestException as exc:
        return None, f"Request failed: {exc}"

    if response.status_code != 200:
        return None, f"API error: {response.status_code}\n{response.text}"

    try:
        result = response.json()
    except ValueError:
        # Error pages / gateway timeouts may not be JSON; don't crash on them.
        return None, "API returned a non-JSON response"

    # Some SAM deployments return masks as base64-encoded grayscale PNGs
    # under the "masks" key.
    masks = result.get("masks", []) if isinstance(result, dict) else []
    if not masks:
        return None, "No masks returned"

    cropped_images = []
    for mask_b64 in masks:
        mask_bytes = base64.b64decode(mask_b64)
        mask_array = cv2.imdecode(
            np.frombuffer(mask_bytes, np.uint8), cv2.IMREAD_GRAYSCALE
        )
        if mask_array is None:
            continue  # undecodable mask payload
        ys, xs = np.where(mask_array > 127)
        if xs.size == 0:
            continue  # empty mask
        # Tight bounding box. +1 because max()/min() are inclusive pixel
        # coordinates while slicing is exclusive on the upper bound —
        # without it the rightmost column and bottom row were dropped.
        x1, x2 = xs.min(), xs.max()
        y1, y2 = ys.min(), ys.max()
        cropped_images.append(image[y1 : y2 + 1, x1 : x2 + 1])

    if not cropped_images:
        return None, "No valid crops"

    # Multiple crops feed the gr.Gallery output directly.
    return cropped_images, f"Detected {len(cropped_images)} object(s)."


demo = gr.Interface(
    fn=segment_and_crop,
    inputs=gr.Image(type="numpy", label="Upload or Capture an Image"),
    outputs=[gr.Gallery(label="Cropped Objects"), gr.Textbox(label="Status")],
    title="Smart Object Cropper with SAM",
    description=(
        "Upload or capture an image. The app will use the Segment Anything "
        "Model (SAM) on Hugging Face to detect and crop main objects."
    ),
)

if __name__ == "__main__":
    demo.launch()