"""Gradio demo that classifies a hand-drawn sketch with a small CNN.

At import time the module reads the class labels from ``class_names.txt``,
builds the network, loads its weights from ``pytorch_model.bin`` (both
files live next to this script) and launches a Gradio interface whose
callback is :func:`predict`.
"""
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import torch
from torch import nn

BASE_DIR = Path(__file__).parent
LABELS = (BASE_DIR / "class_names.txt").read_text().splitlines()

# Side length (pixels) the drawing is resized to before inference.
# NOTE(review): with three 2x poolings both 24 and 28 reduce to a 3x3 map
# (128 * 3 * 3 = 1152 inputs for the first Linear layer), so the layer sizes
# alone do not tell which size the weights were trained on -- confirm.
INPUT_SIZE = 24

# Maximum number of classes reported per prediction.
TOP_K = 5

# User-facing messages returned in place of a prediction (kept verbatim).
EMPTY_CANVAS_MESSAGE = "请先在画板上画图"
UNRECOGNIZED_INPUT_MESSAGE = "无法识别输入"

model = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(1152, 256),
    nn.ReLU(),
    nn.Linear(256, len(LABELS)),
)

# SECURITY NOTE: torch.load unpickles the file; only load checkpoints from a
# trusted source (consider weights_only=True where the installed torch
# version supports it).
state_dict = torch.load(BASE_DIR / "pytorch_model.bin", map_location="cpu")
# NOTE(review): strict=False silently ignores missing/unexpected keys, so a
# checkpoint that does not match the architecture would go unnoticed.
model.load_state_dict(state_dict, strict=False)
model.eval()


def predict(im):
    """Classify a sketch and return the most probable labels.

    Args:
        im: The sketchpad value. Either an image-like object accepted by
            ``np.array`` or a dict carrying ``"composite"`` and/or
            ``"layers"`` entries; ``None`` is treated as an empty canvas.

    Returns:
        A dict mapping label -> probability for up to ``TOP_K`` classes.
        When the canvas is empty or the input cannot be interpreted as an
        image, a single-entry dict with a message and probability 1.0.
    """
    if im is None:
        return {EMPTY_CANVAS_MESSAGE: 1.0}

    if isinstance(im, dict):
        layers = im.get("layers")
        # A present-but-empty "layers" entry means nothing was drawn.
        if "layers" in im and not layers:
            return {EMPTY_CANVAS_MESSAGE: 1.0}

        composite = im.get("composite")
        if composite is not None:
            # Prefer the flattened composite image when available.
            im = composite
        elif isinstance(layers, list) and layers:
            # Otherwise fall back to the first layer.
            im = layers[0]
        else:
            return {UNRECOGNIZED_INPUT_MESSAGE: 1.0}

    pixels = np.array(im)
    # Keep only the first channel of a multi-channel image.
    if pixels.ndim == 3:
        pixels = pixels[..., 0]
    # Anything that is not a non-empty 2-D image cannot be resized.
    if pixels.ndim != 2 or pixels.size == 0:
        return {UNRECOGNIZED_INPUT_MESSAGE: 1.0}

    pixels = cv2.resize(
        pixels, (INPUT_SIZE, INPUT_SIZE), interpolation=cv2.INTER_AREA
    )
    # Add batch and channel axes and scale by 1/255.
    x = torch.tensor(pixels, dtype=torch.float32).unsqueeze(0).unsqueeze(0) / 255.0

    with torch.no_grad():
        out = model(x)

    probabilities = torch.nn.functional.softmax(out[0], dim=0)
    # topk raises if k exceeds the number of classes, so clamp it.
    k = min(TOP_K, probabilities.numel())
    values, indices = torch.topk(probabilities, k)
    return {
        LABELS[index]: value
        for index, value in zip(indices.tolist(), values.tolist())
    }


interface = gr.Interface(
    predict,
    inputs="sketchpad",
    outputs="label",
    # theme="huggingface",
    title="Sketch Recognition",
    description=(
        "Who wants to play Pictionary? Draw a common object like a shovel or a "
        "laptop, and the algorithm will guess in real time!"
    ),
    article="\n\nSketch Recognition | Demo Model\n\n",
    # live=True,
)

interface.launch(share=True)