# pictionary / app.py
# git_Josh
# add application file
# 6b99da0
import warnings
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import torch
from torch import nn
# Resolve data files relative to this script so the app works from any CWD.
BASE_DIR = Path(__file__).parent

# One class name per line; the number of lines sets the output layer width.
LABELS = (BASE_DIR / "class_names.txt").read_text().splitlines()

# Small CNN for single-channel sketches.
# Keep this a *flat* nn.Sequential with the layers in this exact order:
# state-dict keys are derived from the positional index of each layer
# ("0.weight", "3.weight", ...), so nesting or reordering layers would stop
# the checkpoint below from matching.
model = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, padding="same"),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    # 1152 = 128 channels * 3 * 3: a 24x24 input is halved by each of the
    # three poolings above (24 -> 12 -> 6 -> 3).
    nn.Linear(1152, 256),
    nn.ReLU(),
    nn.Linear(256, len(LABELS)),
)

# NOTE(review): torch.load deserialises via pickle; only load checkpoints
# that come from a trusted source.
state_dict = torch.load(BASE_DIR / "pytorch_model.bin", map_location="cpu")

# strict=False tolerates missing/unexpected keys. Without inspecting the
# result, a mismatched checkpoint would silently leave layers at their random
# initialisation, so report any mismatch instead of ignoring it.
_load_result = model.load_state_dict(state_dict, strict=False)
if _load_result.missing_keys or _load_result.unexpected_keys:
    warnings.warn(
        "pytorch_model.bin does not fully match the model definition "
        f"(missing keys: {list(_load_result.missing_keys)}, "
        f"unexpected keys: {list(_load_result.unexpected_keys)}); "
        "unmatched layers keep their random initialisation.",
        RuntimeWarning,
    )

# Inference only: put the module in evaluation mode.
model.eval()
def predict(im):
    """Classify a sketch and return the most likely labels.

    Args:
        im: The sketchpad payload. Accepted forms:
            * ``None`` -- nothing was drawn.
            * a ``dict`` carrying ``"composite"`` and/or ``"layers"`` entries;
              ``"composite"`` is preferred, otherwise the first layer is used.
            * anything ``np.array`` can turn into a 2-D or 3-D image array.

    Returns:
        A ``{label: probability}`` dict with up to five entries (fewer when
        there are fewer than five classes). When there is nothing to classify
        the dict holds a single user-facing message with probability ``1.0``.
    """
    # A cleared sketchpad may deliver no payload at all; treat it like an
    # empty canvas instead of failing later inside cv2.resize.
    if im is None:
        return {"请先在画板上画图": 1.0}

    if isinstance(im, dict):
        layers = im.get("layers")
        # Empty canvas: the key is present but holds no layers.
        if "layers" in im and not layers:
            return {"请先在画板上画图": 1.0}
        # Prefer the flattened composite image ...
        if im.get("composite") is not None:
            im = im["composite"]
        # ... otherwise fall back to the first drawn layer.
        elif isinstance(layers, list) and layers:
            im = layers[0]
        else:
            return {"无法识别输入": 1.0}

    # Convert to a numpy array.
    pixels = np.array(im)

    # Keep only the first channel as the grayscale image.
    # NOTE(review): for an RGBA payload this reads the red channel only --
    # confirm that is where the strokes actually end up.
    if pixels.ndim == 3:
        pixels = pixels[..., 0]

    # Resize to the resolution the model was trained on
    # (assumed to be 24x24 -- TODO confirm against the training setup).
    pixels = cv2.resize(pixels, (24, 24), interpolation=cv2.INTER_AREA)

    # Shape (1, 1, 24, 24): add batch and channel axes. The division assumes
    # pixel values in the 0-255 range.
    x = torch.tensor(pixels, dtype=torch.float32).unsqueeze(0).unsqueeze(0) / 255.0

    with torch.no_grad():
        out = model(x)

    probabilities = torch.nn.functional.softmax(out[0], dim=0)

    # torch.topk raises if k exceeds the number of classes, so clamp it.
    top_k = min(5, probabilities.numel())
    values, indices = torch.topk(probabilities, top_k)

    # .tolist() yields plain Python ints/floats for indexing and for output.
    return {
        LABELS[index]: score
        for index, score in zip(indices.tolist(), values.tolist())
    }
# Build the Gradio UI: a sketchpad input wired to `predict`, with a label
# widget displaying the returned {label: confidence} mapping.
# Options left disabled in the original: theme="huggingface", live=True.
_ui_description = (
    "Who wants to play Pictionary? Draw a common object like a shovel "
    "or a laptop, and the algorithm will guess in real time!"
)
_ui_article = "<p style='text-align: center'>Sketch Recognition | Demo Model</p>"

interface = gr.Interface(
    fn=predict,
    inputs="sketchpad",
    outputs="label",
    title="Sketch Recognition",
    description=_ui_description,
    article=_ui_article,
)

# NOTE(review): the server is launched at module level, so it also starts
# when this file is imported rather than run directly.
interface.launch(share=True)