Spaces:

scymz2
/

MNISTFormer

Sleeping

mochuan zhan committed on Nov 11, 2024

Commit

aa2c6eb

1 Parent(s): dfefec8

fix again again

Files changed (1) hide show

app.py CHANGED Viewed

@@ -80,32 +80,30 @@ transform = transforms.Compose([
 # Define the prediction function
 def classify_image(image):
-    # Convert the NumPy array to a PIL image
-    image = Image.fromarray(image).convert("L")
     # Invert the colors
     image = ImageOps.invert(image)
-    # Resize the image to the input size the model requires
     image = image.resize((224, 224))
-    # Image preprocessing (adjust to suit your model)
     img = transform(image).unsqueeze(0)  # add a batch dimension
     # Model prediction
     with torch.no_grad():
         outputs = model(img)
-        # If the model output has not been passed through softmax, it can be applied here
         probabilities = F.softmax(outputs, dim=1)
     # Get the predicted result
     _, predicted = torch.max(outputs, 1)
-    # If the probability needs to be returned
-    # return {str(predicted.item()): probabilities[0][predicted].item()}
-    # Return only the predicted class
-    return str(predicted.item())
 # # Create the Gradio interface
 # iface = gr.Interface(
@@ -118,11 +116,17 @@ def classify_image(image):
 iface = gr.Interface(  # Gradio interface that passes the sketchpad input to classify_image
     fn=classify_image,
-    inputs=gr.Sketchpad(crop_size=(256,256), type='numpy', image_mode='L', brush=gr.Brush()),
     outputs=gr.Label(num_top_classes=1),
     title="MNIST Digit Classification with ViT",
-    description="使用鼠标手绘一个数字，模型将预测其所属的类别。"
 )
 iface.launch()  # launch the interface

 # Define the prediction function
 def classify_image(image):
+    # image is already a PIL image
+    # Convert the image to grayscale mode
+    image = image.convert("L")
     # Invert the colors
     image = ImageOps.invert(image)
+    # Resize the image
     image = image.resize((224, 224))
+    # Image preprocessing
     img = transform(image).unsqueeze(0)  # add a batch dimension
     # Model prediction
     with torch.no_grad():
         outputs = model(img)
         probabilities = F.softmax(outputs, dim=1)
     # Get the predicted result
     _, predicted = torch.max(outputs, 1)
+    confidence = probabilities[0][predicted].item()
+    # Return a result dict containing the predicted class and its confidence
+    return {str(predicted.item()): confidence}
 # # Create the Gradio interface
 # iface = gr.Interface(
 iface = gr.Interface(
     fn=classify_image,
+    inputs=gr.Sketchpad(
+        shape=(224, 224),  # NOTE(review): shape/invert_colors look like Gradio 3.x Sketchpad arguments, while the removed call used crop_size/gr.Brush (Gradio 4.x style) - confirm the installed Gradio version accepts them
+        invert_colors=False,
+        label="Draw a digit",
+        type='pil'  # set to return a PIL image
+    ),
     outputs=gr.Label(num_top_classes=1),
     title="MNIST Digit Classification with ViT",
+    description="Use the mouse to hand draw a number and the model will predict the category it belongs to."
 )
 iface.launch()