Spaces:

JosephCatrambone
/

tiny_doodle_embedding

Sleeping

App Files Files Community

JosephCatrambone committed on May 10, 2025

Commit

db8bba4

1 Parent(s): f29dbf2

First import.

Browse files

Files changed (2) hide show

app.py +91 -0
tiny_doodle_embedding.onnx +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import onnxruntime as ort
+import numpy
+import gradio as gr
+from PIL import Image
+# Load the embedding model once, at import time, so every request reuses the
+# same session.  The path is relative to the working directory and must match
+# the model file shipped next to app.py (tiny_doodle_embedding.onnx).
+ort_sess = ort.InferenceSession('tiny_doodle_embedding.onnx')
+def get_bounds(img):
+    """Return the inclusive bounding box of the bright pixels of a 2-D image.
+
+    Assumes a BLACK BACKGROUND (white strokes on black): a pixel counts as
+    "drawn" when it is strictly brighter than the midpoint between the
+    darkest and the brightest value in the image.
+
+    Args:
+        img: 2-D numpy array indexed as img[y, x].
+
+    Returns:
+        (top, bottom, left, right) as Python ints.  All four are INCLUSIVE
+        indices, so a slice needs ``bottom + 1`` / ``right + 1`` as its end.
+        When no pixel is brighter than the midpoint (e.g. a uniform image)
+        the result is (height, 0, width, 0), i.e. top > bottom and
+        left > right; callers should treat that as "nothing drawn".
+
+    Raises:
+        ValueError: if img has no elements (numpy cannot take min/max).
+    """
+    height, width = img.shape[0], img.shape[1]
+    min_color = numpy.min(img)
+    max_color = numpy.max(img)
+    mean_color = 0.5 * (min_color + max_color)
+    # Vectorised scan: find the rows / columns containing at least one drawn
+    # pixel instead of visiting every pixel from Python.
+    drawn = img > mean_color
+    rows = numpy.flatnonzero(drawn.any(axis=1))
+    cols = numpy.flatnonzero(drawn.any(axis=0))
+    if rows.size == 0:
+        # Nothing above the threshold: keep the historical "inverted" box.
+        return (height, 0, width, 0)
+    return (int(rows[0]), int(rows[-1]), int(cols[0]), int(cols[-1]))
+def resize_maxpool(img, out_width: int, out_height: int):
+    """Resize a 2-D image to (out_height, out_width) by max-pooling.
+
+    Each output pixel is the maximum of the input window that maps onto it.
+    Windows are derived from floor/ceil bin edges so that together they cover
+    the WHOLE input (no trailing rows/columns are dropped when the sizes do
+    not divide evenly) and every window holds at least one pixel (so inputs
+    smaller than the output are up-sampled by repetition instead of failing).
+    When the input size is an exact multiple of the output size the windows
+    are the plain non-overlapping tiles.
+
+    Args:
+        img: 2-D numpy array indexed as img[y, x].
+        out_width: number of columns of the result.
+        out_height: number of rows of the result.
+
+    Returns:
+        A new array of shape (out_height, out_width) with img's dtype.
+
+    Raises:
+        ValueError: if img has a zero-length dimension.
+    """
+    in_height, in_width = img.shape[0], img.shape[1]
+    if in_height == 0 or in_width == 0:
+        raise ValueError("resize_maxpool() needs a non-empty image")
+    out = numpy.zeros((out_height, out_width), dtype=img.dtype)
+    for y in range(out_height):
+        y0 = (y * in_height) // out_height
+        # Ceil division for the end edge; max() guarantees a non-empty window.
+        y1 = max(y0 + 1, ((y + 1) * in_height + out_height - 1) // out_height)
+        for x in range(out_width):
+            x0 = (x * in_width) // out_width
+            x1 = max(x0 + 1, ((x + 1) * in_width + out_width - 1) // out_width)
+            out[y, x] = numpy.max(img[y0:y1, x0:x1])
+    return out
+def process_input(input_msg):
+    """Turn one sketchpad payload into a (1, 32, 32) float model input.
+
+    Steps: binarise and invert the drawing (the canvas is light, strokes are
+    dark, the model wants bright strokes on black), crop to the strokes'
+    bounding box, max-pool down to 32x32 and add a leading axis.
+
+    Args:
+        input_msg: mapping with a "composite" entry holding the drawing as a
+            numpy array (presumably 2-D grayscale, given image_mode='L' on
+            the sketchpads; verify against the Gradio version in use).
+
+    Returns:
+        float array of shape (1, 32, 32) with values 0.0 / 1.0.  An all-zero
+        array is returned when there is nothing to embed (no payload, no
+        composite, or a blank / uniform canvas).
+    """
+    blank = numpy.zeros((1, 32, 32))
+    # Defensive: tolerate a missing payload instead of raising.
+    img = None if input_msg is None else input_msg["composite"]
+    if img is None or numpy.size(img) == 0:
+        return blank
+    # Image is inverted.  255 is white, 0 is what's drawn.
+    img_mean = 0.5 * (numpy.max(img) + numpy.min(img))
+    img = 1.0 * (img < img_mean)  # Invert the image and convert to a float.
+    top, bottom, left, right = get_bounds(img)
+    if bottom < top or right < left:
+        # get_bounds found no stroke (blank or uniform canvas).
+        return blank
+    # get_bounds returns INCLUSIVE indices; slice ends are exclusive, so add
+    # one to keep the last drawn row and column.
+    img = img[top:bottom + 1, left:right + 1]
+    img = resize_maxpool(img, 32, 32)
+    return numpy.expand_dims(img, axis=0)  # Unsqueeze
+def compare(input_img_a, input_img_b):
+    """Embed two sketches and report how similar their embeddings are.
+
+    Args:
+        input_img_a: sketchpad payload for the first drawing.
+        input_img_b: sketchpad payload for the second drawing.
+
+    Returns:
+        A 3-tuple matching the interface outputs:
+        (preview image of both preprocessed inputs side by side,
+         dot product of the two embeddings,
+         text dump of both embeddings).
+    """
+    processed = [process_input(msg) for msg in (input_img_a, input_img_b)]
+    # The two inputs are embedded with separate runs; they could be stacked
+    # into a single batch instead.
+    embeddings = [
+        ort_sess.run(None, {'input': sample.astype(numpy.float32)})[0]
+        for sample in processed
+    ]
+    # Magnitude normalisation is currently switched off: both scales are 1.0.
+    scale_a = 1.0
+    scale_b = 1.0
+    emb_a, emb_b = embeddings
+    emb_a /= scale_a
+    emb_b /= scale_b
+    report = "".join([
+        f"img_a_embedding: {emb_a}\n",
+        f"img_b_embedding: {emb_b}\n",
+    ])
+    similarity = numpy.dot(emb_a, emb_b.T)
+    print(similarity)
+    print(report)
+    # Show exactly what the model saw: both 32x32 inputs next to each other.
+    side_by_side = numpy.hstack([processed[0][0], processed[1][0]])
+    pixels = numpy.clip(side_by_side * 254, 0, 255).astype(numpy.uint8)
+    return Image.fromarray(pixels), similarity[0][0], report
+# Web UI: two independent grayscale sketchpads feed compare(); the results are
+# shown as an image, a number and a text box.
+# NOTE: a gr.ImageEditor input (320x320 canvas, "upload"/"clipboard" sources,
+# layers disabled, image_mode='L', type='numpy') was tried as an alternative
+# to the sketchpads and is currently not used.
+demo = gr.Interface(
+    fn=compare,
+    inputs=[gr.Sketchpad(image_mode='L', type='numpy') for _ in range(2)],
+    outputs=["image", "number", "text"],
+)
+demo.launch(share=True)

tiny_doodle_embedding.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:398b48fcb1ced8b55d3e99300094f5e8c4974d87a3052d61f92111b44bbd2b5f
+size 42218748