Spaces:

oxkitsune
/

rerun-example-zero-gpu

Sleeping

App Files Community

oxkitsune committed on Apr 14, 2025

Commit

79c4fdc

1 Parent(s): 453d9af

Actually run inference on the image

Browse files

Files changed (1) hide show

app.py +26 -101

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import time
 import uuid
 import subprocess
 subprocess.run(
     "pip install gradio_rerun-0.23.0a2.tar.gz",
     shell=True,
@@ -40,11 +41,14 @@ image = Image.open(requests.get(url, stream=True).raw)
 processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
 model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
 # Whenever we need a recording, we construct a new recording stream.
 # As long as the app and recording IDs remain the same, the data
 # will be merged by the Viewer.
 def get_recording(recording_id: str) -> rr.RecordingStream:
-    return rr.RecordingStream(application_id="rerun_example_gradio", recording_id=recording_id)
 # A task can directly log to a binary stream, which is routed to the embedded viewer.
@@ -53,7 +57,7 @@ def get_recording(recording_id: str) -> rr.RecordingStream:
 # This is the preferred way to work with Rerun in Gradio since your data can be immediately and
 # incrementally seen by the viewer. Also, there are no ephemeral RRDs to cleanup or manage.
 @spaces.GPU
-def streaming_repeated_blur(recording_id: str, img):
     # Here we get a recording using the provided recording id.
     rec = get_recording(recording_id)
     stream = rec.binary_stream()
@@ -73,108 +77,38 @@ def streaming_repeated_blur(recording_id: str, img):
     rec.log("image", rr.Image(img))
     yield stream.read()
-    inputs = processor(images=image, return_tensors="pt")
-    outputs = model(**inputs)
     # convert outputs (bounding boxes and class logits) to COCO API
     # let's only keep detections with score > 0.9
-    target_sizes = torch.tensor([image.size[::-1]])
-    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
-    print(results)
-    rec.log("image/objects", rr.Boxes2D(sizes=results["boxes"], labels=[model.config.id2label[label.item()] for label in results["labels"]]))
-    # Ensure we consume everything from the recording.
-    stream.flush()
-    yield stream.read()
-# In this example the user is able to add keypoints to an image visualized in Rerun.
-# These keypoints are stored in the global state, we use the session id to keep track of which keypoints belong
-# to a specific session (https://www.gradio.app/guides/state-in-blocks).
-#
-# The current session can be obtained by adding a parameter of type `gradio.Request` to your event listener functions.
-Keypoint = tuple[float, float]
-keypoints_per_session_per_sequence_index: dict[str, dict[int, list[Keypoint]]] = {}
-def get_keypoints_for_user_at_sequence_index(request: gr.Request, sequence: int) -> list[Keypoint]:
-    per_sequence = keypoints_per_session_per_sequence_index[request.session_hash]
-    if sequence not in per_sequence:
-        per_sequence[sequence] = []
-    return per_sequence[sequence]
-def initialize_instance(request: gr.Request) -> None:
-    keypoints_per_session_per_sequence_index[request.session_hash] = {}
-def cleanup_instance(request: gr.Request) -> None:
-    if request.session_hash in keypoints_per_session_per_sequence_index:
-        del keypoints_per_session_per_sequence_index[request.session_hash]
-# In this function, the `request` and `evt` parameters will be automatically injected by Gradio when this
-# event listener is fired.
-#
-# `SelectionChange` is a subclass of `EventData`: https://www.gradio.app/docs/gradio/eventdata
-# `gr.Request`: https://www.gradio.app/main/docs/gradio/request
-def register_keypoint(
-    active_recording_id: str,
-    current_timeline: str,
-    current_time: float,
-    request: gr.Request,
-    evt: SelectionChange,
-):
-    if active_recording_id == "":
-        return
-    if current_timeline != "iteration":
-        return
-    # We can only log a keypoint if the user selected only a single item.
-    if len(evt.items) != 1:
-        return
-    item = evt.items[0]
-    # If the selected item isn't an entity, or we don't have its position, then bail out.
-    if item.kind != "entity" or item.position is None:
-        return
-    # Now we can produce a valid keypoint.
-    rec = get_recording(active_recording_id)
-    stream = rec.binary_stream()
-    # We round `current_time` toward 0, because that gives us the sequence index
-    # that the user is currently looking at, due to the Viewer's latest-at semantics.
-    index = math.floor(current_time)
-    # We keep track of the keypoints per sequence index for each user manually.
-    keypoints = get_keypoints_for_user_at_sequence_index(request, index)
-    keypoints.append(item.position[0:2])
-    rec.set_time("iteration", sequence=index)
-    rec.log(f"{item.entity_path}/keypoint", rr.Points2D(keypoints, radii=2))
     # Ensure we consume everything from the recording.
     stream.flush()
     yield stream.read()
-def track_current_time(evt: TimeUpdate):
-    return evt.time
-def track_current_timeline_and_time(evt: TimelineChange):
-    return evt.timeline, evt.time
 with gr.Blocks() as demo:
     with gr.Row():
         img = gr.Image(interactive=True, label="Image")
         with gr.Column():
-            stream_blur = gr.Button("Stream Repeated Blur")
     with gr.Row():
         viewer = Rerun(
@@ -195,20 +129,11 @@ with gr.Blocks() as demo:
     # When registering the event listeners, we pass the `recording_id` in as input in order to create
     # a recording stream using that id.
-    stream_blur.click(
         # Using the `viewer` as an output allows us to stream data to it by yielding bytes from the callback.
-        streaming_repeated_blur,
         inputs=[recording_id, img],
         outputs=[viewer],
     )
-    viewer.selection_change(
-        register_keypoint,
-        inputs=[recording_id, current_timeline, current_time],
-        outputs=[viewer],
-    )
-    viewer.time_update(track_current_time, outputs=[current_time])
-    viewer.timeline_change(track_current_timeline_and_time, outputs=[current_timeline, current_time])
 if __name__ == "__main__":
-    demo.launch()

 import uuid
 import subprocess
 subprocess.run(
     "pip install gradio_rerun-0.23.0a2.tar.gz",
     shell=True,
 processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
 model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
 # Whenever we need a recording, we construct a new recording stream.
 # As long as the app and recording IDs remain the same, the data
 # will be merged by the Viewer.
 def get_recording(recording_id: str) -> rr.RecordingStream:
+    return rr.RecordingStream(
+        application_id="rerun_example_gradio", recording_id=recording_id
+    )
 # A task can directly log to a binary stream, which is routed to the embedded viewer.
 # This is the preferred way to work with Rerun in Gradio since your data can be immediately and
 # incrementally seen by the viewer. Also, there are no ephemeral RRDs to cleanup or manage.
 @spaces.GPU
+def streaming_object_detection(recording_id: str, img):
     # Here we get a recording using the provided recording id.
     rec = get_recording(recording_id)
     stream = rec.binary_stream()
     rec.log("image", rr.Image(img))
     yield stream.read()
+    with torch.inference_mode():
+        inputs = processor(images=img, return_tensors="pt")
+        outputs = model(**inputs)
     # convert outputs (bounding boxes and class logits) to COCO API
     # let's only keep detections with score > 0.9
+    target_sizes = torch.tensor([img.height, img.width])
+    results = processor.post_process_object_detection(
+        outputs, target_sizes=target_sizes, threshold=0.9
+    )[0]
+    print(results)
+    rec.log(
+        "image/objects",
+        rr.Boxes2D(
+            array=results["boxes"],
+            array_format=rr.Box2DFormat.XYXY,
+            labels=[model.config.id2label[label.item()] for label in results["labels"]],
+        ),
+    )
     # Ensure we consume everything from the recording.
     stream.flush()
     yield stream.read()
 with gr.Blocks() as demo:
     with gr.Row():
         img = gr.Image(interactive=True, label="Image")
         with gr.Column():
+            detect_objects = gr.Button("Detect objects")
     with gr.Row():
         viewer = Rerun(
     # When registering the event listeners, we pass the `recording_id` in as input in order to create
     # a recording stream using that id.
+    detect_objects.click(
         # Using the `viewer` as an output allows us to stream data to it by yielding bytes from the callback.
+        streaming_object_detection,
         inputs=[recording_id, img],
         outputs=[viewer],
     )
 if __name__ == "__main__":
+    demo.launch(ssr_mode=False)