Spaces:

Vizuara
/

session4-world-model-inference

Sleeping

App Files Files Community

Vizuara committed on Apr 17

Commit

bc29019

verified ·

1 Parent(s): c5aa869

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +58 -43

app.py CHANGED Viewed

@@ -1,17 +1,16 @@
 """
-HuggingFace Spaces: FastAPI + Gradio inference server with WebSocket support.
-The Vercel website connects to /ws for real-time Three.js sim inference.
 """
 import base64, io, json, os
 import gradio as gr
 import numpy as np
 import torch
 import torch.nn as nn
-from fastapi import WebSocket, WebSocketDisconnect
-from fastapi.middleware.cors import CORSMiddleware
 from PIL import Image
 class Encoder(nn.Module):
     def __init__(self, ld=256):
         super().__init__()
@@ -84,53 +83,69 @@ def predict(policy, image_b64):
         elif kind=="zpos_bc": _,mu,_=enc(t); p=pos(mu); a=model(mu,p)[0].numpy()
     return {"vx":float(a[0]*MAX_VX),"vy":float(a[1]*MAX_VY),"omega":float(a[2]*MAX_OMEGA)}
-POLS=["bc","bc_v2","bc_v3","bc_v4","bc_v5","iter10_latent_bc","iter14_zpos_bc"]
-def gradio_fn(image, policy):
     if image is None: return "Upload a dashcam image"
     buf=io.BytesIO(); Image.fromarray(image).resize((128,128)).save(buf,format="JPEG",quality=85)
     r=predict(policy, base64.b64encode(buf.getvalue()).decode())
     return f"vx: {r['vx']:+.3f} m/s\nvy: {r['vy']:+.3f} m/s\nomega: {r['omega']:+.3f} rad/s"
 with gr.Blocks(title="Session 4 Inference") as demo:
-    gr.Markdown("# Session 4: World Model Driving Inference\nFor real-time inference, connect the [Vercel website](https://session4-vla.vercel.app/#inference) to this Space's WebSocket endpoint.")
     with gr.Row():
-        with gr.Column(): img_in=gr.Image(label="Dashcam",type="numpy"); pol_in=gr.Dropdown(choices=POLS,value="iter14_zpos_bc",label="Policy"); btn=gr.Button("Predict")
-        with gr.Column(): out=gr.Textbox(label="Action",lines=4)
-    btn.click(gradio_fn,[img_in,pol_in],out)
-app = gr.routes.App.create_app(demo)
-app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
-@app.get("/api/policies")
-async def list_policies():
-    return {"policies":[
-        {"name":"bc","label":"Iter 2: BC (basic)","result":"5 laps"},
-        {"name":"bc_v2","label":"Iter 3: BC expert-only","result":"8 laps"},
-        {"name":"bc_v3","label":"Iter 5: BC speed 1.4x","result":"30 laps"},
-        {"name":"bc_v4","label":"Iter 6: BC max speed","result":"40 laps"},
-        {"name":"bc_v5","label":"Iter 7: BC adaptive","result":"35 laps"},
-        {"name":"iter10_latent_bc","label":"Iter 10: Latent BC (WM encoder)","result":"39 laps"},
-        {"name":"iter14_zpos_bc","label":"Iter 14: Z+Pos BC (BEST)","result":"40 laps"},
-    ]}
-@app.websocket("/ws")
-async def ws_inference(ws: WebSocket):
-    await ws.accept()
-    try:
-        while True:
-            data = await ws.receive_json()
-            policy = data.get("policy", "iter14_zpos_bc")
-            image = data.get("image", "")
-            if image:
-                result = predict(policy, image)
-                await ws.send_json(result)
-            else:
-                await ws.send_json({"error": "no image"})
-    except WebSocketDisconnect:
-        pass
-    except Exception:
-        pass
 if __name__ == "__main__":
     demo.launch()

 """
+HuggingFace Spaces: Gradio + custom API for real-time inference.
+Programmatic access goes through Gradio's API (api_name `predict_action` and `list_policies`).
 """
 import base64, io, json, os
 import gradio as gr
 import numpy as np
 import torch
 import torch.nn as nn
 from PIL import Image
+# ---- Model definitions (inline) ----
 class Encoder(nn.Module):
     def __init__(self, ld=256):
         super().__init__()
         elif kind=="zpos_bc": _,mu,_=enc(t); p=pos(mu); a=model(mu,p)[0].numpy()
     return {"vx":float(a[0]*MAX_VX),"vy":float(a[1]*MAX_VY),"omega":float(a[2]*MAX_OMEGA)}
+# ---- Gradio functions ----
+POLS=["bc","bc_v2","bc_v3","bc_v4","bc_v5","iter10_latent_bc","iter14_zpos_bc"]
+POL_INFO = {
+    "bc": "Iter 2: BC (basic) - 5 laps",
+    "bc_v2": "Iter 3: BC expert-only - 8 laps",
+    "bc_v3": "Iter 5: BC speed 1.4x - 30 laps",
+    "bc_v4": "Iter 6: BC max speed - 40 laps",
+    "bc_v5": "Iter 7: BC adaptive - 35 laps",
+    "iter10_latent_bc": "Iter 10: Latent BC (WM encoder) - 39 laps",
+    "iter14_zpos_bc": "Iter 14: Z+Pos BC (BEST) - 40 laps",
+}
+def gradio_predict(image, policy):
     if image is None: return "Upload a dashcam image"
     buf=io.BytesIO(); Image.fromarray(image).resize((128,128)).save(buf,format="JPEG",quality=85)
     r=predict(policy, base64.b64encode(buf.getvalue()).decode())
     return f"vx: {r['vx']:+.3f} m/s\nvy: {r['vy']:+.3f} m/s\nomega: {r['omega']:+.3f} rad/s"
+def api_predict(image_b64, policy):
+    """API function: base64 image + policy name -> JSON action string."""
+    try:
+        r = predict(policy, image_b64)
+        return json.dumps(r)
+    except Exception as e:
+        return json.dumps({"error": str(e)})
+def api_policies():
+    """Return JSON list of available policies."""
+    policies = [{"name": k, "label": v} for k, v in POL_INFO.items()]
+    return json.dumps({"policies": policies})
+# ---- Build Gradio app ----
 with gr.Blocks(title="Session 4 Inference") as demo:
+    gr.Markdown("# Session 4: World Model Driving Inference")
+    gr.Markdown("Upload a dashcam image and select a policy, or use the API for real-time inference from the [Vercel website](https://session4-vla.vercel.app/#inference).")
     with gr.Row():
+        with gr.Column():
+            img_in = gr.Image(label="Dashcam Image (128x128)", type="numpy")
+            pol_in = gr.Dropdown(choices=POLS, value="iter14_zpos_bc", label="Policy")
+            btn = gr.Button("Predict Action", variant="primary")
+        with gr.Column():
+            out = gr.Textbox(label="Predicted Action", lines=4)
+    btn.click(gradio_predict, [img_in, pol_in], out)
+    # API endpoints exposed via Gradio's API
+    api_pred_fn = gr.Interface(
+        fn=api_predict,
+        inputs=[gr.Textbox(label="Base64 Image"), gr.Textbox(label="Policy Name")],
+        outputs=gr.Textbox(label="JSON Result"),
+        api_name="predict_action",
+    )
+    api_pol_fn = gr.Interface(
+        fn=api_policies,
+        inputs=[],
+        outputs=gr.Textbox(label="Policies JSON"),
+        api_name="list_policies",
+    )
 if __name__ == "__main__":
     demo.launch()