OppaAI committed on
Commit
b18ef1e
·
verified ·
1 Parent(s): 6d55b52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -4,11 +4,18 @@ import time
4
  import io
5
  import gradio as gr
6
  from gradio_client import Client
 
7
 
8
- # Load environment variables
9
- ROBOT_ID = os.environ.get("ROBOT_ID")
 
 
10
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
11
- HF_SPACE = "OppaAI/Robot_MCP_Server"
 
 
 
 
12
  API_NAME = "/predict"
13
 
14
 
@@ -22,28 +29,46 @@ def process_webcam_stream(image):
22
  image.save(buffered, format="JPEG")
23
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
24
 
25
- # Prepare payload
 
26
  payload = {
27
- "image_b64": b64_img,
28
- "robot_id": ROBOT_ID,
29
- "timestamp": time.time(),
30
- "hf_token": HF_TOKEN
31
  }
32
 
33
  # Send to HF Space using streaming-friendly predict
34
  client = Client(HF_SPACE)
35
  try:
36
- resp = client.predict(payload, api_name=API_NAME)
37
- objects_list = resp.get("objects", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
39
 
40
  return (
41
- resp.get("description", ""),
42
- resp.get("human", ""),
43
  objects_str,
44
- resp.get("environment", "")
45
  )
46
  except Exception as e:
 
 
47
  return f"Error: {e}", "", "", ""
48
 
49
 
@@ -56,11 +81,13 @@ with gr.Blocks() as demo:
56
  sources=["upload", "webcam"],
57
  type="pil"
58
  )
59
- description_out = gr.Textbox(label="Description")
60
- human_out = gr.Textbox(label="Human")
61
- objects_out = gr.Textbox(label="Objects")
62
- environment_out = gr.Textbox(label="Environment")
 
63
 
 
64
  webcam_input.stream(
65
  process_webcam_stream,
66
  inputs=[webcam_input],
 
4
  import io
5
  import gradio as gr
6
  from gradio_client import Client
7
+ from dotenv import load_dotenv
8
 
9
+ # Load environment variables (ensure .env is set up locally)
10
+ load_dotenv()
11
+
12
+ ROBOT_ID = os.environ.get("ROBOT_ID", "unknown") # Default to 'unknown' if missing
13
  HF_TOKEN = os.environ.get("HF_CV_ROBOT_TOKEN")
14
+ if not HF_TOKEN:
15
+ # If HF_CV_ROBOT_TOKEN is unset, the remote API call will likely fail, but the app can still start
16
+ print("Warning: HF_TOKEN not found. API calls may fail.")
17
+
18
+ HF_SPACE = "OppaAI/Robot_MCP_Server" # HF Space name
19
  API_NAME = "/predict"
20
 
21
 
 
29
  image.save(buffered, format="JPEG")
30
  b64_img = base64.b64encode(buffered.getvalue()).decode("utf-8")
31
 
32
+ # Prepare payload with the CORRECT keys matching the server function arguments
33
+ # Server expects: hf_token_input, robot_id_input, image_b64_input
34
  payload = {
35
+ "hf_token_input": HF_TOKEN,
36
+ "robot_id_input": ROBOT_ID,
37
+ # "timestamp": time.time(), # Server function doesn't use this, so we remove it
38
+ "image_b64_input": b64_img
39
  }
40
 
41
  # Send to HF Space using streaming-friendly predict
42
  client = Client(HF_SPACE)
43
  try:
44
+ # client.predict takes each input as a separate positional argument (not wrapped in a list/tuple).
45
+ # The order must match the server function signature:
46
+ resp = client.predict(
47
+ payload["hf_token_input"],
48
+ payload["robot_id_input"],
49
+ payload["image_b64_input"],
50
+ api_name=API_NAME
51
+ )
52
+
53
+ # The server response structure uses a nested 'result' key in the dict
54
+ vlm_result = resp.get("result", {})
55
+
56
+ description_out = vlm_result.get("description", "")
57
+ human_out = vlm_result.get("human", "")
58
+ objects_list = vlm_result.get("objects", [])
59
+ environment_out = vlm_result.get("environment", "")
60
+
61
  objects_str = ", ".join(objects_list) if isinstance(objects_list, list) else str(objects_list)
62
 
63
  return (
64
+ description_out,
65
+ human_out,
66
  objects_str,
67
+ environment_out
68
  )
69
  except Exception as e:
70
+ # Print the error to the local console for debugging
71
+ print(f"Error calling remote Gradio API: {e}")
72
  return f"Error: {e}", "", "", ""
73
 
74
 
 
81
  sources=["upload", "webcam"],
82
  type="pil"
83
  )
84
+ with gr.Column():
85
+ description_out = gr.Textbox(label="Description")
86
+ human_out = gr.Textbox(label="Human")
87
+ objects_out = gr.Textbox(label="Objects")
88
+ environment_out = gr.Textbox(label="Environment")
89
 
90
+ # Gradio handles the local streaming loop
91
  webcam_input.stream(
92
  process_webcam_stream,
93
  inputs=[webcam_input],