import os
import base64
import time
import io
import gradio as gr
from fastmcp import Client
from fastmcp.client import StreamableHttpTransport
import asyncio
from dotenv import load_dotenv
import ast  # AST module — used by the elided frame-processing code below

# ... (imports and global variables remain the same) ...

# ... (process_webcam_stream_async function remains the same) ...

# Label / height (in textbox lines) for each VLM output field, in the order
# the stream callback yields them: description, human, objects, environment.
_OUTPUT_FIELDS = [
    ("Description", 5),
    ("Human", 3),
    ("Objects", 2),
    ("Environment", 3),
]

_TOKEN_NOTICE = """
### 🔑 Hugging Face Token Required
To use this application, you must set a valid **Hugging Face API Token** in your local environment variables: `HF_TOKEN`.
**A write token is required** to upload images to the public dataset associated with this space. The resource usage for VLM inference will be tracked against *your* account.
"""

with gr.Blocks() as demo:
    gr.Markdown("## 🎥 Robot Vision Webcam Stream (using MCP Client)")
    gr.Markdown(_TOKEN_NOTICE)

    with gr.Row():
        # Left side: the live/uploaded image source, delivered as a PIL image.
        webcam_input = gr.Image(
            label="Captured from Web-Cam",
            sources=["upload", "webcam"],
            type="pil",
        )
        # Right side: the four read-only result textboxes, built from the
        # spec table so labels and sizes live in one place.
        with gr.Column():
            output_boxes = [
                gr.Textbox(label=field_label, lines=field_height)
                for field_label, field_height in _OUTPUT_FIELDS
            ]

    # Re-run the async VLM pipeline on a frame sampled every 0.5 s and fan
    # the results out into the four textboxes.
    webcam_input.stream(
        process_webcam_stream_async,
        inputs=[webcam_input],
        outputs=output_boxes,
        stream_every=0.5,
    )

if __name__ == "__main__":
    demo.launch()