import os
import base64
import time
import io
import gradio as gr
from fastmcp import Client
from fastmcp.client import StreamableHttpTransport
import asyncio
from dotenv import load_dotenv
import ast  # AST module — used by the elided frame-processing code below

# ... (imports and global variables remain the same) ...

# ... (process_webcam_stream_async function remains the same) ...

# Label / height (in textbox lines) for each VLM output field, in the order
# the stream callback yields them: description, human, objects, environment.
_OUTPUT_FIELDS = [
    ("Description", 5),
    ("Human", 3),
    ("Objects", 2),
    ("Environment", 3),
]

_TOKEN_NOTICE = """
### 🔑 Hugging Face Token Required
To use this application, you must set a valid **Hugging Face API Token** in your local environment variables: `HF_TOKEN`.
**A write token is required** to upload images to the public dataset associated with this space. The resource usage for VLM inference will be tracked against *your* account.
"""

with gr.Blocks() as demo:
    gr.Markdown("## 🎥 Robot Vision Webcam Stream (using MCP Client)")
    gr.Markdown(_TOKEN_NOTICE)

    with gr.Row():
        # Left side: the live/uploaded image source, delivered as a PIL image.
        webcam_input = gr.Image(
            label="Captured from Web-Cam",
            sources=["upload", "webcam"],
            type="pil",
        )
        # Right side: the four read-only result textboxes, built from the
        # spec table so labels and sizes live in one place.
        with gr.Column():
            output_boxes = [
                gr.Textbox(label=field_label, lines=field_height)
                for field_label, field_height in _OUTPUT_FIELDS
            ]

    # Re-run the async VLM pipeline on a frame sampled every 0.5 s and fan
    # the results out into the four textboxes.
    webcam_input.stream(
        process_webcam_stream_async,
        inputs=[webcam_input],
        outputs=output_boxes,
        stream_every=0.5,
    )

if __name__ == "__main__":
    demo.launch()