{label}
Point your camera. Ask out loud. Listen to the answer.
BLIND-FIRST NAVIGATION
Third Eye is designed to reduce hesitation in the real world: capture what is ahead, ask what matters, and hear the result without hunting through a crowded interface.
Best results: hold the camera still, keep text centered, and move closer for labels or menus.
Status guide: Listening means voice input, Seeing means image analysis, Thinking means answer generation, Speaking means audio playback.
{backend_status_text()}
", padding=False) warmup_btn = gr.Button( "Pre-load models", variant="secondary", size="sm", elem_classes="diagnostics-btn", ) warmup_output = gr.Textbox( label="Model status", interactive=False, lines=3, ) gr.HTML( """ """, padding=False, ) mode.change( fn=on_mode_change, inputs=mode, outputs=[audio_input, typed, quick_prompt, submit], ) load_example.click( fn=load_sample, inputs=[sample_choice], outputs=[image, status], show_progress="hidden", ) quick_prompt.change( fn=apply_quick_prompt, inputs=[quick_prompt], outputs=[typed], show_progress="hidden", ) submit.click( fn=run_pipeline, inputs=[image, audio_input, typed, mode, language], outputs=[audio_output, answer, question_output, iris, status], show_progress="full", ) warmup_btn.click( fn=warmup_all, inputs=[], outputs=[warmup_output], show_progress="full", ) theme_btn.click(fn=None, inputs=None, outputs=None, js=THEME_TOGGLE_JS) return demo
# NOTE(review): the line above is the whitespace-collapsed tail of
# build_demo(); its line breaks and nesting are not recoverable from this
# view - restore them from the full file before relying on it.

# Build the interface at import time and bind it to the module-level name
# `demo`, so the object exists whether or not this file is run as a script.
demo = build_demo()


if __name__ == "__main__":
    # Bind address: THIRD_EYE_HOST if set, otherwise "0.0.0.0".
    # NOTE(review): 0.0.0.0 listens on every network interface - confirm this
    # is the intended default outside a container.
    launch_host = os.getenv("THIRD_EYE_HOST", "0.0.0.0")
    # Port lookup order: THIRD_EYE_PORT, then PORT, then the literal "7860".
    # int() raises ValueError when the selected value is not a base-10
    # integer (an empty string included).
    launch_port = int(os.getenv("THIRD_EYE_PORT", os.getenv("PORT", "7860")))
    # Enable the queue, then start the server on the resolved host and port.
    # NOTE(review): default_concurrency_limit=2 and show_error=False are
    # passed through unchanged to what is presumably Gradio (`gr`); check its
    # queue()/launch() documentation for their exact effect.
    demo.queue(default_concurrency_limit=2).launch(
        server_name=launch_host,
        server_port=launch_port,
        show_error=False,
    )