# NOTE(review): the lines below were status-banner residue from the hosting
# page ("Spaces: Runtime error") captured by the scrape — not program source.
#%%
import os
import openai
import gradio as gr
from PIL import Image
import sys

# Make sibling modules (gpt_helper) importable when running as a script.
sys.path.append('./')
from gpt_helper import GPT4VisionClass, response_to_json

# Lazily-created GPT-4 vision client: stays None until the user supplies an
# API key through the UI.  `model_status` is the human-readable status string
# shown in the "Initialize API Result" textbox.
model = None
model_status = "Model is not initialized."
def initialize_model(api_key):
    """Create the global GPT-4 vision client once, using the given API key.

    Idempotent: a second call leaves the existing client untouched.

    Args:
        api_key: the user's OpenAI API key.

    Returns:
        A human-readable status string for display in the UI.
    """
    global model, model_status
    if model is not None:
        # Already built on a previous call — do not re-create the client.
        model_status = "Model has already been initialized."
        return model_status
    model = GPT4VisionClass(
        key=api_key,
        max_tokens=1024,
        temperature=0.9,
        gpt_model="gpt-4-vision-preview",
        role_msg="You are a helpful agent with vision capabilities; "
                 "do not respond to objects not depicted in images.",
    )
    model_status = "Model initialized successfully with the provided API key."
    return model_status
def add_text(state, query_text, image_paths=None, images=None):
    """Send a user query (plus optional images) to the model and record the
    exchange in the chat history.

    Args:
        state: list of (user_msg, assistant_msg) tuples — the gradio history.
        query_text: the user's text prompt.
        image_paths: optional list of image file paths to attach.
        images: optional in-memory images, used only when no paths are given.

    Returns:
        (state, state): the updated history, duplicated because the caller
        wires it to both the State and the Chatbot outputs.
    """
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    # Bug fix: the original resolved `images` from image_paths/images but then
    # passed a hard-coded images=None to model.chat(), silently discarding any
    # in-memory images (and leaving the resolved local dead).  Forward
    # whichever source the caller actually provided; paths take precedence.
    model.chat(query_text=query_text, image_paths=image_paths,
               images=None if image_paths is not None else images,
               PRINT_USER_MSG=False, PRINT_GPT_OUTPUT=False,
               RESET_CHAT=False, RETURN_RESPONSE=True, VISUALIZE=False,
               DETAIL='high')
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state
def scenario_button_clicked(scenario_name):
    """Log which scenario button was pressed and echo the message back."""
    message = f"Scenario clicked: {scenario_name}"
    print(message)
    return message
if __name__ == "__main__":
    # Demo images for each scenario, grouped by preference category.
    # Each leaf maps a human-readable scenario name to an image path served
    # to the gallery below.
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages ": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }

    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
<div style='text-align: center;'>
<span style='font-size: 32px; font-weight: bold;'>[Running Examples] <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals</span>
</div>
""")
            gr.Markdown("""
In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, named **V**isual **P**reference **I**nference (**VPI**).
To facilitate visual reasoning in the context of manipulation, we introduce the <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals</span> (<span style='color: #FF9300;'>CoVR</span>) method. <span style='color: #FF9300;'>CoVR</span> employs a prompting mechanism
""")

        ######## Scenario gallery: one column per category, and inside it one
        ######## sub-column per scenario holding a preview image and a button.
        with gr.Row():
            for category, scenarios in image_paths.items():
                with gr.Column():
                    gr.Markdown(f"## {category}")
                    with gr.Row():
                        for scenario, img_path in scenarios.items():
                            with gr.Column(scale=2):
                                # img = Image.open(img_path)
                                # gr.Image(value=img, visible=True)
                                # gr.Image(value=img, visible=True, type="pil")
                                gr.Image(f"/file={img_path}", visible=True)
                                scenario_button = gr.Button(scenario)
                                # `x=scenario` binds the loop variable at lambda
                                # definition time, avoiding the late-binding
                                # closure pitfall in this loop.
                                scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[])
                                # scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[], outputs=[output_text])

        ######## Input OpenAI API Key and display initialization result
        with gr.Row():
            # API Key Input
            with gr.Column():
                openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", type="password", placeholder="sk..",
                                             info="You have to provide your own GPT4 keys for this app to function properly")
                initialize_button = gr.Button("Initialize Model")
            # Initialization Button and Result Display
            with gr.Column():
                model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.")
        initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text])

        ######## Chatbot
        chatbot = gr.Chatbot(elem_id="chatbot")
        state = gr.State([])
        with gr.Row():
            query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image")
        # First handler updates history/chatbot; second clears the textbox.
        query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot])
        query_text.submit(lambda: "", inputs=None, outputs=query_text)

    demo.launch(share=True)