Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from inference import inference_and_run | |
| import spaces | |
| import os | |
| import re | |
| import shutil | |
# Display name used for the page title and the header card.
model_name = "Ferret-UI"

# Absolute path of the directory that contains this script.
cur_dir = os.path.split(os.path.abspath(__file__))[0]
def inference_with_gradio(chatbot, image, prompt, model_path, box=None, temperature=0.2, top_p=0.7, max_new_tokens=512):
    """Run Ferret-UI on the uploaded image and append the exchange to the chat.

    Args:
        chatbot: Current chat history as a list of ``(prompt, reply)`` pairs,
            or a falsy value when there is no history yet.
        image: Filesystem path of the uploaded image, or ``None``/``""`` when
            nothing has been uploaded.
        prompt: Text the user typed.
        model_path: Checkpoint identifier chosen in the UI. It is used both to
            load the model and to pick the conversation template, so the two
            always agree. Falls back to the Gemma checkpoint when empty.
        box: Optional bounding-box text; a blank string is treated as ``None``.
        temperature: Sampling temperature forwarded to ``inference_and_run``.
        top_p: Nucleus-sampling value forwarded to ``inference_and_run``.
        max_new_tokens: Generation length limit forwarded to
            ``inference_and_run``.

    Returns:
        A new history list with ``(prompt, reply)`` appended. The list passed
        in as ``chatbot`` is never mutated.
    """
    new_history = list(chatbot) if chatbot else []

    # Nothing uploaded: answer in the chat instead of crashing on a None path.
    if not image:
        new_history.append((prompt, "Please upload an image before sending a prompt."))
        return new_history

    model_path = model_path or "jadechoghari/Ferret-UI-Gemma2b"

    # The textbox yields "" when left empty; normalise that to "no box".
    if isinstance(box, str):
        box = box.strip() or None

    # Stage the image in the working directory, which is what gets passed to
    # inference_and_run as image_dir below.
    filename = os.path.basename(image)
    dir_path = "./"
    image_path = os.path.join(dir_path, filename)
    try:
        shutil.copy(image, image_path)
    except shutil.SameFileError:
        # The image already lives in the working directory; nothing to copy.
        pass
    print("filename path: ", filename)

    if "gemma" in model_path.lower():
        conv_mode = "ferret_gemma_instruct"
    else:
        conv_mode = "ferret_llama_3"

    inference_text = inference_and_run(
        image_path=filename,
        image_dir=dir_path,
        prompt=prompt,
        model_path=model_path,
        conv_mode=conv_mode,
        temperature=temperature,
        top_p=top_p,
        box=box,
        max_new_tokens=max_new_tokens,
    )

    # A list/tuple result is reduced to its first element; empty -> "".
    if isinstance(inference_text, (list, tuple)):
        inference_text = str(inference_text[0]) if inference_text else ""

    new_history.append((prompt, inference_text))
    return new_history
def submit_chat(chatbot, text_input):
    """Pass the chat history through unchanged and blank the prompt box.

    Args:
        chatbot: Current chat history; returned as-is.
        text_input: Text currently in the prompt box; ignored.

    Returns:
        A ``(chatbot, '')`` tuple: the untouched history and an empty string
        that replaces the prompt text.
    """
    return chatbot, ''
def clear_chat():
    """Return the reset values for the widgets wired to the Clear button.

    Returns:
        A 7-tuple: empty chat history, no image, blank prompt, blank bounding
        box, temperature 0.2, top-p 0.7 and max-new-tokens 512.
    """
    empty_history = []
    no_image = None
    blank_prompt = ""
    blank_box = ""
    temperature, top_p, max_new_tokens = 0.2, 0.7, 512
    return (
        empty_history,
        no_image,
        blank_prompt,
        blank_box,
        temperature,
        top_p,
        max_new_tokens,
    )
# Header card rendered at the top of the page through gr.HTML.
# The glyph before the model name is the private-use code point U+F8FF; it is
# written as a numeric character reference so it survives re-encoding.
html = f"""
<div style="text-align: center; padding: 20px;">
  <div style="display: inline-block; background-color: #f5f5f7; padding: 20px; border-radius: 20px; box-shadow: 0px 6px 20px rgba(0, 0, 0, 0.1);">
    <div style="display: flex; align-items: center;">
      <img src='https://github.com/apple/ml-ferret/blob/main/ferretui/figs/ferretui_icon.png?raw=true' alt='Ferret-UI'
           style='width: 80px; height: 80px; border-radius: 20px; box-shadow: 0px 8px 16px rgba(0, 0, 0, 0.2);'/>
      <div style="margin-left: 15px;">
        <h1 style="font-size: 2.8em; font-family: -apple-system, BlinkMacSystemFont, sans-serif; color: #1D1D1F;
                   font-weight: bold; margin-bottom: 0;">&#xF8FF; {model_name}</h1>
        <p style="font-size: 1.2em; color: #6e6e73; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 5px;">
          📱 Grounded Mobile UI Understanding with Multimodal LLMs.<br>
          A new MLLM tailored for enhanced understanding of mobile UI screens, equipped with referring, grounding, and reasoning capabilities.
        </p>
        <a href='https://huggingface.co/jadechoghari/Ferret-UI-Gemma2b' style='text-decoration: none;'>
          <button style="background-color: #007aff; color: white; font-size: 1.2em; padding: 10px 20px; border-radius: 10px; border: none; margin-top: 10px; box-shadow: 0px 4px 12px rgba(0, 122, 255, 0.4); cursor: pointer;">
            🤗 Try on Hugging Face
          </button>
        </a>
      </div>
    </div>
  </div>
  <p style="font-size: 1.2em; color: #86868B; font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin-top: 30px;">
    We release two Ferret-UI checkpoints, built on gemma-2b and Llama-3-8B models respectively, for public exploration. 🚀
  </p>
</div>
"""
# Delimiter specs handed to gr.Chatbot(latex_delimiters=...). Each entry is
# expanded from a (left, right, display) triple.
latex_delimiters_set = [
    {"left": left, "right": right, "display": display}
    for left, right, display in (
        ("\\(", "\\)", False),
        ("\\begin{equation}", "\\end{equation}", True),
        ("\\begin{align}", "\\end{align}", True),
    )
]
# Input widgets. They are built here and placed into the layout later with
# .render().
image_input = gr.Image(
    label="Upload Image",
    type="filepath",
    height=350,
)
text_input = gr.Textbox(
    lines=2,
    placeholder="Enter your prompt here...",
    label="Prompt",
)
model_dropdown = gr.Dropdown(
    choices=[
        "jadechoghari/Ferret-UI-Gemma2b",
        "jadechoghari/Ferret-UI-Llama8b",
    ],
    label="Model Path",
    value="jadechoghari/Ferret-UI-Gemma2b",
)
bounding_box_input = gr.Textbox(
    placeholder="Optional bounding box (x1, y1, x2, y2)",
    label="Bounding Box (optional)",
)

# Generation controls: temperature, top_p and max_new_tokens.
temperature_input = gr.Slider(
    minimum=0.1,
    maximum=2.0,
    step=0.1,
    value=0.2,
    label="Temperature",
)
top_p_input = gr.Slider(
    minimum=0.0,
    maximum=1.0,
    step=0.05,
    value=0.7,
    label="Top P",
)
max_new_tokens_input = gr.Slider(
    minimum=1,
    maximum=1024,
    step=1,
    value=512,
    label="Max New Tokens",
)

# Conversation display; history is kept as (prompt, reply) tuples.
chatbot = gr.Chatbot(
    label="Chat with Ferret-UI",
    height=400,
    show_copy_button=True,
    latex_delimiters=latex_delimiters_set,
    type="tuples",
)
# Page layout: header card on top, inputs in the narrow left column, chat and
# action buttons in the wide right column.
with gr.Blocks(title=model_name, theme=gr.themes.Ocean()) as demo:
    gr.HTML(html)

    with gr.Row():
        with gr.Column(scale=3):
            image_input.render()
            text_input.render()
            model_dropdown.render()
            bounding_box_input.render()
            temperature_input.render()
            top_p_input.render()
            max_new_tokens_input.render()

            # Clickable examples that fill the image, prompt, model and box.
            gr.Examples(
                examples=[
                    ["appstore_reminders.png", "Describe the image in details", "jadechoghari/Ferret-UI-Gemma2b", None],
                    ["appstore_reminders.png", "What's inside the selected region?", "jadechoghari/Ferret-UI-Gemma2b", "189, 906, 404, 970"],
                    ["appstore_reminders.png", "Where is the Game Tab?", "jadechoghari/Ferret-UI-Gemma2b", None],
                ],
                inputs=[image_input, text_input, model_dropdown, bounding_box_input],
            )

        with gr.Column(scale=7):
            chatbot.render()
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

    # Both triggers (Send button, Enter in the prompt box) run the same chain:
    # inference first, then submit_chat to blank the prompt box.
    inference_inputs = [
        chatbot,
        image_input,
        text_input,
        model_dropdown,
        bounding_box_input,
        temperature_input,
        top_p_input,
        max_new_tokens_input,
    ]

    send_click_event = send_btn.click(
        inference_with_gradio, inference_inputs, chatbot
    ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])

    submit_event = text_input.submit(
        inference_with_gradio, inference_inputs, chatbot
    ).then(submit_chat, [chatbot, text_input], [chatbot, text_input])

    # Clear resets every widget listed here to the values from clear_chat().
    clear_btn.click(
        clear_chat,
        outputs=[
            chatbot,
            image_input,
            text_input,
            bounding_box_input,
            temperature_input,
            top_p_input,
            max_new_tokens_input,
        ],
    )

demo.launch()