import os

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

messages = []

multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
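# The wrapper is only used later as `multimodal_phi2(text, audio, image)` and its
# return value is treated as a plain string, so it is assumed here to accept an
# optional prompt, an optional audio file path and an optional PIL image.
# A hypothetical call (names and files are illustrative, not from the repo):
#
#   answer = multimodal_phi2("What is in this picture?", None, Image.open("cat.png"))
#   print(answer)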
def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    # Append whichever inputs were provided to the chat history; at least one
    # of text, image or audio must be present.
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    elif audio_upload is not None:
        chatbot.append(((audio_upload,), None))
        audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor file is provided
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
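# gr.Chatbot renders each history entry as a (user, bot) pair; wrapping a file
# path in a one-element tuple is how Gradio is told to display it as media
# rather than as text. After submitting a prompt plus an image the history
# would look roughly like (paths are illustrative):
#
#   [("Describe the scene", None), (("/tmp/gradio/photo.png",), None)]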
def clear_data():
    # Returning a dict keyed by components works because the same components
    # are listed in clear.click(..., outputs=[...]) below.
    return {prompt: None, image: None, audio_upload: None, audio_mic: None, chatbot: []}
def run(history, text, image, audio_upload, audio_mic):
    if text in [None, ""]:
        text = None
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    return history, None, None, None, None
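# run() returns five values so that, wired with
# outputs=[chatbot, prompt, image, audio_upload, audio_mic], the updated
# history is displayed and the four input widgets are reset after every
# successful generation, e.g. (illustrative only):
#
#   history, *cleared = run([], "Hello", None, None, None)
#   assert cleared == [None, None, None, None]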
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, os.path.join(os.path.dirname(__file__), "avatar.png")),
    )
    with gr.Row():
        prompt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        # The event wiring below expects separate image and audio inputs, so
        # they are defined as individual components here (the constructor
        # details are an assumption, not taken from the original layout).
        image = gr.Image(type="filepath", label="Image")
        audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload audio")
        audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Record audio")
    with gr.Row():
        submit = gr.Button(value="Submit")
        clear = gr.Button(value="Clear")

    # chatbot.like(print_like_dislike, None, None)

    submit.click(
        add_content,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot],
    ).success(
        run,
        inputs=[chatbot, prompt, image, audio_upload, audio_mic],
        outputs=[chatbot, prompt, image, audio_upload, audio_mic],
    )
    clear.click(
        clear_data,
        outputs=[prompt, image, audio_upload, audio_mic, chatbot],
    )

demo.launch()
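# ---------------------------------------------------------------------------
# A second, chat-style variant of the Space follows. It reuses the same model
# wrapper but drives the Chatbot with the stock Gradio add_text/add_file/bot
# pattern, streaming a placeholder reply character by character instead of
# wiring every input widget to the model.
# ---------------------------------------------------------------------------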
import os
import time

import gradio as gr
from PIL import Image

from inference.main import MultiModalPhi2

messages = []

multimodal_phi2 = MultiModalPhi2(
    modelname_or_path="Navyabhat/Llava-Phi2",
    temperature=0.2,
    max_new_tokens=1024,
    device="cpu",
)
def add_content(chatbot, text, image, audio_upload, audio_mic) -> gr.Chatbot:
    textflag, imageflag, audioflag = False, False, False
    if text not in ["", None]:
        chatbot.append((text, None))
        textflag = True
    if image is not None:
        chatbot.append(((image,), None))
        imageflag = True
    if audio_mic is not None:
        chatbot.append(((audio_mic,), None))
        audioflag = True
    else:
        if audio_upload is not None:
            chatbot.append(((audio_upload,), None))
            audioflag = True
    if not any([textflag, imageflag, audioflag]):
        # Raise an error if neither text nor file is provided
        raise gr.Error("Enter a valid text, image or audio")
    return chatbot
def clear_data():
    # Reset the textbox and the conversation; the values are returned
    # positionally, matching the components listed in clear.click(outputs=...).
    return None, []
def run(history, text, image, audio_upload, audio_mic):
    if text in [None, ""]:
        text = None
    if audio_upload is not None:
        audio = audio_upload
    elif audio_mic is not None:
        audio = audio_mic
    else:
        audio = None
    print("text", text)
    print("image", image)
    print("audio", audio)
    if image is not None:
        image = Image.open(image)
    outputs = multimodal_phi2(text, audio, image)
    history.append((None, outputs.title()))
    return history, None, None, None, None
# def print_like_dislike(x: gr.LikeData):
#     print(x.index, x.value, x.liked)

def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)

def add_file(history, file):
    history = history + [((file.name,), None)]
    return history
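# Note: depending on the Gradio version, gr.UploadButton hands the callback
# either a tempfile-like wrapper (hence `file.name`) or a plain path string;
# wrapping the path in a one-element tuple again makes the Chatbot render the
# upload as media instead of text.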
def bot(history):
    # Placeholder response streamed character by character; the real model is
    # not called here.
    response = "**That's cool!**"
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history
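# Because bot() is a generator, Gradio re-renders the Chatbot on every yield,
# which produces the typing effect. A rough standalone illustration (history
# entries are lists here, as Gradio passes them back from the component):
#
#   for partial in bot([["hi", None]]):
#       print(partial[-1][1])   # "*", "**", "**T", "**Th", ...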
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, os.path.join(os.path.dirname(__file__), "avatar.png")),
    )
    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        img_audio = gr.UploadButton("📁", file_types=["image", "audio"], label="Upload Image or Audio")
    with gr.Row():
        # Adding a Button
        submit = gr.Button()
        clear = gr.Button(value="Clear")

    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, chatbot, api_name="bot_response"
    )
    # Re-enable the textbox once the bot has finished streaming.
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
    img_audio_msg = img_audio.upload(add_file, [chatbot, img_audio], [chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # submit.click(
    #     add_content,
    #     inputs=[chatbot, txt, image, audio_upload, audio_mic],
    #     outputs=[chatbot],
    # ).success(
    #     run,
    #     inputs=[chatbot, txt, image, audio_upload, audio_mic],
    #     outputs=[chatbot, txt, image, audio_upload, audio_mic],
    # )
    clear.click(
        clear_data,
        outputs=[txt, chatbot],
    )
    # chatbot.like(print_like_dislike, None, None)

# demo.queue()
demo.launch()
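# To try either variant locally, one would typically install the dependencies
# (gradio, pillow, plus whatever the inference package needs), run the script
# with `python app.py` from the Space checkout, and open the printed local URL.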