import os
import subprocess

# Install flash-attn at startup (common Hugging Face Spaces pattern).
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE makes pip use the prebuilt wheel
# instead of compiling CUDA kernels during install.
# BUGFIX: merge with the current environment — passing only the extra
# variable via `env=` would replace the whole environment (dropping PATH,
# HOME, etc.) and break the pip invocation.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=False,  # best-effort: the app can still start without flash-attn
)
|
|
import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor
from transformers import TextIteratorStreamer
import time
from threading import Thread
import torch
import spaces


# Load Phi-3-Vision once at import time so every request reuses the same
# weights. trust_remote_code is required because this checkpoint ships its
# own modeling code; torch_dtype="auto" keeps the checkpoint's native dtype.
model_id = "microsoft/Phi-3-vision-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto")
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
# NOTE(review): device_map="cuda" already places the weights on the GPU, so
# this .to() looks redundant — confirm with the accelerate version in use
# before removing.
model.to("cuda:0")
|
|
# HTML rendered inside the empty chatbot pane before the first message
# (consumed by gr.Chatbot(placeholder=...) below).
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Microsoft's Phi3 Vision</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Phi-3-Vision is a 4.2B parameter multimodal model that brings together language and vision capabilities.</p>
</div>
"""
|
|
@spaces.GPU
def bot_streaming(message, history):
    """Stream a Phi-3-Vision reply for a gr.MultimodalTextbox submission.

    Args:
        message: dict with "text" and "files" keys (MultimodalTextbox value).
        history: list of (user, assistant) pairs; an image upload appears as
            a tuple in the user slot with assistant == None.

    Yields:
        The progressively growing response string (gradio streaming).

    Raises:
        gr.Error: when no image is present in the message or the history.
    """
    print(f'message is - {message}')
    print(f'history is - {history}')

    # Locate the image to condition on: prefer one attached to the current
    # message, otherwise fall back to the most recent upload in history.
    # BUGFIX: initialize to None instead of relying on try/except NameError
    # around a possibly-unbound local.
    image = None
    if message["files"]:
        # Depending on the gradio version the file entry is either a dict
        # with a "path" key or a plain filepath string.
        last_file = message["files"][-1]
        image = last_file["path"] if isinstance(last_file, dict) else last_file
    else:
        for user_turn, _ in history:
            if isinstance(user_turn, tuple):
                image = user_turn[0]
    if image is None:
        raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")

    # Rebuild the conversation in chat-template format. An image upload
    # occupies its own history row (assistant == None); its <|image_1|>
    # token is merged into the user text of the FOLLOWING row.
    conversation = []
    pending_image_turn = False
    for user, assistant in history:
        if assistant is None:
            # Image-only row: reserve a user turn whose text arrives with
            # the next history entry.
            pending_image_turn = True
            conversation.append({"role": "user", "content": ""})
            continue
        if pending_image_turn:
            # BUGFIX: attach the image token to the turn just reserved
            # (the LAST entry), not conversation[0] — with an upload in the
            # middle of a conversation the old code tagged the wrong turn.
            conversation[-1]['content'] = f"<|image_1|>\n{user}"
            conversation.append({"role": "assistant", "content": assistant})
            pending_image_turn = False
            continue
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])

    if len(history) == 0:
        # First turn: the image arrives with this message, so the image
        # placeholder goes on the current user turn.
        conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
    else:
        conversation.append({"role": "user", "content": message['text']})
    print(f"prompt is -\n{conversation}")

    prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    image = Image.open(image)
    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")

    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True, clean_up_tokenization_spaces=False)
    # BUGFIX: temperature=0.0 removed — it is meaningless with
    # do_sample=False and newer transformers versions warn/error when both
    # are supplied.
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, eos_token_id=processor.tokenizer.eos_token_id)

    # Run generation in a background thread so we can yield partial text
    # from the streamer as it arrives.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
|
|
|
|
# UI wiring: a multimodal chat interface driven by bot_streaming.
message_box = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
chat_display = gr.Chatbot(scale=1, placeholder=PLACEHOLDER)

# One example per showcase image, served from the repository root.
EXAMPLES = [
    {"text": "Describe the image in details?", "files": ["./robo.jpg"]},
    {"text": "What does the chart display?", "files": ["./dataviz.png"]},
    {"text": "What is 3?", "files": ["./setofmark1.jpg"]},
    {"text": "Count the number of apples.", "files": ["./setofmark6.png"]},
    {"text": "I want to find a seat close to windows, where can I sit?", "files": ["./office1.jpg"]},
]

with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=bot_streaming,
        title="ChatGPTBots.net - Phi3 Vision 128K Instruct",
        description="Upload an image and start chatting about it, or simply try one of the examples below. If you won't upload an image, you will receive an error.",
        examples=EXAMPLES,
        examples_per_page=3,
        cache_examples=False,
        stop_btn="Stop Generation",
        multimodal=True,
        textbox=message_box,
        chatbot=chat_display,
    )

demo.queue()
demo.launch(debug=True, quiet=True)
|
|