File size: 6,279 Bytes
e2c6636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83a6c8a
e2c6636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import requests
from PIL import Image
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration
import torch


# Custom CSS injected into the Blocks app: centers and styles the main chat
# column, pins the prompt textbox to the bottom of the viewport, and makes the
# chatbot component scrollable.
# NOTE(review): this constant is currently unused — gr.Blocks below is built
# with an inline css= string instead; confirm whether it should be passed in.
css = """
#column_container {
  position: relative;
  height: 800px;
  max-width: 700px;
  display: flex;
  flex-direction: column;
  background-color: lightgray;
  border: 1px solid gray;
  border-radius: 5px;
  padding: 10px;
  box-shadow: 2px 2px 5px gray;
  margin-left: auto; 
  margin-right: auto;
}
#input_prompt {
  position: fixed;
  bottom: 0;
  max-width: 680px;
}
#chatbot-component {
  overflow: auto;
}
"""

# Load the BLIP-2 (OPT-2.7B) processor and model once at import time.
# The model weights are loaded in float16 to halve memory use; this pairs with
# the .to(device, torch.float16) cast applied to inputs inside predict().
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16) 

# Prefer GPU when available; float16 inference on CPU may be slow or
# unsupported depending on the torch build — TODO confirm CPU fallback works.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def upload_button_config():
    """Return a Gradio update that hides the upload button.

    Wired to chatbot.change so the button disappears once a
    conversation has started.
    """
    hide_button = gr.update(visible=False)
    return hide_button

def upload_textbox_config(text_in):
    """Return a Gradio update that reveals the question textbox.

    `text_in` is unused here but required so the function matches the
    inputs/outputs wiring of the upload event listener.
    """
    show_textbox = gr.update(visible=True)
    return show_textbox

#takes input and generates the Response
def predict(btn_upload, counter,image_hid, input, history):
    """Chatbot step handler shared by the upload and submit event listeners.

    On the very first call (counter == 0) it treats btn_upload as the freshly
    uploaded image: resizes it to 512px width, saves it to disk, and echoes it
    into the chat as an <img> tag. On every later call it runs BLIP-2 Q&A on
    the image carried in `image_hid` using the typed question.

    Args:
        btn_upload: upload payload from gr.UploadButton; passed to Image.open,
            so presumably a path or file-like object — TODO confirm.
        counter: turn counter from the hidden gr.Number; 0 means the image
            has not been shown yet.
        image_hid: image carried between turns via the hidden gr.Image.
        input: the user's question text (shadows the `input` builtin).
        history: list of (user, bot) tuples, or None on the first turn.

    Returns:
        Tuple matching the event listener outputs:
        (chatbot history, state history, saved image filename, incremented
        counter, image for the hidden component).
    """
    
    if counter == 0:
      image_in = Image.open(btn_upload)
      #Resizing the image to a fixed 512px width, preserving aspect ratio
      basewidth = 512
      wpercent = (basewidth/float(image_in.size[0]))
      hsize = int((float(image_in.size[1])*float(wpercent)))
      image_in = image_in.resize((basewidth,hsize)) #, Image.Resampling.LANCZOS)
      # Save the image to a fixed local file so Gradio can serve it via /file=
      #seed = random.randint(0, 1000000)
      img_name = "uploaded_image.png" #f"./edited_image_{seed}.png"
      image_in.save(img_name)
      # First turn: show the uploaded image itself as the bot "response"
      history = history or []
      response = '<img src="/file=' + img_name + '">'
      history.append((input, response))
      counter += 1
      return history, history, img_name, counter, image_in

    # Subsequent turns: run BLIP-2 on the carried image with the new question.
    #image = Image.open(btn_upload)
    print(f"prompt is :{input}") #Question: Is this photo unusual? Answer:
    prompt = f"Question: {input} Answer: "
    # Cast inputs to the model's device/dtype (float16, matching load-time dtype)
    inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, torch.float16)
    
    # Generate a short answer (capped at 10 new tokens) and decode it
    generated_ids = model.generate(**inputs, max_new_tokens=10)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is : {generated_text}")

    # Append the (question, answer) pair to the chat state
    history = history or []
    response = generated_text #'<img src="/file=' + img_name + '">'
    history.append((input, response))
    counter += 1
    return history, history, "uploaded_image.png", counter, image_hid

#Blocks Layout
# Builds the two-column UI (intro panel + chat column), wires the event
# listeners, and launches the app. Fix: corrected the user-facing typo
# "commensense" -> "commonsense" in the intro HTML.
with gr.Blocks(css="#chatbot-component {height: 800px}") as demo:  
  with gr.Row():
    with gr.Column(scale=1):
        #with gr.Accordion("See details"):
        # Static intro/header describing the BLIP-2 model and its authors
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                    <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    Bringing Visual Conversations to Life with BLIP2
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                Blip2 is functioning as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space. 
                It shows a wide range of capabilities including visual conversation, visual knowledge reasoning, visual commonsense reasoning, storytelling, 
                personalized image-to-text generation etc.<br>
                BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in🤗Transformers! 
                This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>. 
                The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a> 
                by Junnan Li, Dongxu Li, Silvio Savarese, Steven Hoi.<br><br>
                </p></div>""")
  
    with gr.Column(elem_id = "column_container", scale=2):
        #text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
        # Visible widgets: upload button, chat window, and (initially hidden) prompt box
        btn_upload = gr.UploadButton("Upload image!", file_types=["image"], file_count="single", elem_id="upload_button")
        chatbot = gr.Chatbot(elem_id = 'chatbot-component', label='Converse with Images')
        text_in = gr.Textbox(value='', placeholder="Type your questions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can ask questions to get more information about the image')
        # Hidden state carriers used by predict(): chat history, turn counter,
        # saved image filename, and the image itself
        state_in = gr.State()
        counter_out = gr.Number(visible=False, value=0, precision=0)
        text_out = gr.Textbox(visible=False)  #getting imag name out
        image_hid = gr.Image(visible=False) #, type='pil')

  #Using Event Listeners
  # On upload: run the first predict() turn, then reveal the question textbox
  btn_upload.upload(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
  btn_upload.upload(fn = upload_textbox_config, inputs=text_in, outputs = text_in)

  # On question submit: run a Q&A turn against the stored image
  text_in.submit(predict, [btn_upload, counter_out, image_hid, text_in, state_in], [chatbot, state_in, text_out, counter_out, image_hid])
  #text_in.submit(previous, [image_hid], [image_oneup])
  
  # Once the chat has content, hide the upload button
  chatbot.change(fn = upload_button_config, outputs=btn_upload) #, scroll_to_output = True)
  #text_in.submit(None, [], [], _js = "() => document.getElementById('#chatbot-component').scrollTop = document.getElementById('#chatbot-component').scrollHeight")

  #with gr.Accordion("Release Notes", open=False):
  #gr.Markdown(help_text)
  gr.HTML("""<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>""")
    
# Queue requests (up to 10 concurrent workers) and start the server
demo.queue(concurrency_count=10)
demo.launch(debug=True) #, width="80%", height=2000)