llava-4bit

Runtime error

App Files Files Community

merve HF Staff committed on Dec 8, 2023

Commit

95dbe7e

1 Parent(s): 35dad4a

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -50

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import os
 import string
 import gradio as gr
 import PIL.Image
 import torch
 from transformers import BitsAndBytesConfig, pipeline
 import re
 DESCRIPTION = "# LLaVA 🌋"
@@ -21,48 +22,79 @@ pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_con
 def extract_response_pairs(text):
     turns = re.split(r'(USER:|ASSISTANT:)', text)[1:]
     turns = [turn.strip() for turn in turns if turn.strip()]
-    print(turns[1::2])
     conv_list = []
     for i in range(0, len(turns[1::2]), 2):
         if i + 1 < len(turns[1::2]):
-            conv_list.append((turns[1::2][i].lstrip(":"), turns[1::2][i + 1].lstrip(":")))
     return conv_list
-def postprocess_output(output: str) -> str:
-    if output and output[-1] not in string.punctuation:
-        output += "."
-    return output
-def chat(image, text, temperature, length_penalty,
-         repetition_penalty, max_length, min_length, top_p,
-         history_chat):
-  prompt = " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"
-  outputs = pipe(image, prompt=prompt,
                   generate_kwargs={"temperature":temperature,
                   "length_penalty":length_penalty,
                   "repetition_penalty":repetition_penalty,
                   "max_length":max_length,
                   "min_length":min_length,
                   "top_p":top_p})
-  history_chat.append(outputs[0]["generated_text"])
-  chat_val =  extract_response_pairs(" ".join(history_chat))
-  return chat_val, history_chat
 css = """
   #mkd {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #ccc;
   }
   """
 with gr.Blocks(css="style.css") as demo:
@@ -74,16 +106,12 @@ with gr.Blocks(css="style.css") as demo:
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     gr.Markdown("Input image and text and start chatting 👇")
     with gr.Row():
       image = gr.Image(type="pil")
       text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
     history_chat = gr.State(value=[])
-    with gr.Row():
-        clear_chat_button = gr.Button("Clear")
-        chat_button = gr.Button("Submit", variant="primary")
     with gr.Accordion(label="Advanced settings", open=False):
         temperature = gr.Slider(
             label="Temperature",
@@ -135,18 +163,7 @@ with gr.Blocks(css="style.css") as demo:
         chatbot,
         history_chat
     ]
-    chat_button.click(fn=chat, inputs=[image,
-            text_input,
-            temperature,
-            length_penalty,
-            repetition_penalty,
-            max_length,
-            min_length,
-            top_p,
-            history_chat],
-        outputs=chat_output,
-        api_name="Chat",
-    )
     chat_inputs = [
         image,
@@ -159,15 +176,31 @@ with gr.Blocks(css="style.css") as demo:
         top_p,
         history_chat
     ]
     text_input.submit(
-        fn=chat,
-        inputs=chat_inputs,
-        outputs=chat_output
-    ).success(
-        fn=lambda: "",
-        outputs=chat_inputs,
-        queue=False,
-        api_name=False,
     )
     clear_chat_button.click(
         fn=lambda: ([], []),
@@ -187,7 +220,6 @@ with gr.Blocks(css="style.css") as demo:
             history_chat
         ],
         queue=False)
     examples = [["./examples/baklava.png", "How to make this pastry?"],["./examples/bee.png","Describe this image."]]
     gr.Examples(examples=examples, inputs=[image, text_input, chat_inputs])
@@ -195,4 +227,4 @@ with gr.Blocks(css="style.css") as demo:
 if __name__ == "__main__":
-    demo.queue(max_size=10).launch()

 import os
 import string
+import copy
 import gradio as gr
 import PIL.Image
 import torch
 from transformers import BitsAndBytesConfig, pipeline
 import re
+import time
 DESCRIPTION = "# LLaVA 🌋"
 def extract_response_pairs(text):
     """Parse a ``USER:``/``ASSISTANT:`` transcript into chatbot pairs.

     Args:
         text: Transcript in which each message is introduced by a literal
             ``USER:`` or ``ASSISTANT:`` marker.

     Returns:
         A list of two-element lists. Messages are paired in the order they
         appear (first with second, third with fourth, ...); a trailing
         message without a partner is dropped. Returns ``[]`` when ``text``
         contains no marker.
     """
     # The capturing group keeps the markers in the split result; [1:] drops
     # whatever precedes the first marker.
     turns = re.split(r'(USER:|ASSISTANT:)', text)[1:]
     turns = [turn.strip() for turn in turns if turn.strip()]
     # Take every second entry as the message bodies, slicing once instead of
     # re-slicing on each loop iteration.
     # NOTE(review): this assumes every marker is followed by non-empty text;
     # an empty message is filtered out above and would shift the alignment.
     messages = [turn.lstrip(":") for turn in turns[1::2]]
     # zip() stops at the shorter slice, which discards an unpaired last message.
     return [[first, second] for first, second in zip(messages[::2], messages[1::2])]
def add_text(history, text):
    """Append the user's message to the chat history as a pending turn.

    Args:
        history: Current chat history, a list of ``[user, bot]`` pairs.
            ``None`` is treated as an empty history.
        text: The message the user just submitted.

    Returns:
        A ``(history, text)`` tuple: a new history list whose last entry is
        ``[text, None]`` (no reply yet), and ``text`` unchanged.
    """
    # list.append() returns None, so assigning its result would replace the
    # history with None. Build a new list instead; this also leaves the
    # caller's list unmodified.
    history = list(history or []) + [[text, None]]
    return history, text
def infer(image, prompt,
          temperature,
          length_penalty,
          repetition_penalty,
          max_length,
          min_length,
          top_p):
    """Run the module-level ``pipe`` on one image and prompt.

    Args:
        image: Image forwarded to the pipeline as ``images``.
        prompt: Prompt text forwarded to the pipeline as ``prompt``.
        temperature, length_penalty, repetition_penalty, max_length,
        min_length, top_p: Generation settings, passed through unchanged
            under the same names in ``generate_kwargs``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    generation_options = {
        "temperature": temperature,
        "length_penalty": length_penalty,
        "repetition_penalty": repetition_penalty,
        "max_length": max_length,
        "min_length": min_length,
        "top_p": top_p,
    }
    results = pipe(images=image, prompt=prompt, generate_kwargs=generation_options)
    return results[0]["generated_text"]
def _history_to_prompt(history_chat):
    """Turn earlier chat turns into a list of prompt segments."""
    segments = []
    for turn in history_chat or []:
        if isinstance(turn, str):
            # Already-formatted transcript text: keep it as is.
            segments.append(turn)
            continue
        user_msg, bot_msg = turn
        if bot_msg is None:
            # Pending turn with no reply yet; the caller appends the current
            # user message itself, so including it here would duplicate it.
            continue
        segments.append(f"USER: {user_msg}\nASSISTANT: {bot_msg}")
    return segments


def bot(history_chat, text_input, image,
        temperature,
        length_penalty,
        repetition_penalty,
        max_length,
        min_length,
        top_p):
    """Generate a reply and stream it into the chat one character at a time.

    Args:
        history_chat: Earlier conversation, either a list of ``[user, bot]``
            pairs or a list of transcript strings. ``None`` or empty means
            no history.
        text_input: The user's new message.
        image: Image passed to ``infer`` together with the prompt.
        temperature, length_penalty, repetition_penalty, max_length,
        min_length, top_p: Generation settings forwarded to ``infer``.

    Yields:
        The conversation as a list of ``[user, bot]`` pairs, repeatedly, with
        the last reply growing by one character per yield.
    """
    # Joining the history directly fails when it holds [user, bot] pairs
    # (str.join only accepts strings), so serialise each turn first.
    segments = _history_to_prompt(history_chat)
    # NOTE(review): every call adds an "<image>" placeholder for the new turn;
    # confirm the model accepts this when earlier turns are also in the prompt.
    segments.append(f"USER: <image>\n{text_input}\nASSISTANT:")
    prompt = " ".join(segments)

    inference_result = infer(image, prompt,
                             temperature,
                             length_penalty,
                             repetition_penalty,
                             max_length,
                             min_length,
                             top_p)

    # Parse the generated transcript back into [user, bot] pairs.
    chat_val = extract_response_pairs(inference_result)
    if not chat_val:
        # No complete USER/ASSISTANT pair could be parsed; show the raw output
        # as the reply instead of failing on chat_val[-1].
        chat_val = [[text_input, inference_result]]

    final_reply = chat_val[-1][1]
    # Work on a copy so the parsed pairs keep the complete reply while the
    # streamed copy starts from an empty one.
    chat_state_list = copy.deepcopy(chat_val)
    chat_state_list[-1][1] = ""
    for character in final_reply:
        chat_state_list[-1][1] += character
        # Short pause so the reply appears to be typed out.
        time.sleep(0.05)
        yield chat_state_list
 # Inline stylesheet for the element with id "mkd".
 # NOTE(review): the visible gr.Blocks(...) call passes css="style.css", not
 # this string - confirm whether `css` is used in the parts of the file that
 # are not shown here.
 css = """
   #mkd {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
   }
   """
 with gr.Blocks(css="style.css") as demo:
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     gr.Markdown("Input image and text and start chatting 👇")
     with gr.Row():
       image = gr.Image(type="pil")
       text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
     history_chat = gr.State(value=[])
     with gr.Accordion(label="Advanced settings", open=False):
         temperature = gr.Slider(
             label="Temperature",
         chatbot,
         history_chat
     ]
     chat_inputs = [
         image,
         top_p,
         history_chat
     ]
+    with gr.Row():
+      clear_chat_button = gr.Button("Clear")
+      chat_button = gr.Button("Submit", variant="primary")
+      chat_button.click(add_text, [chatbot, text_input], [chatbot, text_input]).then(bot, [chatbot, text_input,
+                                                                                           image, temperature,
+        length_penalty,
+        repetition_penalty,
+        max_length,
+        min_length,
+        top_p], chatbot)
     text_input.submit(
+        add_text,
+        [chatbot, text_input],
+        [chatbot, text_input]
+    ).then(
+        fn=bot,
+        inputs=[chatbot, text_input, image, temperature,
+        length_penalty,
+        repetition_penalty,
+        max_length,
+        min_length,
+        top_p],
+        outputs=chatbot
     )
     clear_chat_button.click(
         fn=lambda: ([], []),
             history_chat
         ],
         queue=False)
     examples = [["./examples/baklava.png", "How to make this pastry?"],["./examples/bee.png","Describe this image."]]
     gr.Examples(examples=examples, inputs=[image, text_input, chat_inputs])
 if __name__ == "__main__":
+    demo.queue(max_size=10).launch(debug=True)