llava-4bit

Runtime error

App Files Community

merve HF Staff committed on Dec 7, 2023

Commit

3d139ce

1 Parent(s): 47173ac

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -87

app.py CHANGED Viewed

@@ -1,7 +1,3 @@
-#!/usr/bin/env python
-from __future__ import annotations
 import os
 import string
@@ -21,9 +17,12 @@ quantization_config = BitsAndBytesConfig(
 pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
 def extract_response_pairs(text):
     pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
     matches = pattern.findall(text)
     pairs = [(user.strip(), assistant.strip()) for user, assistant in matches]
@@ -37,26 +36,19 @@ def postprocess_output(output: str) -> str:
-def chat(image, text, temperature, length_penalty,
-         repetition_penalty, max_length, min_length, num_beams, top_p,
-         history_chat):
-  prompt = " ".join(history_chat)
-  prompt = f"USER: <image>\n{text}\nASSISTANT:"
   outputs = pipe(image, prompt=prompt,
-                  generate_kwargs={"temperature":temperature,
-                  "length_penalty":length_penalty,
-                  "repetition_penalty":repetition_penalty,
-                  "max_length":max_length,
-                  "min_length":min_length,
-                  "num_beams":num_beams,
-                  "top_p":top_p})
-  output = postprocess_output(outputs[0]["generated_text"])
-  history_chat.append(output)
   chat_val =  extract_response_pairs(" ".join(history_chat))
   return chat_val, history_chat
@@ -69,89 +61,31 @@ css = """
   """
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-    gr.Markdown("**LLaVA, one of the greatest multimodal chat models is now available in transformers with 4-bit quantization! ⚡️  **")
-    gr.Markdown("**Try it in this demo 🤗 **")
     chatbot = gr.Chatbot(label="Chat", show_label=False)
-    gr.Markdown("Input image and text and start chatting 👇")
     with gr.Row():
       image = gr.Image(type="pil")
-      text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
     history_chat = gr.State(value=[])
     with gr.Row():
         clear_chat_button = gr.Button("Clear")
         chat_button = gr.Button("Submit", variant="primary")
     with gr.Accordion(label="Advanced settings", open=False):
-        temperature = gr.Slider(
-            label="Temperature",
-            info="Used with nucleus sampling.",
-            minimum=0.5,
-            maximum=1.0,
-            step=0.1,
-            value=1.0,
-        )
-        length_penalty = gr.Slider(
-            label="Length Penalty",
-            info="Set to larger for longer sequence, used with beam search.",
-            minimum=-1.0,
-            maximum=2.0,
-            step=0.2,
-            value=1.0,
-        )
-        repetition_penalty = gr.Slider(
-            label="Repetition Penalty",
-            info="Larger value prevents repetition.",
-            minimum=1.0,
-            maximum=5.0,
-            step=0.5,
-            value=1.5,
-        )
         max_length = gr.Slider(
             label="Max Length",
             minimum=1,
-            maximum=512,
             step=1,
-            value=50,
-        )
-        min_length = gr.Slider(
-            label="Minimum Length",
-            minimum=1,
-            maximum=100,
-            step=1,
-            value=1,
-        )
-        num_beams = gr.Slider(
-            label="Number of Beams",
-            minimum=1,
-            maximum=10,
-            step=1,
-            value=5,
-        )
-        top_p = gr.Slider(
-            label="Top P",
-            info="Used with nucleus sampling.",
-            minimum=0.5,
-            maximum=1.0,
-            step=0.1,
-            value=0.9,
         )
     chat_output = [
         chatbot,
         history_chat
     ]
     chat_button.click(fn=chat, inputs=[image,
             text_input,
-            temperature,
-            length_penalty,
-            repetition_penalty,
             max_length,
-            min_length,
-            num_beams,
-            top_p,
             history_chat],
         outputs=chat_output,
         api_name="Chat",
@@ -160,13 +94,7 @@ with gr.Blocks(css="style.css") as demo:
     chat_inputs = [
         image,
         text_input,
-        temperature,
-        length_penalty,
-        repetition_penalty,
         max_length,
-        min_length,
-        num_beams,
-        top_p,
         history_chat
     ]
     text_input.submit(
@@ -201,4 +129,4 @@ with gr.Blocks(css="style.css") as demo:
 if __name__ == "__main__":
-    demo.queue(max_size=10).launch()

 import os
 import string
 pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
+DESCRIPTION = "LLaVA is now available in transformers!"
 def extract_response_pairs(text):
     pattern = re.compile(r'(USER:.*?)ASSISTANT:(.*?)(?:$|USER:)', re.DOTALL)
     matches = pattern.findall(text)
+    print(matches)
     pairs = [(user.strip(), assistant.strip()) for user, assistant in matches]
+def chat(image, text, max_length, history_chat):
+  prompt = " ".join(history_chat) + f"USER: <image>\n{text}\nASSISTANT:"
   outputs = pipe(image, prompt=prompt,
+                  generate_kwargs={
+                  "max_length":max_length})
+  #output = postprocess_output(outputs[0]["generated_text"])
+  history_chat.append(outputs[0]["generated_text"])
   chat_val =  extract_response_pairs(" ".join(history_chat))
   return chat_val, history_chat
   """
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     with gr.Row():
       image = gr.Image(type="pil")
+      text_input = gr.Text(label="Chat Input", show_label=False, max_lines=1, container=False)
     history_chat = gr.State(value=[])
     with gr.Row():
         clear_chat_button = gr.Button("Clear")
         chat_button = gr.Button("Submit", variant="primary")
     with gr.Accordion(label="Advanced settings", open=False):
         max_length = gr.Slider(
             label="Max Length",
             minimum=1,
+            maximum=200,
             step=1,
+            value=100,
         )
     chat_output = [
         chatbot,
         history_chat
     ]
     chat_button.click(fn=chat, inputs=[image,
             text_input,
             max_length,
             history_chat],
         outputs=chat_output,
         api_name="Chat",
     chat_inputs = [
         image,
         text_input,
         max_length,
         history_chat
     ]
     text_input.submit(
 if __name__ == "__main__":
+    demo.queue(max_size=10).launch(debug=True)