IsraelSalgado committed on
Commit
8efbdfa
·
verified ·
1 Parent(s): 88c1435

Upload app2.py

Browse files
Files changed (1) hide show
  1. app2.py +107 -0
app2.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from transformers import TextIteratorStreamer
3
+ from threading import Thread
4
+ import os
5
+ from transformers import AutoModelForImageTextToText, QuantoConfig
6
+ from PIL import Image
7
+ import io
8
+ import requests
9
+ from transformers import AutoProcessor, AutoModelForImageTextToText
10
+ import torch
11
+ import streamlit as st
12
+
13
+
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+
16
+
17
def reduce_image_size(img, scale_percent=50):
    """Return *img* resized to ``scale_percent`` percent of its size.

    Args:
        img: A PIL image (anything exposing ``.size`` and ``.resize``).
        scale_percent: Target size as a percentage of the original
            dimensions. Defaults to 50 (half size).

    Returns:
        A new, resized image; *img* itself is not modified.

    Raises:
        ValueError: If ``scale_percent`` is not positive.
    """
    if scale_percent <= 0:
        raise ValueError("scale_percent must be positive")
    width, height = img.size
    # Clamp each dimension to at least 1 pixel: for tiny inputs the scaled
    # size truncates to 0, which PIL's resize() rejects.
    new_width = max(1, int(width * scale_percent / 100))
    new_height = max(1, int(height * scale_percent / 100))
    resized_img = img.resize((new_width, new_height))
    return resized_img
24
+
25
+
26
def model_inference(
    user_prompt, chat_history, max_new_tokens, images
):
    """Stream a model answer for one user turn as a generator of strings.

    Runs ``model.generate`` in a background thread and yields the
    accumulated response text token-by-token, so the caller can render a
    progressively growing answer.

    Args:
        user_prompt: The user's text question (plain string).
        chat_history: List of chat-template messages. NOTE: mutated in
            place — the new user turn is appended before generation.
        max_new_tokens: Generation length cap passed to ``generate``.
        images: Image input forwarded to the processor alongside the
            prompt (assumes one image per turn — the template contains a
            single ``{"type": "image"}`` slot; verify for multi-image use).

    Yields:
        The full response text accumulated so far (not the delta).

    Relies on module globals ``processor``, ``model`` and ``device`` being
    initialized (done in ``main``).
    """
    # Rewrap the plain string into the chat-template message format
    # expected by apply_chat_template (shadows the parameter on purpose).
    user_prompt = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": user_prompt},
        ],
    }
    chat_history.append(user_prompt)
    # skip_prompt=True: only newly generated tokens are streamed, not the
    # echoed input. timeout=5.0: iterating the streamer raises if no token
    # arrives within 5s, so a hung generate thread can't block us forever.
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, timeout=5.0
    )

    generation_args = {
        "max_new_tokens": max_new_tokens,
        "streamer": streamer,
        "do_sample": False,  # greedy decoding for deterministic output
    }

    prompt = processor.apply_chat_template(chat_history, add_generation_prompt=True)
    inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
    # Merge the tokenized inputs (input_ids, pixel_values, ...) into the
    # kwargs for generate.
    generation_args.update(inputs)

    # generate() blocks, so run it in a worker thread and drain the
    # streamer from this (caller's) thread.
    thread = Thread(target=model.generate, kwargs=generation_args)
    thread.start()

    acc_text = ""
    for text_token in streamer:
        # Small delay to smooth the visual streaming effect in the UI.
        time.sleep(0.04)
        acc_text += text_token
        # Strip the model's end-of-turn sentinel; 18 == len("<end_of_utterance>").
        if acc_text.endswith("<end_of_utterance>"):
            acc_text = acc_text[:-18]
        yield acc_text

    # Ensure the generation thread has fully finished before returning.
    thread.join()
64
+
65
def main():
    """Main function of the Streamlit app.

    Collects a text prompt plus an image (upload or URL), then streams the
    model's answer into the page.
    """
    st.title("Text and Image Input App")

    # Load the model and processor once and cache them in session_state so
    # Streamlit reruns don't reload the (large) checkpoint every time.
    global model, processor
    if "model" not in st.session_state:
        model_id = "HuggingFaceM4/idefics2-8b"
        quantization_config = QuantoConfig(weights="int8")
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForImageTextToText.from_pretrained(
            model_id,
            # Use the detected device instead of hard-coding "cuda", which
            # crashes on CPU-only hosts.
            device_map=device.type,
            quantization_config=quantization_config,
        )
        st.session_state["model"] = model
        st.session_state["processor"] = processor

    model = st.session_state["model"]
    processor = st.session_state["processor"]

    # Get text input
    text_input = st.text_input("Enter your text:")

    # Get image input; an uploaded file takes precedence over a URL.
    # Initialize up front: previously this name was unbound when no image
    # was supplied, raising NameError at the Predict check below.
    processed_image = None
    image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if image_input is not None:
        image = Image.open(image_input)
        st.image(image, caption='Uploaded Image')
        processed_image = reduce_image_size(image)
    else:
        image_url = st.text_input("Enter image URL:")
        if image_url:
            # The URL is untrusted user input: bound the request and report
            # failures instead of crashing the app.
            try:
                response = requests.get(image_url, timeout=10)
                response.raise_for_status()
                img = Image.open(io.BytesIO(response.content))
            except Exception as exc:
                st.error(f"Could not load image from URL: {exc}")
            else:
                st.image(img, caption='Image from URL')
                processed_image = reduce_image_size(img)

    if st.button("Predict"):
        if text_input and processed_image:
            prediction = model_inference(
                # Use the text the user actually typed; the previous
                # hard-coded prompt silently ignored text_input.
                user_prompt=text_input,
                chat_history=[],  # Initialize chat history here
                max_new_tokens=100,
                images=processed_image,
            )
            # model_inference is a generator: stream the growing answer into
            # a placeholder. Previously the generator was never consumed, so
            # no inference ran and nothing was displayed.
            placeholder = st.empty()
            for partial_text in prediction:
                placeholder.markdown(partial_text)