IsraelSalgado committed on
Commit
8efbdfa
·
verified ·
1 Parent(s): 88c1435

Upload app2.py

Browse files
Files changed (1) hide show
  1. app2.py +107 -0
app2.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from transformers import TextIteratorStreamer
3
+ from threading import Thread
4
+ import os
5
+ from transformers import AutoModelForImageTextToText, QuantoConfig
6
+ from PIL import Image
7
+ import io
8
+ import requests
9
+ from transformers import AutoProcessor, AutoModelForImageTextToText
10
+ import torch
11
+ import streamlit as st
12
+
13
+
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+
16
+
17
def reduce_image_size(img, scale_percent=50):
    """Return *img* resized to ``scale_percent`` percent of its size.

    Args:
        img: A PIL image (anything exposing ``.size`` and ``.resize``).
        scale_percent: Target size as a percentage of the original
            dimensions. Defaults to 50 (half size).

    Returns:
        A new, resized image; *img* itself is not modified.

    Raises:
        ValueError: If ``scale_percent`` is not positive.
    """
    if scale_percent <= 0:
        raise ValueError("scale_percent must be positive")
    width, height = img.size
    # Clamp each dimension to at least 1 pixel: for tiny inputs the scaled
    # size truncates to 0, which PIL's resize() rejects.
    new_width = max(1, int(width * scale_percent / 100))
    new_height = max(1, int(height * scale_percent / 100))
    resized_img = img.resize((new_width, new_height))
    return resized_img
24
+
25
+
26
def model_inference(
    user_prompt, chat_history, max_new_tokens, images
):
    """Stream a model answer for one user turn as a generator of strings.

    Runs ``model.generate`` in a background thread and yields the
    accumulated response text token-by-token, so the caller can render a
    progressively growing answer.

    Args:
        user_prompt: The user's text question (plain string).
        chat_history: List of chat-template messages. NOTE: mutated in
            place — the new user turn is appended before generation.
        max_new_tokens: Generation length cap passed to ``generate``.
        images: Image input forwarded to the processor alongside the
            prompt (assumes one image per turn — the template contains a
            single ``{"type": "image"}`` slot; verify for multi-image use).

    Yields:
        The full response text accumulated so far (not the delta).

    Relies on module globals ``processor``, ``model`` and ``device`` being
    initialized (done in ``main``).
    """
    # Rewrap the plain string into the chat-template message format
    # expected by apply_chat_template (shadows the parameter on purpose).
    user_prompt = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": user_prompt},
        ],
    }
    chat_history.append(user_prompt)
    # skip_prompt=True: only newly generated tokens are streamed, not the
    # echoed input. timeout=5.0: iterating the streamer raises if no token
    # arrives within 5s, so a hung generate thread can't block us forever.
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, timeout=5.0
    )

    generation_args = {
        "max_new_tokens": max_new_tokens,
        "streamer": streamer,
        "do_sample": False,  # greedy decoding for deterministic output
    }

    prompt = processor.apply_chat_template(chat_history, add_generation_prompt=True)
    inputs = processor(text=prompt, images=images, return_tensors="pt").to(device)
    # Merge the tokenized inputs (input_ids, pixel_values, ...) into the
    # kwargs for generate.
    generation_args.update(inputs)

    # generate() blocks, so run it in a worker thread and drain the
    # streamer from this (caller's) thread.
    thread = Thread(target=model.generate, kwargs=generation_args)
    thread.start()

    acc_text = ""
    for text_token in streamer:
        # Small delay to smooth the visual streaming effect in the UI.
        time.sleep(0.04)
        acc_text += text_token
        # Strip the model's end-of-turn sentinel; 18 == len("<end_of_utterance>").
        if acc_text.endswith("<end_of_utterance>"):
            acc_text = acc_text[:-18]
        yield acc_text

    # Ensure the generation thread has fully finished before returning.
    thread.join()
64
+
65
def main():
    """Main function of the Streamlit app.

    Collects a text prompt plus an image (upload or URL), then streams the
    model's answer into the page.
    """
    st.title("Text and Image Input App")

    # Load the model and processor once and cache them in session_state so
    # Streamlit reruns don't reload the (large) checkpoint every time.
    global model, processor
    if "model" not in st.session_state:
        model_id = "HuggingFaceM4/idefics2-8b"
        quantization_config = QuantoConfig(weights="int8")
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForImageTextToText.from_pretrained(
            model_id,
            # Use the detected device instead of hard-coding "cuda", which
            # crashes on CPU-only hosts.
            device_map=device.type,
            quantization_config=quantization_config,
        )
        st.session_state["model"] = model
        st.session_state["processor"] = processor

    model = st.session_state["model"]
    processor = st.session_state["processor"]

    # Get text input
    text_input = st.text_input("Enter your text:")

    # Get image input; an uploaded file takes precedence over a URL.
    # Initialize up front: previously this name was unbound when no image
    # was supplied, raising NameError at the Predict check below.
    processed_image = None
    image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if image_input is not None:
        image = Image.open(image_input)
        st.image(image, caption='Uploaded Image')
        processed_image = reduce_image_size(image)
    else:
        image_url = st.text_input("Enter image URL:")
        if image_url:
            # The URL is untrusted user input: bound the request and report
            # failures instead of crashing the app.
            try:
                response = requests.get(image_url, timeout=10)
                response.raise_for_status()
                img = Image.open(io.BytesIO(response.content))
            except Exception as exc:
                st.error(f"Could not load image from URL: {exc}")
            else:
                st.image(img, caption='Image from URL')
                processed_image = reduce_image_size(img)

    if st.button("Predict"):
        if text_input and processed_image:
            prediction = model_inference(
                # Use the text the user actually typed; the previous
                # hard-coded prompt silently ignored text_input.
                user_prompt=text_input,
                chat_history=[],  # Initialize chat history here
                max_new_tokens=100,
                images=processed_image,
            )
            # model_inference is a generator: stream the growing answer into
            # a placeholder. Previously the generator was never consumed, so
            # no inference ran and nothing was displayed.
            placeholder = st.empty()
            for partial_text in prediction:
                placeholder.markdown(partial_text)