- app.py +35 -20
- requirements.txt +3 -1
app.py
CHANGED
@@ -17,6 +17,8 @@ import cv2
 
 from transformers import (
     Qwen2_5_VLForConditionalGeneration,
+    MllamaForConditionalGeneration,
+    GenerationMixin,
     AutoModel,
     AutoTokenizer,
     AutoProcessor,
@@ -29,6 +31,9 @@ from llm_json import parse_llm_json
 from data_experiments import all_products, all_experiments, filter_experiments, get_experiment
 import llm_messages_v1
 
+from dotenv import load_dotenv
+load_dotenv()
+
 llm_messages = llm_messages_v1
 
 # Constants for text generation
@@ -38,24 +43,32 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-… (5 removed lines not recoverable from the page rendering)
+MODEL_ID_QWEN_7B = "Qwen/Qwen2.5-VL-7B-Instruct"
+processor_qwen_7b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_7B, trust_remote_code=True)
+model_qwen_7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_QWEN_7B,
+    trust_remote_code=True,
+    torch_dtype=torch.float16
+).to(device).eval()
+
+MODEL_ID_QWEN_3B = "Qwen/Qwen2.5-VL-3B-Instruct"
+processor_qwen_3b = AutoProcessor.from_pretrained(MODEL_ID_QWEN_3B, trust_remote_code=True)
+model_qwen_3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID_QWEN_3B,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 
-… (4 removed lines not recoverable from the page rendering)
-    MODEL_ID_X,
+MODEL_ID_LLAMA = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+processor_llama = AutoProcessor.from_pretrained(MODEL_ID_LLAMA, trust_remote_code=True)
+model_llama = MllamaForConditionalGeneration.from_pretrained(
+    MODEL_ID_LLAMA,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 
+openai_api_key = os.getenv("OPENAI_API_KEY")
+
 @spaces.GPU
 def process_image(
     model_name: str,
@@ -167,17 +180,19 @@ def process_image(
     full_json_text = json.dumps(full_json, indent=2, ensure_ascii=False)
     yield full_text, full_json_text
 
-def get_processor_and_model(model_name: str) -> tuple[Union[AutoProcessor, None], Union[…
-    if model_name == …
-        return …
-    elif model_name == …
-        return …
+def get_processor_and_model(model_name: str) -> tuple[Union[AutoProcessor, None], Union[GenerationMixin, None]]:
+    if model_name == MODEL_ID_QWEN_7B:
+        return processor_qwen_7b, model_qwen_7b
+    elif model_name == MODEL_ID_QWEN_3B:
+        return processor_qwen_3b, model_qwen_3b
+    elif model_name == MODEL_ID_LLAMA:
+        return processor_llama, model_llama
     else:
         raise (None, None)
 
 def process_image_safety_state(
     processor: AutoProcessor,
-    model: …
+    model: GenerationMixin,
     image: Image.Image,
     max_new_tokens: int,
     generateion_config: GenerationConfig,
@@ -202,7 +217,7 @@ def process_image_safety_state(
 
 def process_image_sharing_state(
     processor: AutoProcessor,
-    model: …
+    model: GenerationMixin,
     image: Image.Image,
     max_new_tokens: int,
     generateion_config: GenerationConfig,
@@ -227,7 +242,7 @@ def process_image_sharing_state(
 
 def process_image_approval_state(
     processor: AutoProcessor,
-    model: …
+    model: GenerationMixin,
     experiment: dict,
     image: Image.Image,
     additional_text: Union[str, None],
@@ -386,9 +401,9 @@ with gr.Blocks() as demo:
         image_upload = gr.Image(type="pil", label="Image")
         submit_button = gr.Button("Submit", elem_classes="submit-btn")
         model_choice = gr.Radio(
-            choices=[…
+            choices=[MODEL_ID_QWEN_7B, MODEL_ID_QWEN_3B, MODEL_ID_LLAMA],
             label="Select Model",
-            value=…
+            value=MODEL_ID_QWEN_3B
         )
         with gr.Accordion("Advanced options", open=False):
             max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
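Two review notes on the diff above. The misspelled keyword `generateion_config` is used consistently across the signatures shown, so it runs as-is, though `generation_config` would be the conventional name. More importantly, the unchanged fallback `raise (None, None)` is not valid Python: raising a non-exception object itself fails with `TypeError: exceptions must derive from BaseException`, so an unknown model name would crash with a confusing error. A minimal sketch of a conventional fallback, reusing only names that appear in the commit (this sketch is not part of the commit):

def get_processor_and_model(model_name: str) -> tuple[AutoProcessor, GenerationMixin]:
    if model_name == MODEL_ID_QWEN_7B:
        return processor_qwen_7b, model_qwen_7b
    elif model_name == MODEL_ID_QWEN_3B:
        return processor_qwen_3b, model_qwen_3b
    elif model_name == MODEL_ID_LLAMA:
        return processor_llama, model_llama
    # Raise a real exception; `raise (None, None)` would itself fail with
    # TypeError because a tuple is not a BaseException subclass.
    raise ValueError(f"Unknown model: {model_name!r}")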
requirements.txt
CHANGED
@@ -10,4 +10,6 @@ accelerate
 pillow
 opencv-python
 av
-demjson3
+demjson3
+dotenv
+openai
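A packaging note on the new entries: the import `from dotenv import load_dotenv` in app.py is provided by the `python-dotenv` distribution. Depending on the index state, the bare name `dotenv` may resolve to a thin compatibility wrapper around it, but pinning `python-dotenv` explicitly is the safer spelling. A minimal sketch of the startup pattern the commit relies on (the fail-fast guard is an assumption, not part of the commit):

import os
from dotenv import load_dotenv

# Reads key=value pairs from a local .env file, e.g. OPENAI_API_KEY=sk-...;
# does nothing if no .env file exists (the Space then relies on real env vars).
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None:
    # Sketch: fail fast rather than passing None to an OpenAI client later.
    raise RuntimeError("OPENAI_API_KEY is not set")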
|