# FGDemo / local_model.py
# Artem
# added a print statement lol
# 0b79131
import torch
import gradio as gr
from PIL import Image
from consts import BASE_MODEL
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from qwen_vl_utils import process_vision_info
import time
# Pick the accelerator name: "cuda" when torch reports a usable GPU, else "cpu".
# NOTE(review): `device` is not referenced again in the code visible here, and
# the model below is loaded without an explicit device -- confirm whether the
# model (and the inputs built in query_local) were meant to be moved to it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the model weights and the matching processor once, at import time,
# both from the BASE_MODEL identifier defined in consts.
model = Qwen2VLForConditionalGeneration.from_pretrained(BASE_MODEL)
processor = AutoProcessor.from_pretrained(BASE_MODEL)
def query_local(image: Image.Image, question: str):
    """Answer a question about an image using the locally loaded model.

    Builds a single-turn user message containing the image and the question,
    renders it with the processor's chat template, generates up to 256 new
    tokens and returns the decoded reply with the prompt tokens stripped.
    Progress markers and the elapsed wall-clock time are printed to stdout.

    Args:
        image: The image to ask about.
        question: The text prompt sent alongside the image.

    Returns:
        The decoded completion for the single request in the batch.

    Raises:
        ValueError: If no image is supplied (``image`` is None).
    """
    start_time = time.time()
    print(f"starting local inference at: {start_time}")

    # Explicit None check: relying on truthiness (`not image`) raises for
    # array-like inputs and hides the actual intent of "no image given".
    if image is None:
        raise ValueError("Missing image")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": question},
            ],
        }
    ]

    # Render the conversation to the prompt string (not token ids) and let
    # the helper pull the image/video payloads out of the same messages.
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    images, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=text,
        images=images,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Keep the input tensors on the same device as the model weights so that
    # generation does not fail with a device mismatch when the model is not
    # on the CPU. This is a no-op when both already live on the same device.
    inputs = inputs.to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=256)
    print("inputs generated")

    # Each generated sequence starts with its prompt tokens; drop that prefix
    # so only the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    print("trimmed")

    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    print("decoded")
    print(f"local {time.time() - start_time} --- ")

    # Only one conversation was submitted, so the batch holds one reply.
    return output_text[0]