gen3-visual

Sleeping

sajofu committed on Aug 16, 2025

Commit

bea8c57

verified ·

1 Parent(s): 9661f49

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,6 +32,9 @@ def transcribe_image(image):
         return_tensors="pt"
     ).to(device)
     with torch.inference_mode():
         output = model.generate(
             **inputs,
@@ -67,6 +70,9 @@ def visual_question_answer(image, question):
         return_tensors="pt"
     ).to(device)
     with torch.inference_mode():
         output = model.generate(
             **inputs,

         return_tensors="pt"
     ).to(device)
+    # FIX: Ensure the image tensor's dtype matches the model's dtype
+    inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)
     with torch.inference_mode():
         output = model.generate(
             **inputs,
         return_tensors="pt"
     ).to(device)
+    # FIX: Ensure the image tensor's dtype matches the model's dtype
+    inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)
     with torch.inference_mode():
         output = model.generate(
             **inputs,