Spaces:

saa231
/

MutimodalVisionAssistant

Paused

saa231 committed on Apr 27, 2025

Commit

e2b61f7

verified ·

1 Parent(s): acdebd9

Update project_model.py

Files changed (1) hide show

project_model.py CHANGED Viewed

@@ -203,11 +203,19 @@ def process_inputs(
     session.add_question(question)
     # Create the structured input for Gemma, including image, question, and visual context
-    gemma_input = {
-        "images": session.annotated_image,  # Image context
-        "text": question + " " + session.visual_context  # Question and visual context combined as text
-    }
     # Process the input through Gemma
     gemma_output = gemma_pipe(gemma_input)

     session.add_question(question)
     # Create the structured input for Gemma, including image, question, and visual context
+    #gemma_input = {
+    #    "images": session.annotated_image,  # Image context
+    #    "text": question + " " + session.visual_context  # Question and visual context combined as text
+    #}
+    gemma_input = [{
+        "role": "user",
+        "content": [
+            {"type": "image", "image": session.annotated_image},   # Image context
+            {"type": "text", "text": question + " " + session.visual_context}  # Question and visual context combined as text
+            ]
+        }]
     # Process the input through Gemma
     gemma_output = gemma_pipe(gemma_input)