Spaces:

saa231
/

MutimodalVisionAssistant

Paused

saa231 committed on Apr 27, 2025

Commit

6a15e66

verified ·

1 Parent(s): 2826fcf

Update project_model.py

Files changed (1) hide show

project_model.py CHANGED Viewed

@@ -55,6 +55,7 @@ class VisualQAState:
     """
     def __init__(self):
         self.current_image: Image.Image = None
         self.visual_context: str = ""
         self.message_history = []
@@ -64,6 +65,7 @@ class VisualQAState:
         Resets context and starts new message history.
         """
         self.current_image = image
         self.visual_context = visual_context
         self.message_history = [
             {
@@ -186,8 +188,14 @@ def process_inputs(
     enable_tts: bool = True
 ):
     if image:
         visual_context, annotated_image = generate_visual_context(image)
         session.reset(image, visual_context)
     if audio_path:
         audio_text = whisper_pipe(audio_path)["text"]

     """
     def __init__(self):
         self.current_image: Image.Image = None
+        self.annotated_image: Image.Image = None
         self.visual_context: str = ""
         self.message_history = []
         Resets context and starts new message history.
         """
         self.current_image = image
+        self.annotated_image = annotated_image
         self.visual_context = visual_context
         self.message_history = [
             {
     enable_tts: bool = True
 ):
     if image:
+        # Generate visual context and annotated image
         visual_context, annotated_image = generate_visual_context(image)
+        # Reset session with the current image and visual context
         session.reset(image, visual_context)
+        # Set annotated image to session for future use (e.g., in follow-up)
+        session.annotated_image = annotated_image
     if audio_path:
         audio_text = whisper_pipe(audio_path)["text"]