Spaces:

m7mdal7aj
/

KB-VQA

Sleeping

App Files Community

m7mdal7aj committed on Feb 29, 2024

Commit

571fea7

verified ·

1 Parent(s): 1d707c1

Update my_model/tabs/run_inference.py

Browse files

Files changed (1) hide show

my_model/tabs/run_inference.py +95 -27

my_model/tabs/run_inference.py CHANGED Viewed

@@ -18,13 +18,12 @@ from my_model.config import inference_config as config
 class InferenceRunner(StateManager):
     """
-    InferenceRunner manages the user interface and interactions for a Streamlit-based
-    Knowledge-Based Visual Question Answering (KBVQA) application. It handles image uploads,
-    displays sample images, and facilitates the question-answering process using the KBVQA model.
-    it inherits the StateManager class.
     """
-    def __init__(self):
         """
         Initializes the InferenceRunner instance, setting up the necessary state.
         """
@@ -32,16 +31,17 @@ class InferenceRunner(StateManager):
         super().__init__()
-    def answer_question(self, caption, detected_objects_str, question):
         """
-        Generates an answer to a given question based on the image's caption and detected objects.
         Args:
-            caption (str): The caption generated for the image.
-            detected_objects_str (str): String representation of objects detected in the image.
-            question (str): The user's question about the image.
         Returns:
-            str: The generated answer to the question.
         """
         free_gpu_resources()
         answer = st.session_state.kbvqa.generate_answer(question, caption, detected_objects_str)
@@ -50,7 +50,11 @@ class InferenceRunner(StateManager):
         return answer, prompt_length
-    def display_sample_images(self):
         self.col1.write("Choose from sample images:")
         cols = self.col1.columns(len(config.SAMPLE_IMAGES))
         for idx, sample_image_path in enumerate(config.SAMPLE_IMAGES):
@@ -61,18 +65,39 @@ class InferenceRunner(StateManager):
                 if st.button(f'Select Sample Image {idx + 1}', key=f'sample_{idx}'):
                     self.process_new_image(sample_image_path, image)
-    def handle_image_upload(self):
         uploaded_image = self.col1.file_uploader("Or upload an Image", type=["png", "jpg", "jpeg"])
         if uploaded_image is not None:
             self.process_new_image(uploaded_image.name, Image.open(uploaded_image))
-    def display_image_and_analysis(self, image_key, image_data, nested_col21, nested_col22):
         image_for_display = self.resize_image(image_data['image'], 600)
         nested_col21.image(image_for_display, caption=f'Uploaded Image: {image_key[-11:]}')
         self.handle_analysis_button(image_key, image_data, nested_col22)
-    def handle_analysis_button(self, image_key, image_data, nested_col22):
         if not image_data['analysis_done'] or self.settings_changed or self.confidance_change:
             nested_col22.text("Please click 'Analyze Image'..")
             analyze_button_key = f'analyze_{image_key}_{st.session_state.detection_model}_{st.session_state.confidence_level}'
@@ -81,29 +106,63 @@ class InferenceRunner(StateManager):
                 self.update_image_data(image_key, caption, detected_objects_str, True)
             st.session_state['loading_in_progress'] = False
-    def handle_question_answering(self, image_key, image_data, nested_col22):
-        # Initialize qa_history for each image
-        #qa_history = image_data.get('qa_history', [])
         if image_data['analysis_done']:
             self.display_question_answering_interface(image_key, image_data, nested_col22)
         if self.settings_changed or self.confidance_change:
             nested_col22.warning("Confidence level changed, please click 'Analyze Image'.")
-    def display_question_answering_interface(self, image_key, image_data, nested_col22):
         sample_questions = config.SAMPLE_QUESTIONS.get(image_key, [])
         selected_question = nested_col22.selectbox("Select a sample question or type your own:", ["Custom question..."] + sample_questions, key=f'sample_question_{image_key}')
-        custom_question = nested_col22.text_input("Or ask your own question:", key=f'custom_question_{image_key}')
-        question = custom_question if selected_question == "Custom question..." else selected_question
         self.process_question(image_key, question, image_data, nested_col22)
         qa_history = image_data.get('qa_history', [])
         for num, (q, a, p) in enumerate(qa_history):
             nested_col22.text(f"Q{num+1}: {q}\nA{num+1}: {a}\nPrompt Length: {p}\n")
-    def process_question(self, image_key, question, image_data, nested_col22):
         qa_history = image_data.get('qa_history', [])
         if question and (question not in [q for q, _, _ in qa_history] or self.settings_changed or self.confidance_change):
             if nested_col22.button('Get Answer', key=f'answer_{image_key}', disabled=self.is_widget_disabled):
@@ -111,7 +170,14 @@ class InferenceRunner(StateManager):
                 self.add_to_qa_history(image_key, question, answer, prompt_length)
                # nested_col22.text(f"Q: {question}\nA: {answer}\nPrompt Length: {prompt_length}")
-    def image_qa_app(self):
         self.display_sample_images()
         self.handle_image_upload()
         self.display_session_state()
@@ -126,9 +192,10 @@ class InferenceRunner(StateManager):
     def run_inference(self):
         """
-        Sets up the widgets and manages the inference process. This method handles model loading,
-        reloading, and the overall flow of the inference process based on user interactions.
         """
         self.set_up_widgets()
@@ -195,6 +262,7 @@ class InferenceRunner(StateManager):
         if self.is_model_loaded:
             free_gpu_resources()
             st.session_state['loading_in_progress'] = False
-            self.image_qa_app()

 class InferenceRunner(StateManager):
     """
+    Manages the user interface and interactions for a Streamlit-based Knowledge-Based Visual Question Answering (KBVQA) application.
+    This class handles image uploads, displays sample images, and facilitates the question-answering process using the KBVQA model.
+    Inherits from the StateManager class.
     """
+    def __init__(self) -> None:
         """
         Initializes the InferenceRunner instance, setting up the necessary state.
         """
         super().__init__()
+    def answer_question(self, caption: str, detected_objects_str: str, question: str) -> Tuple[str, int]:
         """
+        Generates an answer to a user's question based on the image's caption and detected objects.
         Args:
+            caption (str): Caption generated for the image.
+            detected_objects_str (str): String representation of detected objects in the image.
+            question (str): User's question about the image.
         Returns:
+            Tuple[str, int]: The answer to the question and the prompt length.
         """
         free_gpu_resources()
         answer = st.session_state.kbvqa.generate_answer(question, caption, detected_objects_str)
         return answer, prompt_length
+    def display_sample_images(self) -> None:
+        """
+        Displays sample images as clickable thumbnails for the user to select.
+        """
         self.col1.write("Choose from sample images:")
         cols = self.col1.columns(len(config.SAMPLE_IMAGES))
         for idx, sample_image_path in enumerate(config.SAMPLE_IMAGES):
                 if st.button(f'Select Sample Image {idx + 1}', key=f'sample_{idx}'):
                     self.process_new_image(sample_image_path, image)
+    def handle_image_upload(self) -> None:
+        """
+        Provides an image uploader widget for the user to upload their own images.
+        """
         uploaded_image = self.col1.file_uploader("Or upload an Image", type=["png", "jpg", "jpeg"])
         if uploaded_image is not None:
             self.process_new_image(uploaded_image.name, Image.open(uploaded_image))
+    def display_image_and_analysis(self, image_key: str, image_data: dict, nested_col21, nested_col22) -> None:
+        """
+        Displays the uploaded or selected image and provides an option to analyze the image.
+        Args:
+            image_key (str): Unique key identifying the image.
+            image_data (dict): Data associated with the image.
+            nested_col21 (streamlit column): Column for displaying the image.
+            nested_col22 (streamlit column): Column for displaying the analysis button.
+        """
         image_for_display = self.resize_image(image_data['image'], 600)
         nested_col21.image(image_for_display, caption=f'Uploaded Image: {image_key[-11:]}')
         self.handle_analysis_button(image_key, image_data, nested_col22)
+    def handle_analysis_button(self, image_key: str, image_data: dict, nested_col22) -> None:
+        """
+        Provides an 'Analyze Image' button and processes the image analysis upon click.
+        Args:
+            image_key (str): Unique key identifying the image.
+            image_data (dict): Data associated with the image.
+            nested_col22 (streamlit column): Column for displaying the analysis button.
+        """
         if not image_data['analysis_done'] or self.settings_changed or self.confidance_change:
             nested_col22.text("Please click 'Analyze Image'..")
             analyze_button_key = f'analyze_{image_key}_{st.session_state.detection_model}_{st.session_state.confidence_level}'
                 self.update_image_data(image_key, caption, detected_objects_str, True)
             st.session_state['loading_in_progress'] = False
+    def handle_question_answering(self, image_key: str, image_data: dict, nested_col22) -> None:
+        """
+        Manages the question-answering interface for each image.
+        Args:
+            image_key (str): Unique key identifying the image.
+            image_data (dict): Data associated with the image.
+            nested_col22 (streamlit column): Column for displaying the question-answering interface.
+        """
         if image_data['analysis_done']:
             self.display_question_answering_interface(image_key, image_data, nested_col22)
         if self.settings_changed or self.confidance_change:
             nested_col22.warning("Confidence level changed, please click 'Analyze Image'.")
+    def display_question_answering_interface(self, image_key: str, image_data: Dict, nested_col22: st.columns) -> None:
+        """
+        Displays the interface for question answering, including sample questions and a custom question input.
+        Args:
+            image_key (str): Unique key identifying the image.
+            image_data (dict): Data associated with the image.
+            nested_col22 (streamlit column): The column where the interface will be displayed.
+        """
         sample_questions = config.SAMPLE_QUESTIONS.get(image_key, [])
         selected_question = nested_col22.selectbox("Select a sample question or type your own:", ["Custom question..."] + sample_questions, key=f'sample_question_{image_key}')
+        # Display custom question input only if "Custom question..." is selected
+        question = selected_question
+        if selected_question == "Custom question...":
+            custom_question = nested_col22.text_input("Or ask your own question:", key=f'custom_question_{image_key}')
+            question = custom_question
         self.process_question(image_key, question, image_data, nested_col22)
         qa_history = image_data.get('qa_history', [])
         for num, (q, a, p) in enumerate(qa_history):
             nested_col22.text(f"Q{num+1}: {q}\nA{num+1}: {a}\nPrompt Length: {p}\n")
+    def process_question(self, image_key: str, question: str, image_data: Dict, nested_col22: st.columns) -> None:
+        """
+        Processes the user's question, generates an answer, and updates the question-answer history.
+        Args:
+            image_key (str): Unique key identifying the image.
+            question (str): The question asked by the user.
+            image_data (Dict): Data associated with the image.
+            nested_col22 (streamlit column): The column where the answer will be displayed.
+        This method checks if the question is new or if settings have changed, and if so, generates an answer using the KBVQA model.
+        It then updates the question-answer history for the image.
+        """
         qa_history = image_data.get('qa_history', [])
         if question and (question not in [q for q, _, _ in qa_history] or self.settings_changed or self.confidance_change):
             if nested_col22.button('Get Answer', key=f'answer_{image_key}', disabled=self.is_widget_disabled):
                 self.add_to_qa_history(image_key, question, answer, prompt_length)
                # nested_col22.text(f"Q: {question}\nA: {answer}\nPrompt Length: {prompt_length}")
+    def image_qa_app(self) -> None:
+        """
+        Main application interface for image-based question answering.
+        This method orchestrates the display of sample images, handles image uploads, and facilitates the question-answering process.
+        It iterates through each image in the session state, displaying the image and providing interfaces for image analysis and question answering.
+        """
         self.display_sample_images()
         self.handle_image_upload()
         self.display_session_state()
     def run_inference(self):
         """
+        Sets up widgets and manages the inference process, including model loading and reloading,
+        based on user interactions.
+        This method orchestrates the overall flow of the inference process.
         """
         self.set_up_widgets()
         if self.is_model_loaded:
             free_gpu_resources()
             st.session_state['loading_in_progress'] = False
+            self.image_qa_app() # this is the main Q/A Application