mknolan
/

internvl25-image-analyzer

Model card Files Files and versions

xet

Community

mknolan committed on Mar 22, 2025

Commit

04de987

verified ·

1 Parent(s): f8ebad0

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +358 -8

app.py CHANGED Viewed

@@ -19,6 +19,123 @@ from pdf2image import convert_from_path, convert_from_bytes
 import tempfile
 import logging
 import traceback
 # Constants
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
@@ -35,13 +152,19 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
 # Set up logging to write to saved_outputs directory
 log_file = os.path.join(OUTPUT_DIR, f"debug_log_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
 # Configure logging
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s [%(levelname)s] %(message)s',
     handlers=[
         logging.FileHandler(log_file),
-        logging.StreamHandler(sys.stdout)
     ]
 )
@@ -580,6 +703,7 @@ def analyze_with_prompt(image_input, prompt):
             img = img.convert('RGB')
             # Get raw analysis from model
             result = process_image_with_text(img, prompt)
             results.append(result)
@@ -1216,6 +1340,11 @@ def analyze_folder_images(folder_path, prompt):
 # Function to process an image with text prompt
 def process_image_with_text(image, prompt):
     """Process a single image with the InternVL model and a text prompt."""
     try:
         logger.info(f"process_image_with_text called with image type: {type(image)}")
@@ -1299,8 +1428,13 @@ def process_image_with_text(image, prompt):
             logger.info(f"Final tensor prepared: shape={tensor.shape}, device={tensor.device}, dtype={tensor.dtype}")
         except Exception as tensor_err:
-            logger.error(f"Error in tensor creation: {str(tensor_err)}")
             logger.error(traceback.format_exc())
             return f"Error preparing image for analysis: {str(tensor_err)}"
         # Process the prompt
@@ -1332,10 +1466,19 @@ def process_image_with_text(image, prompt):
                 output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
                 logger.debug(f"Decoded output length: {len(output)} chars")
                 return output.strip()
             except Exception as gen_error:
-                logger.error(f"Direct generation failed: {str(gen_error)}")
                 logger.error(traceback.format_exc())
                 # Approach 2: Try the chat method
                 try:
@@ -1360,10 +1503,19 @@ def process_image_with_text(image, prompt):
                     logger.info("Chat method successful")
                     logger.debug(f"Chat response length: {len(response)} chars")
                     return response.strip()
                 except Exception as chat_error:
-                    logger.error(f"Chat method failed: {str(chat_error)}")
                     logger.error(traceback.format_exc())
                     # Approach 3: Try direct model forward pass
                     try:
@@ -1399,22 +1551,47 @@ def process_image_with_text(image, prompt):
                                 response = tokenizer.decode(pred_ids[0], skip_special_tokens=True)
                                 logger.debug(f"Decoded response length: {len(response)} chars")
                                 return response.strip()
                             else:
-                                logger.error("Model output does not contain logits")
                                 return "Failed to analyze image - model output contains no usable data"
                         else:
-                            logger.error("Model does not have forward method")
                             return "Failed to analyze image - model doesn't support direct calling"
                     except Exception as forward_error:
-                        logger.error(f"Forward method failed: {str(forward_error)}")
                         logger.error(traceback.format_exc())
                         # All methods failed
                         return f"Error generating analysis: All methods failed to process the image"
     except Exception as e:
-        logger.error(f"Fatal error in process_image_with_text: {str(e)}")
         logger.error(traceback.format_exc())
         return f"Error processing image: {str(e)}"
 # Function to get log file content
@@ -1461,11 +1638,184 @@ def main():
         "Summarize what you see in this image in one paragraph."
     ]
     # Create tabs for different modes
     with gr.Blocks(title="InternVL2.5 Image Analyzer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# InternVL2.5 Image Analyzer")
         gr.Markdown("Analyze images using the InternVL2.5 model. You can upload individual images or analyze all images in a folder.")
         with gr.Tabs():
             # Tab for single image analysis
             with gr.Tab("Single Image Analysis"):

 import tempfile
 import logging
 import traceback
+import io
+import threading
+import queue
+from typing import List, Dict, Any
# Shared, process-wide counters and "last seen" markers that back the GUI
# debug panel. Mutated in place by the logging handler and by the image
# processing code; read by get_debug_stats().
gui_stats = dict(
    # Running totals of ERROR-or-higher and WARNING-level records.
    errors=0,
    warnings=0,
    # Most recent messages; the literal string 'None' means "nothing yet".
    last_error='None',
    last_warning='None',
    # Wall-clock "%H:%M:%S" stamps for the messages above ('' until set).
    last_error_time='',
    last_warning_time='',
    # Number of process_image_with_text() invocations.
    operations_completed=0,
    # Captured once at import time; used to derive the uptime display.
    start_time=datetime.datetime.now(),
    # Errors whose message mentions both "list" and "unsqueeze".
    tensor_issues=0,
)
# Function to get stats for UI display
def get_debug_stats():
    """Return a snapshot of ``gui_stats`` shaped for the debug panel.

    Returns:
        dict: the keys ``errors``, ``warnings``, ``last_error``,
        ``last_error_time``, ``last_warning``, ``last_warning_time``,
        ``operations``, ``uptime`` and ``tensor_issues``. ``uptime`` is a
        string of the form ``"<hours>h <minutes>m <seconds>s"`` measured from
        ``gui_stats['start_time']``; hours are not capped at 24.
    """
    uptime = datetime.datetime.now() - gui_stats['start_time']
    # timedelta.seconds only holds the sub-day remainder (0..86399), which
    # made the display wrap back to "0h" after a day of uptime. Use the full
    # duration instead, and clamp at zero in case the wall clock stepped
    # backwards since start-up.
    elapsed_seconds = max(0, int(uptime.total_seconds()))
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    uptime_str = f"{hours}h {minutes}m {seconds}s"
    return {
        'errors': gui_stats['errors'],
        'warnings': gui_stats['warnings'],
        'last_error': gui_stats['last_error'],
        'last_error_time': gui_stats['last_error_time'],
        'last_warning': gui_stats['last_warning'],
        'last_warning_time': gui_stats['last_warning_time'],
        # Exposed under a shorter key than the one used in gui_stats.
        'operations': gui_stats['operations_completed'],
        'uptime': uptime_str,
        'tensor_issues': gui_stats['tensor_issues'],
    }
# Function to format debug stats as HTML
def format_debug_stats_html():
    """Render the current debug statistics as an HTML fragment.

    Reads the snapshot returned by ``get_debug_stats()`` and lays it out as a
    two-column summary (counts on the left, uptime and tensor issues on the
    right) followed by the most recent error and warning.

    Returns:
        str: HTML markup. The last error/warning messages are HTML-escaped
        before being embedded.
    """
    # Imported locally so the block does not depend on a module-level import.
    from html import escape

    stats = get_debug_stats()
    # Counters are highlighted only once something has actually been recorded.
    error_color = "#ff5555" if stats['errors'] > 0 else "#555555"
    warning_color = "#ffaa00" if stats['warnings'] > 0 else "#555555"
    # Log messages are arbitrary text (exception reprs such as
    # "<class 'list'>" are common); left raw they would be parsed as markup.
    last_error = escape(str(stats['last_error']))
    last_warning = escape(str(stats['last_warning']))
    markup = f"""
    <div style="margin: 10px 0; padding: 10px; border: 1px solid #ddd; border-radius: 4px; background-color: #f9f9f9;">
        <div style="display: flex; justify-content: space-between;">
            <div style="flex: 1;">
                <p><strong>Errors:</strong> <span style="color: {error_color};">{stats['errors']}</span></p>
                <p><strong>Warnings:</strong> <span style="color: {warning_color};">{stats['warnings']}</span></p>
                <p><strong>Operations:</strong> {stats['operations']}</p>
            </div>
            <div style="flex: 1;">
                <p><strong>Uptime:</strong> {stats['uptime']}</p>
                <p><strong>Tensor Issues:</strong> {stats['tensor_issues']}</p>
            </div>
        </div>
        <div style="margin-top: 10px; border-top: 1px solid #ddd; padding-top: 10px;">
            <p><strong>Last Error:</strong> {stats['last_error_time']} - {last_error}</p>
            <p><strong>Last Warning:</strong> {stats['last_warning_time']} - {last_warning}</p>
        </div>
    </div>
    """
    return markup
# Custom logging handler that captures logs for GUI display
class GUILogHandler(logging.Handler):
    """Logging handler that buffers formatted records for the GUI debug console.

    Two views of the log stream are kept:

    * ``log_entries`` - a list holding the most recent ``max_entries``
      formatted lines, read in bulk through ``get_logs()``.
    * ``log_queue`` - a FIFO of lines not yet consumed, read one at a time
      through ``get_latest()``.

    As a side effect, ``emit()`` updates the module-level ``gui_stats``
    error/warning counters and "last seen" fields.

    Args:
        max_entries: Maximum number of lines retained in ``log_entries`` and
            pending in ``log_queue``. When the queue is full the oldest
            pending line is discarded.
    """

    def __init__(self, max_entries=100):
        super().__init__()
        self.max_entries = max_entries
        # Bounded so lines nobody polls cannot pile up for the lifetime of
        # the process (queue.Queue treats maxsize <= 0 as unbounded).
        self.log_queue = queue.Queue(maxsize=max_entries)
        self.log_entries = []
        # NOTE: self.lock is deliberately NOT reassigned here.
        # logging.Handler.__init__ already installs a re-entrant lock in
        # self.lock, and Handler.handle() holds it while calling emit().
        # Replacing it with a plain threading.Lock made emit() block forever
        # on its own `with self.lock:` for every record routed via logging.

    def emit(self, record):
        """Format ``record``, update ``gui_stats`` and buffer the line."""
        try:
            log_entry = self.format(record)
            message = record.getMessage()
            # Track error and warning counts
            if record.levelno >= logging.ERROR:
                gui_stats['errors'] += 1
                gui_stats['last_error'] = message
                gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                # Check for specific error types
                if "list" in message and "unsqueeze" in message:
                    gui_stats['tensor_issues'] += 1
            elif record.levelno >= logging.WARNING:
                gui_stats['warnings'] += 1
                gui_stats['last_warning'] = message
                gui_stats['last_warning_time'] = datetime.datetime.now().strftime("%H:%M:%S")
            with self.lock:
                self.log_entries.append(log_entry)
                # Keep only the most recent entries
                if len(self.log_entries) > self.max_entries:
                    self.log_entries = self.log_entries[-self.max_entries:]
                # Producers are serialised by self.lock and consumers only
                # remove items, so after dropping the oldest pending line the
                # put below cannot find the queue full.
                if self.log_queue.full():
                    try:
                        self.log_queue.get_nowait()
                    except queue.Empty:
                        pass
                self.log_queue.put_nowait(log_entry)
        except Exception:
            self.handleError(record)

    def get_logs(self, last_n=None):
        """Return buffered lines joined by newlines.

        Args:
            last_n: If ``None``, return every buffered line; otherwise return
                only the ``last_n`` most recent ones. Zero or a negative
                value yields an empty string.
        """
        with self.lock:
            if last_n is None:
                selected = self.log_entries
            elif last_n <= 0:
                # entries[-0:] is the whole list, not "no lines".
                selected = []
            else:
                selected = self.log_entries[-last_n:]
            return "\n".join(selected)

    def get_latest(self):
        """Pop and return the oldest unconsumed line, or ``None`` if none is pending."""
        try:
            return self.log_queue.get_nowait()
        except queue.Empty:
            return None

    def clear(self):
        """Discard all buffered lines, including those still pending in the queue."""
        with self.lock:
            self.log_entries = []
            # Also drop pending lines; otherwise entries logged before the
            # clear would be handed out again by get_latest().
            while True:
                try:
                    self.log_queue.get_nowait()
                except queue.Empty:
                    break
 # Constants
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
 # Set up logging to write to saved_outputs directory
 log_file = os.path.join(OUTPUT_DIR, f"debug_log_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
+# Create a GUI log handler
+gui_log_handler = GUILogHandler(max_entries=500)
+gui_log_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
+gui_log_handler.setLevel(logging.DEBUG)
 # Configure logging
 logging.basicConfig(
     level=logging.DEBUG,
     format='%(asctime)s [%(levelname)s] %(message)s',
     handlers=[
         logging.FileHandler(log_file),
+        logging.StreamHandler(sys.stdout),
+        gui_log_handler
     ]
 )
             img = img.convert('RGB')
             # Get raw analysis from model
             result = process_image_with_text(img, prompt)
             results.append(result)
 # Function to process an image with text prompt
 def process_image_with_text(image, prompt):
     """Process a single image with the InternVL model and a text prompt."""
+    start_time = time.time()
+    # Increment operations counter
+    gui_stats['operations_completed'] += 1
     try:
         logger.info(f"process_image_with_text called with image type: {type(image)}")
             logger.info(f"Final tensor prepared: shape={tensor.shape}, device={tensor.device}, dtype={tensor.dtype}")
         except Exception as tensor_err:
+            error_msg = f"Error in tensor creation: {str(tensor_err)}"
+            logger.error(error_msg)
             logger.error(traceback.format_exc())
+            # Update in-memory error statistics
+            gui_stats['errors'] += 1
+            gui_stats['last_error'] = error_msg
+            gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
             return f"Error preparing image for analysis: {str(tensor_err)}"
         # Process the prompt
                 output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
                 logger.debug(f"Decoded output length: {len(output)} chars")
+                # Log completion time
+                elapsed = time.time() - start_time
+                logger.info(f"Image processing completed in {elapsed:.2f} seconds")
                 return output.strip()
             except Exception as gen_error:
+                error_msg = f"Direct generation failed: {str(gen_error)}"
+                logger.error(error_msg)
                 logger.error(traceback.format_exc())
+                # Update in-memory error statistics
+                gui_stats['errors'] += 1
+                gui_stats['last_error'] = error_msg
+                gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                 # Approach 2: Try the chat method
                 try:
                     logger.info("Chat method successful")
                     logger.debug(f"Chat response length: {len(response)} chars")
+                    # Log completion time
+                    elapsed = time.time() - start_time
+                    logger.info(f"Image processing (fallback chat) completed in {elapsed:.2f} seconds")
                     return response.strip()
                 except Exception as chat_error:
+                    error_msg = f"Chat method failed: {str(chat_error)}"
+                    logger.error(error_msg)
                     logger.error(traceback.format_exc())
+                    # Update in-memory error statistics
+                    gui_stats['errors'] += 1
+                    gui_stats['last_error'] = error_msg
+                    gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                     # Approach 3: Try direct model forward pass
                     try:
                                 response = tokenizer.decode(pred_ids[0], skip_special_tokens=True)
                                 logger.debug(f"Decoded response length: {len(response)} chars")
+                                # Log completion time
+                                elapsed = time.time() - start_time
+                                logger.info(f"Image processing (fallback forward) completed in {elapsed:.2f} seconds")
                                 return response.strip()
                             else:
+                                error_msg = "Model output does not contain logits"
+                                logger.error(error_msg)
+                                gui_stats['errors'] += 1
+                                gui_stats['last_error'] = error_msg
+                                gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                                 return "Failed to analyze image - model output contains no usable data"
                         else:
+                            error_msg = "Model does not have forward method"
+                            logger.error(error_msg)
+                            gui_stats['errors'] += 1
+                            gui_stats['last_error'] = error_msg
+                            gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                             return "Failed to analyze image - model doesn't support direct calling"
                     except Exception as forward_error:
+                        error_msg = f"Forward method failed: {str(forward_error)}"
+                        logger.error(error_msg)
                         logger.error(traceback.format_exc())
+                        # Update in-memory error statistics
+                        gui_stats['errors'] += 1
+                        gui_stats['last_error'] = error_msg
+                        gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
                         # All methods failed
                         return f"Error generating analysis: All methods failed to process the image"
     except Exception as e:
+        error_msg = f"Fatal error in process_image_with_text: {str(e)}"
+        logger.error(error_msg)
         logger.error(traceback.format_exc())
+        # Update in-memory error statistics
+        gui_stats['errors'] += 1
+        gui_stats['last_error'] = error_msg
+        gui_stats['last_error_time'] = datetime.datetime.now().strftime("%H:%M:%S")
         return f"Error processing image: {str(e)}"
 # Function to get log file content
         "Summarize what you see in this image in one paragraph."
     ]
+    # Function to get the most recent debug logs
+    def get_debug_logs(num_lines=50):
+        return gui_log_handler.get_logs(last_n=num_lines)
+    # Function to update logs in real-time
+    def update_logs(history):
+        latest = gui_log_handler.get_latest()
+        if latest:
+            history = history + "\n" + latest if history else latest
+            # Keep only the last 50 lines for performance
+            lines = history.split("\n")
+            if len(lines) > 50:
+                history = "\n".join(lines[-50:])
+        return history
+    # Function to clear logs
+    def clear_logs():
+        gui_log_handler.clear()
+        return ""
     # Create tabs for different modes
     with gr.Blocks(title="InternVL2.5 Image Analyzer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# InternVL2.5 Image Analyzer")
         gr.Markdown("Analyze images using the InternVL2.5 model. You can upload individual images or analyze all images in a folder.")
+        # Debug mode toggle and panel
+        with gr.Accordion("Debug Console", open=False) as debug_accordion:
+            with gr.Row():
+                with gr.Column(scale=4):
+                    debug_output = gr.Textbox(
+                        label="Real-time Debug Logs",
+                        value=get_debug_logs(20),
+                        lines=8,
+                        max_lines=15,
+                        autoscroll=True,
+                        elem_id="debug_output"
+                    )
+                with gr.Column(scale=1):
+                    with gr.Row():
+                        clear_btn = gr.Button("Clear Logs")
+                        refresh_btn = gr.Button("Refresh")
+                    debug_level = gr.Radio(
+                        ["ERROR", "WARNING", "INFO", "DEBUG"],
+                        label="Debug Level",
+                        value="INFO"
+                    )
+                    # Track error counts
+                    error_count = gr.Number(value=0, label="Errors", precision=0)
+                    warning_count = gr.Number(value=0, label="Warnings", precision=0)
+            # Stats display
+            debug_stats_html = gr.HTML(format_debug_stats_html())
+            # Add option to enable GUI logging for all operations
+            enable_full_logging = gr.Checkbox(label="Log All Operations to Console", value=False)
+            # Function to update stats display
+            def update_stats_display():
+                return format_debug_stats_html()
+            # Set up a timer to update stats every few seconds
+            gr.on(
+                triggers=[debug_accordion.open],
+                fn=update_stats_display,
+                outputs=[debug_stats_html],
+                every=5  # Update every 5 seconds when accordion is open
+            )
+            # Update counts periodically
+            def update_error_counts():
+                return gui_stats['errors'], gui_stats['warnings']
+            gr.on(
+                triggers=[debug_accordion.open],
+                fn=update_error_counts,
+                outputs=[error_count, warning_count],
+                every=2
+            )
+            # Debug info about model
+            with gr.Accordion("Model Information", open=False):
+                if torch.cuda.is_available():
+                    gpu_info = f"CUDA available: {torch.cuda.device_count()} GPU(s)\n"
+                    for i in range(torch.cuda.device_count()):
+                        gpu_info += f"- GPU {i}: {torch.cuda.get_device_name(i)}\n"
+                    gpu_info += f"Total memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB"
+                else:
+                    gpu_info = "CUDA not available - using CPU"
+                gr.Textbox(value=gpu_info, label="GPU Information", lines=4)
+                model_info = f"Model: {MODEL_NAME}\nImage size: {IMAGE_SIZE}x{IMAGE_SIZE}"
+                gr.Textbox(value=model_info, label="Model Configuration", lines=2)
+                # Function to get current memory usage
+                def get_memory_usage():
+                    if torch.cuda.is_available():
+                        allocated = torch.cuda.memory_allocated() / 1e9  # GB
+                        reserved = torch.cuda.memory_reserved() / 1e9    # GB
+                        max_memory = torch.cuda.max_memory_allocated() / 1e9  # GB
+                        return f"Allocated: {allocated:.2f} GB\nReserved: {reserved:.2f} GB\nMax used: {max_memory:.2f} GB"
+                    return "No GPU available"
+                memory_usage = gr.Textbox(
+                    value=get_memory_usage(),
+                    label="Current GPU Memory Usage",
+                    lines=3
+                )
+                # Refresh memory usage
+                refresh_memory_btn = gr.Button("Refresh Memory Info")
+                refresh_memory_btn.click(
+                    fn=get_memory_usage,
+                    inputs=[],
+                    outputs=[memory_usage]
+                )
+            # Download debug logs button
+            gr.Markdown("### Download Complete Debug Logs")
+            def get_log_file_path():
+                return log_file if os.path.exists(log_file) else None
+            download_log_btn = gr.Button("Download Full Log File")
+            log_file_output = gr.File(label="Log File for Download")
+            download_log_btn.click(
+                fn=get_log_file_path,
+                inputs=[],
+                outputs=[log_file_output]
+            )
+        # Set up log level change handler
+        def change_log_level(level):
+            if level == "ERROR":
+                gui_log_handler.setLevel(logging.ERROR)
+                logger.info(f"Debug display log level set to ERROR")
+            elif level == "WARNING":
+                gui_log_handler.setLevel(logging.WARNING)
+                logger.info(f"Debug display log level set to WARNING")
+            elif level == "INFO":
+                gui_log_handler.setLevel(logging.INFO)
+                logger.info(f"Debug display log level set to INFO")
+            else:  # DEBUG
+                gui_log_handler.setLevel(logging.DEBUG)
+                logger.info(f"Debug display log level set to DEBUG")
+            return f"Log level set to {level}"
+        debug_level.change(
+            fn=change_log_level,
+            inputs=[debug_level],
+            outputs=[]
+        )
+        # Button handlers
+        clear_btn.click(
+            fn=clear_logs,
+            inputs=[],
+            outputs=[debug_output]
+        )
+        refresh_btn.click(
+            fn=get_debug_logs,
+            inputs=[],
+            outputs=[debug_output]
+        )
+        # Set up automatic refresh of debug logs
+        debug_output.change(
+            fn=update_logs,
+            inputs=[debug_output],
+            outputs=[debug_output],
+            every=1  # Update every second
+        )
+        # Main tabs for functionality
         with gr.Tabs():
             # Tab for single image analysis
             with gr.Tab("Single Image Analysis"):