Spaces:

shreyas27
/

video_class

Runtime error

App Files Community

shreyas27 committed on May 30, 2025

Commit

8836c5c

verified ·

1 Parent(s): a49d68b

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -50

app.py CHANGED Viewed

@@ -5,47 +5,36 @@ import numpy as np
 import decord
 from decord import VideoReader
 import logging
-import os # Import os for path checking
-# --- Configure Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__) # Corrected from _name to __name__
-# --- Initialize decord bridge to PyTorch ---
-# This allows VideoReader.get_batch() to return PyTorch tensors directly
-# It's crucial for efficient GPU processing with Decord.
 try:
     decord.bridge.set_bridge('torch')
     logger.info("Decord bridge successfully set to PyTorch.")
 except RuntimeError as e:
     logger.warning(f"Failed to set decord bridge to PyTorch: {e}. "
                    "Ensure decord is compiled with PyTorch support (e.g., pip install decord[torch]). "
-                   "Falling back to default bridge (numpy/cpu) if not set correctly later, "
-                   "which might require manual tensor conversion.")
-    # If the bridge cannot be set, VideoReader might default to NumPy, requiring
-    # explicit .to(device) and potentially .permute() on the NumPy array before processing.
-    # However, the current code assumes a torch tensor output from get_batch().
-    pass # Continue, as the code attempts to move to device later
-# --- Determine device (GPU if available, otherwise CPU) ---
 if torch.cuda.is_available():
     device = torch.device("cuda")
     logger.info("CUDA is available. Using GPU.")
-    # decord context for GPU - use GPU 0 by default
     decord_ctx = decord.gpu(0)
-    logger.info(f"Decord will use GPU: {decord_ctx}")
 else:
     device = torch.device("cpu")
     logger.info("CUDA not available. Using CPU.")
-    decord_ctx = decord.cpu(0) # decord context for CPU
     logger.info(f"Decord will use CPU: {decord_ctx}")
-# --- Load Model and Processor ---
 try:
     logger.info(f"Loading VideoMAEForVideoClassification model: OPear/videomae-large-finetuned-UCF-Crime to device: {device}")
     model = VideoMAEForVideoClassification.from_pretrained("OPear/videomae-large-finetuned-UCF-Crime").to(device)
-    model.eval() # Set model to evaluation mode for inference
     logger.info("Model loaded successfully.")
     logger.info("Loading VideoMAEImageProcessor: MCG-NJU/videomae-base")
@@ -53,15 +42,12 @@ try:
     logger.info("Processor loaded successfully.")
 except Exception as e:
     logger.error(f"FATAL: Error loading model or processor during startup: {e}", exc_info=True)
-    # Re-raise the exception to prevent the app from starting if essential components fail to load
     raise
-# --- Video Classification Function ---
 def classify_video(video_filepath):
     logger.info(f"--- New classification request ---")
     logger.info(f"Received video_filepath: '{video_filepath}' (type: {type(video_filepath)})")
-    # Basic input validation for Gradio's video component output
     if not video_filepath or not os.path.exists(video_filepath):
         logger.error(f"Error: video_filepath is None, empty, or file does not exist: '{video_filepath}'")
         return "Error: No valid video file received by the server. Please ensure the file exists and try uploading again."
@@ -76,44 +62,33 @@ def classify_video(video_filepath):
             logger.error(f"Error: Video at '{video_filepath}' is empty or could not be read (duration is 0).")
             return "Error: The video is empty or cannot be processed. It might be corrupted or in an unsupported format."
-        num_frames_to_sample = 16 # Standard for VideoMAE
         if duration < num_frames_to_sample:
             logger.warning(f"Video duration ({duration} frames) is less than the desired {num_frames_to_sample} frames. Sampling all {duration} available frames.")
             indices = np.arange(duration)
         else:
-            # Sample `num_frames_to_sample` evenly spaced frames
             indices = np.linspace(0, duration - 1, num_frames_to_sample, dtype=int)
         logger.info(f"Selected frame indices for sampling: {indices}")
-        # .get_batch() will return PyTorch tensors on the specified decord_ctx (e.g., GPU)
-        # if decord.bridge.set_bridge('torch') was successful.
         video_frames_tensor = vr.get_batch(indices)
-        # Ensure frames are on the correct device, useful if decord bridge isn't set or context is CPU
         video_frames_tensor = video_frames_tensor.to(device)
         logger.info(f"Video frames successfully extracted and moved to device. Shape: {video_frames_tensor.shape}, Device: {video_frames_tensor.device}")
-        # The processor expects a list of frames (PyTorch tensors in this case).
-        # Assuming decord returns frames in (N, H, W, C) format (numpy default for 3D),
-        # or (N, C, H, W) if bridge is torch and correctly configured.
-        # VideoMAEImageProcessor expects (N, H, W, C) for its input list,
-        # and it will handle the permutation to (N, C, H, W) internally if needed.
         inputs = processor(list(video_frames_tensor), return_tensors="pt")
-        # Move processed inputs (e.g., pixel_values) to the same device as the model
         inputs = {k: v.to(device) for k, v in inputs.items()}
         logger.info(f"Frames processed by ImageProcessor and input tensors moved to device: {device}")
-        with torch.no_grad(): # Disable gradient calculation for inference
             outputs = model(**inputs)
             logits = outputs.logits
             predicted_class_idx = logits.argmax(-1).item()
         logger.info(f"Model inference complete. Predicted class index: {predicted_class_idx}")
-        # Get the human-readable label
         predicted_label = model.config.id2label[predicted_class_idx]
         logger.info(f"Predicted label: '{predicted_label}'")
@@ -123,38 +98,26 @@ def classify_video(video_filepath):
         logger.error(f"Error during video classification for '{video_filepath}': {e}", exc_info=True)
         return f"Error during classification: {str(e)}. Please check the video format, ensure decord dependencies are met, or review server logs for more details."
-# --- Gradio Interface Setup ---
 video_input_component = gr.Video(
     label="Upload Crime Video",
-    # type="filepath" is removed as it's deprecated or not needed in newer Gradio versions
-    # Gradio's gr.Video typically returns a filepath by default when uploaded.
 )
 text_output_component = gr.Textbox(
     label="Classification Result"
 )
-# Example video paths (replace with actual paths on your server if running locally
-# or ensure these paths are accessible in the deployment environment).
-# For deployment, often you provide a sample video file alongside your app.py.
 example_video_paths = [
-    # "examples/crime_video_1.mp4",
-    # "examples/crime_video_2.mp4",
-    # Add actual paths here if you have example videos
 ]
 iface = gr.Interface(
     fn=classify_video,
     inputs=video_input_component,
     outputs=text_output_component,
     title="Video Crime Classification (GPU Accelerated)",
     description="Upload a video to classify the type of crime depicted using a VideoMAE model fine-tuned on UCF-Crime. Processing runs on GPU if available.",
-    examples=example_video_paths, # Provide actual paths if you use examples
-    allow_flagging="never" # Disables the "Flag" button
 )
-# --- Launch Gradio Application ---
 if __name__ == "__main__":
     logger.info("Starting Gradio application...")
-    # server_name="0.0.0.0" makes the app accessible externally, useful for deployment
-    iface.launch(server_name="0.0.0.0")

 import decord
 from decord import VideoReader
 import logging
+import os
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
 try:
     decord.bridge.set_bridge('torch')
     logger.info("Decord bridge successfully set to PyTorch.")
 except RuntimeError as e:
     logger.warning(f"Failed to set decord bridge to PyTorch: {e}. "
                    "Ensure decord is compiled with PyTorch support (e.g., pip install decord[torch]). "
+                   "Processing might fall back to CPU-based NumPy arrays if not correctly configured, "
+                   "which will then be moved to the target device.")
+    pass
 if torch.cuda.is_available():
     device = torch.device("cuda")
     logger.info("CUDA is available. Using GPU.")
     decord_ctx = decord.gpu(0)
+    logger.info(f"Decord will attempt to use GPU: {decord_ctx}")
 else:
     device = torch.device("cpu")
     logger.info("CUDA not available. Using CPU.")
+    decord_ctx = decord.cpu(0)
     logger.info(f"Decord will use CPU: {decord_ctx}")
 try:
     logger.info(f"Loading VideoMAEForVideoClassification model: OPear/videomae-large-finetuned-UCF-Crime to device: {device}")
     model = VideoMAEForVideoClassification.from_pretrained("OPear/videomae-large-finetuned-UCF-Crime").to(device)
+    model.eval()
     logger.info("Model loaded successfully.")
     logger.info("Loading VideoMAEImageProcessor: MCG-NJU/videomae-base")
     logger.info("Processor loaded successfully.")
 except Exception as e:
     logger.error(f"FATAL: Error loading model or processor during startup: {e}", exc_info=True)
     raise
 def classify_video(video_filepath):
     logger.info(f"--- New classification request ---")
     logger.info(f"Received video_filepath: '{video_filepath}' (type: {type(video_filepath)})")
     if not video_filepath or not os.path.exists(video_filepath):
         logger.error(f"Error: video_filepath is None, empty, or file does not exist: '{video_filepath}'")
         return "Error: No valid video file received by the server. Please ensure the file exists and try uploading again."
             logger.error(f"Error: Video at '{video_filepath}' is empty or could not be read (duration is 0).")
             return "Error: The video is empty or cannot be processed. It might be corrupted or in an unsupported format."
+        num_frames_to_sample = 16
         if duration < num_frames_to_sample:
             logger.warning(f"Video duration ({duration} frames) is less than the desired {num_frames_to_sample} frames. Sampling all {duration} available frames.")
             indices = np.arange(duration)
         else:
             indices = np.linspace(0, duration - 1, num_frames_to_sample, dtype=int)
         logger.info(f"Selected frame indices for sampling: {indices}")
         video_frames_tensor = vr.get_batch(indices)
         video_frames_tensor = video_frames_tensor.to(device)
         logger.info(f"Video frames successfully extracted and moved to device. Shape: {video_frames_tensor.shape}, Device: {video_frames_tensor.device}")
         inputs = processor(list(video_frames_tensor), return_tensors="pt")
         inputs = {k: v.to(device) for k, v in inputs.items()}
         logger.info(f"Frames processed by ImageProcessor and input tensors moved to device: {device}")
+        with torch.no_grad():
             outputs = model(**inputs)
             logits = outputs.logits
             predicted_class_idx = logits.argmax(-1).item()
         logger.info(f"Model inference complete. Predicted class index: {predicted_class_idx}")
         predicted_label = model.config.id2label[predicted_class_idx]
         logger.info(f"Predicted label: '{predicted_label}'")
         logger.error(f"Error during video classification for '{video_filepath}': {e}", exc_info=True)
         return f"Error during classification: {str(e)}. Please check the video format, ensure decord dependencies are met, or review server logs for more details."
 video_input_component = gr.Video(
     label="Upload Crime Video",
 )
 text_output_component = gr.Textbox(
     label="Classification Result"
 )
 example_video_paths = [
 ]
 iface = gr.Interface(
     fn=classify_video,
     inputs=video_input_component,
     outputs=text_output_component,
     title="Video Crime Classification (GPU Accelerated)",
     description="Upload a video to classify the type of crime depicted using a VideoMAE model fine-tuned on UCF-Crime. Processing runs on GPU if available.",
+    examples=example_video_paths,
+    allow_flagging="never"
 )
 if __name__ == "__main__":
     logger.info("Starting Gradio application...")
+    iface.launch(server_name="0.0.0.0")