Update app.py
Browse files
app.py
CHANGED
|
@@ -8,18 +8,18 @@ from torchvision.models.detection import FasterRCNN
|
|
| 8 |
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
| 9 |
|
| 10 |
# Load Models
|
| 11 |
-
def load_model( backbone_name, num_classes):
|
| 12 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 13 |
if backbone_name == "resnet50":
|
| 14 |
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
|
| 15 |
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
| 16 |
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
| 17 |
-
model.load_state_dict(torch.load(
|
| 18 |
elif backbone_name == "mobilenet":
|
| 19 |
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=False)
|
| 20 |
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
| 21 |
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
| 22 |
-
model.load_state_dict(torch.load(
|
| 23 |
model.to(device)
|
| 24 |
model.eval()
|
| 25 |
return model
|
|
@@ -60,7 +60,15 @@ def predict_video(video_path, model):
|
|
| 60 |
cv2.putText(frame, f"{class_names[label]}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
|
| 61 |
frames.append(frame)
|
| 62 |
cap.release()
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
# Gradio Interface for Image and Video Inference
|
| 66 |
|
|
@@ -70,14 +78,14 @@ inputs_image = [gr.Image(type="filepath", label="Upload Image"), model_selection
|
|
| 70 |
outputs_image = gr.Image(type="numpy", label="Detection Output")
|
| 71 |
|
| 72 |
inputs_video = [gr.Video(label="Upload Video"), model_selection]
|
| 73 |
-
outputs_video = gr.
|
| 74 |
|
| 75 |
|
| 76 |
|
| 77 |
with gr.Blocks() as demo:
|
| 78 |
with gr.TabItem("Image"):
|
| 79 |
gr.Interface(
|
| 80 |
-
fn=lambda img, model_name: predict_image(img, load_model( model_name.lower(), num_classes=6)),
|
| 81 |
inputs=inputs_image,
|
| 82 |
outputs=outputs_image,
|
| 83 |
title="Image Inference"
|
|
@@ -85,11 +93,10 @@ with gr.Blocks() as demo:
|
|
| 85 |
|
| 86 |
with gr.TabItem("Video"):
|
| 87 |
gr.Interface(
|
| 88 |
-
fn=lambda vid, model_name: predict_video(vid, load_model(model_name.lower(), num_classes=6)),
|
| 89 |
inputs=inputs_video,
|
| 90 |
outputs=outputs_video,
|
| 91 |
title="Video Inference"
|
| 92 |
)
|
| 93 |
|
| 94 |
demo.launch()
|
| 95 |
-
|
|
|
|
| 8 |
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
| 9 |
|
| 10 |
# Load Models
|
| 11 |
+
def load_model(model_path, backbone_name, num_classes):
    """Build a Faster R-CNN detector and load fine-tuned weights from disk.

    Args:
        model_path: Path to a ``state_dict`` checkpoint saved with ``torch.save``.
        backbone_name: Either ``"resnet50"`` or ``"mobilenet"`` (case-sensitive).
        num_classes: Number of output classes (including background) for the
            replacement box predictor head.

    Returns:
        The model in eval mode, moved to CUDA when available, else CPU.

    Raises:
        ValueError: If ``backbone_name`` is not one of the supported values.
            (Previously an unknown name fell through both branches and crashed
            later with an unrelated ``NameError``.)
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): `pretrained=False` is the legacy torchvision API; newer
    # releases prefer `weights=None`. Kept as-is to match the installed version.
    if backbone_name == "resnet50":
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    elif backbone_name == "mobilenet":
        model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=False)
    else:
        raise ValueError(f"Unsupported backbone: {backbone_name!r}; expected 'resnet50' or 'mobilenet'")
    # The head replacement and checkpoint load are identical for every backbone,
    # so they are hoisted out of the branches (was duplicated in each branch).
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # map_location lets a CUDA-trained checkpoint load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model
|
|
|
|
| 60 |
cv2.putText(frame, f"{class_names[label]}: {score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
|
| 61 |
frames.append(frame)
|
| 62 |
cap.release()
|
| 63 |
+
output_path = 'output_video.mp4'
|
| 64 |
+
height, width, _ = frames[0].shape
|
| 65 |
+
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20, (width, height))
|
| 66 |
+
|
| 67 |
+
for frame in frames:
|
| 68 |
+
out.write(frame)
|
| 69 |
+
|
| 70 |
+
out.release()
|
| 71 |
+
return output_path
|
| 72 |
|
| 73 |
# Gradio Interface for Image and Video Inference

outputs_image = gr.Image(type="numpy", label="Detection Output")

inputs_video = [gr.Video(label="Upload Video"), model_selection]
outputs_video = gr.Video(label="Detection Output")


def _detector_for(model_name):
    """Resolve the checkpoint path for a UI model choice and build the detector.

    Checkpoints are expected next to the app as ``fasterrcnn<Name>.pth``; the
    backbone key passed to ``load_model`` is the lower-cased choice.
    """
    return load_model(f'fasterrcnn{model_name}.pth', model_name.lower(), num_classes=6)


with gr.Blocks() as demo:
    # One tab per media type; both share the model-selection input.
    with gr.TabItem("Image"):
        gr.Interface(
            fn=lambda img, model_name: predict_image(img, _detector_for(model_name)),
            inputs=inputs_image,
            outputs=outputs_image,
            title="Image Inference"
        )
    with gr.TabItem("Video"):
        gr.Interface(
            fn=lambda vid, model_name: predict_video(vid, _detector_for(model_name)),
            inputs=inputs_video,
            outputs=outputs_video,
            title="Video Inference"
        )

demo.launch()
|
|
|