salman508's picture
Update app.py
821cadc verified
raw
history blame
3.3 kB
import gradio as gr
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import cv2
# --- Configuration ----------------------------------------------------------
IMG_SIZE = 224  # side length (pixels) frames are resized to / model input size
MAX_SEQ_LENGTH = 30  # number of frame slots in the feature and mask arrays
NUM_FEATURES = 2048  # length of one per-frame feature vector

# --- Trained sequence classifier --------------------------------------------
model_filepath = "lstm_model.h5"  # Replace with the actual path
loaded_model = keras.models.load_model(model_filepath)

# --- Label vocabulary -------------------------------------------------------
# Single-column frame holding the class tags; np.unique() yields the sorted,
# de-duplicated tags that become the lookup vocabulary (no out-of-vocabulary
# slots are reserved).
train_df = pd.DataFrame({"tag": ["BabyCrawling", "CricketShot"]})
label_processor = keras.layers.StringLookup(
    vocabulary=np.unique(train_df["tag"]),
    num_oov_indices=0,
)
def crop_center_square(frame):
    """Return the centered square region of ``frame``.

    The first two axes of ``frame`` are treated as (rows, columns); the
    square's side equals the smaller of the two. Any trailing axes are left
    untouched.
    """
    height, width = frame.shape[:2]
    side = min(height, width)
    # Offsets of the square's top-left corner, measured from the midpoints.
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    rows = slice(top, top + side)
    cols = slice(left, left + side)
    return frame[rows, cols]
def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Decode a video into an array of square, resized frames.

    Args:
        path: Location of the video, passed straight to ``cv2.VideoCapture``.
        max_frames: Stop once exactly this many frames have been collected.
            The default of 0 never matches the count after a frame is added,
            so the whole video is read.
        resize: Target size handed to ``cv2.resize`` for every frame.

    Returns:
        ``np.array`` built from the list of processed frames; empty when no
        frame could be read.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        ok, image = capture.read()
        while ok:
            square = crop_center_square(image)
            scaled = cv2.resize(square, resize)
            # Reverse the three channels (OpenCV's default BGR order -> RGB).
            collected.append(scaled[:, :, [2, 1, 0]])
            if len(collected) == max_frames:
                break
            ok, image = capture.read()
    finally:
        # Always free the capture handle, even if processing a frame fails.
        capture.release()
    return np.array(collected)
# Per-frame feature extractor
def build_feature_extractor():
    """Build the Keras model that turns frames into feature vectors.

    The model wraps an ImageNet-weighted InceptionV3 (no classification top,
    average pooling) and applies the matching ``preprocess_input`` to its
    input before the backbone, so callers feed frames of shape
    ``(IMG_SIZE, IMG_SIZE, 3)`` directly.

    Returns:
        A ``keras.Model`` named ``"feature_extractor"``.
    """
    frame_shape = (IMG_SIZE, IMG_SIZE, 3)
    backbone = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=frame_shape,
    )

    frame_input = keras.Input(frame_shape)
    scaled = keras.applications.inception_v3.preprocess_input(frame_input)
    pooled = backbone(scaled)
    return keras.Model(frame_input, pooled, name="feature_extractor")


# Built once at import time and reused for every prediction.
feature_extractor = build_feature_extractor()
# Function for preparing a single video for prediction
def prepare_single_video(frames):
    """Turn one video's frames into padded feature and mask arrays.

    Args:
        frames: Array of frames for a single video, indexed by frame along
            the first axis (as produced by ``load_video``). Only the first
            ``MAX_SEQ_LENGTH`` frames are used.

    Returns:
        A ``(frame_features, frame_mask)`` tuple:
        ``frame_features`` is float32 of shape
        ``(1, MAX_SEQ_LENGTH, NUM_FEATURES)``, zero-padded past the last
        used frame; ``frame_mask`` is bool of shape ``(1, MAX_SEQ_LENGTH)``,
        True for slots holding a real frame and False for padding.
    """
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    length = min(MAX_SEQ_LENGTH, frames.shape[0])
    if length > 0:
        # One batched forward pass over all used frames instead of a separate
        # predict() call per frame; each predict() call carries fixed setup
        # overhead, so per-frame calls are needlessly slow.
        frame_features[0, :length] = feature_extractor.predict(frames[:length])
        frame_mask[0, :length] = True  # True = not masked, False = masked

    return frame_features, frame_mask
# Function for making predictions
def sequence_prediction(video_file):
    """Predict the class label for one video.

    Args:
        video_file: Path of the video to classify, as supplied by the Gradio
            video input. May be empty/``None`` when no video was provided.

    Returns:
        The vocabulary entry whose predicted probability is highest.

    Raises:
        gr.Error: If no video was provided or no frame could be decoded
            from it.
    """
    if not video_file:
        raise gr.Error("Please upload a video file.")

    # prepare_single_video() only uses the first MAX_SEQ_LENGTH frames, so
    # stop decoding there instead of loading the whole video into memory.
    frames = load_video(video_file, max_frames=MAX_SEQ_LENGTH)
    if len(frames) == 0:
        # Without this check the model would be fed all-zero, fully masked
        # input and an arbitrary label would be returned.
        raise gr.Error("Could not read any frames from the uploaded video.")

    # Extract per-frame features and the padding mask
    frame_features, frame_mask = prepare_single_video(frames)

    # Run the sequence model; [0] selects the single video in the batch
    probabilities = loaded_model.predict([frame_features, frame_mask])[0]

    # Map the most probable class index back to its label
    class_vocab = label_processor.get_vocabulary()
    return class_vocab[np.argmax(probabilities)]
# Sample clips offered in the UI: one inner list per example, holding the
# value for the single video input.
example_list = [["video1.mp4"], ["video2.mp4"]]

# Web UI: a video upload goes in, the predicted label comes out as text.
iface = gr.Interface(
    examples=example_list,
    outputs="text",
    inputs=gr.Video(label="Upload a video file"),
    fn=sequence_prediction,
)

# Start serving the app
iface.launch()