Spaces:
Build error
Build error
| import gradio as gr | |
| from mediapipe.python.solutions import holistic | |
| from torchvision.transforms.v2 import Compose, Lambda, Normalize | |
| from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor | |
| from utils import get_predictions, preprocess | |
# Static text blocks for the Gradio page. Each one currently holds nothing
# but a single newline, i.e. it is effectively an empty placeholder.
title = '\n'
cite_markdown = '\n'
description = '\n'
# Example rows offered in the UI. Each row is a two-item list:
# [path of a sample clip under samples/, Vietnamese text shipped with it].
# NOTE(review): the text is presumably the expected sign label - confirm.
examples = [
    [clip_path, caption]
    for clip_path, caption in (
        ('samples/000_con_cho.mp4', 'Con chó'),
        ('samples/001_con_meo.mp4', 'Con mèo'),
        ('samples/005_con_tho.mp4', 'Con thỏ'),
        ('samples/006_con_trau.mp4', 'Con trâu'),
        ('samples/007_con_bo.mp4', 'Con bò'),
        ('samples/008_con_de.mp4', 'Con dê'),
        ('samples/009_con_heo.mp4', 'Con heo'),
        ('samples/010_mau_den.mp4', 'Màu đen'),
        ('samples/021_qua_man.mp4', 'Quả mận'),
        ('samples/022_qua_dua.mp4', 'Quả dứa'),
        ('samples/023_qua_dao.mp4', 'Quả đào'),
        ('samples/029_qua_dua.mp4', 'Quả dưa'),
        ('samples/031_me.mp4', 'Mẹ'),
        ('samples/032_con_trai.mp4', 'Con trai'),
        ('samples/033_con_gai.mp4', 'Con gái'),
        ('samples/035_chong.mp4', 'Chồng'),
        ('samples/044_mach.mp4', 'Mách'),
        ('samples/051_chay.mp4', 'Chạy'),
        ('samples/054_mua.mp4', 'Múa'),
        ('samples/055_nau.mp4', 'Nấu'),
        ('samples/057_nham_lan.mp4', 'Nhầm lẫn'),
        ('samples/059_cam_trai.mp4', 'Cắm trại'),
        ('samples/060_cung_cap.mp4', 'Cung cấp'),
        ('samples/062_bat_buoc.mp4', 'Bắt buộc'),
        ('samples/064_mua_ban.mp4', 'Mua bán'),
        ('samples/066_khong_nen.mp4', 'Không nên'),
        ('samples/067_khong_can.mp4', 'Không cần'),
        ('samples/069_khong_nghe_loi.mp4', 'Không nghe lời'),
        ('samples/073_ngot.mp4', 'Ngọt'),
        ('samples/079_chat.mp4', 'Chật'),
        ('samples/080_hep.mp4', 'Hẹp'),
        ('samples/081_rong.mp4', 'Rộng'),
        ('samples/082_dai.mp4', 'Dài'),
        ('samples/085_om.mp4', 'Ốm'),
        ('samples/086_map.mp4', 'Mập'),
        ('samples/087_ngoan.mp4', 'Ngoan'),
        ('samples/089_khoe.mp4', 'Khoẻ'),
        ('samples/091_dau.mp4', 'Đau'),
        ('samples/095_tot_bung.mp4', 'Tốt bụng'),
        ('samples/097_thu_vi.mp4', 'Thú vị'),
    )
]
# Everything below runs once at import time and is shared by all requests.
device = 'cpu'
model_name = 'VieSignLang/videomae_skeleton_v1.0'

# Load the pretrained image processor and the video classifier, then put the
# classifier in eval mode on the target device.
image_processor = VideoMAEImageProcessor.from_pretrained(model_name)
model = (
    VideoMAEForVideoClassification.from_pretrained(model_name)
    .eval()
    .to(device)
)

# Normalisation statistics come from the processor configuration.
mean, std = image_processor.image_mean, image_processor.image_std

# The processor's size config either gives one 'shortest_edge' value (used
# for both dimensions) or explicit 'height' and 'width' entries.
if 'shortest_edge' not in image_processor.size:
    model_input_height = image_processor.size['height']
    model_input_width = image_processor.size['width']
else:
    model_input_height = image_processor.size['shortest_edge']
    model_input_width = model_input_height

# Per-input transform: divide by 255, then normalise with mean/std above.
transform = Compose([
    Lambda(lambda pixels: pixels / 255.0),
    Normalize(mean=mean, std=std),
])
def inference(
    video: str,
    progress: gr.Progress = gr.Progress(),
) -> str:
    """Run the sign classifier on a video and format the predictions as text.

    Args:
        video: Video source forwarded unchanged to ``preprocess`` (Gradio
            passes the uploaded video here).
        progress: Gradio progress tracker. It has to stay a default argument:
            that is how Gradio recognises and injects it.

    Returns:
        One line per prediction, formatted ``"<index>. <label> (<score>)"``
        with indices starting at 0, and surrounding whitespace stripped.
        An empty string when there are no predictions.
    """
    progress(0, desc='Preprocessing video')

    # A detector is created per request; the ``with`` block guarantees it is
    # closed again (even on errors) instead of leaking one per call.
    with holistic.Holistic(
        static_image_mode=False,
        model_complexity=2,
        enable_segmentation=True,
        refine_face_landmarks=True,
    ) as keypoints_detector:
        inputs = preprocess(
            model_num_frames=model.config.num_frames,
            keypoints_detector=keypoints_detector,
            source=video,
            model_input_height=model_input_height,
            model_input_width=model_input_width,
            device=device,
            transform=transform,
        )

        # Prediction stays inside the block so the detector is still open in
        # case ``inputs`` depends on it lazily (``preprocess`` is not visible
        # from this file).
        progress(1/2, desc='Getting predictions')
        predictions = get_predictions(inputs=inputs, model=model)

    output_message = '\n'.join(
        f'{i}. {prediction["label"]} ({prediction["score"]})'
        for i, prediction in enumerate(predictions)
    ).strip()

    # Report full completion; this step used to report 1/2 again.
    progress(1, desc='Completed')
    return output_message
# Build the demo UI: one video input, one text output, with the sample clips
# offered as examples. The server is started as soon as this module runs.
iface = gr.Interface(
    inference,
    'video',
    'text',
    title=title,
    description=description,
    examples=examples,
)
iface.launch()