import os

# This assignment deliberately sits above the gradio_client import; keep the
# ordering when reorganising the imports.
# NOTE(review): presumably restricts GPU-aware dependencies to device 1 --
# confirm it is still required for a client that only calls a remote Space.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from gradio_client import Client, handle_file
from typing import Any, Dict, List, Optional, Tuple, Union
import json

# Shared client for the remote Space. It is created once, when this module
# is imported, and reused by every wrapper below.
svision_client = Client("VeuReu/svision")


def extract_scenes(video_path: str, threshold: float = 30.0, offset_frames: int = 5, crop_ratio: float = 0.1):
    """
    Run scene extraction on a video through the remote Space VeuReu/svision.

    The video is wrapped with ``handle_file`` and sent, together with the
    tuning parameters, to the ``/scenes_extraction`` endpoint.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    threshold : float, optional
        Scene change detection threshold; higher values make detection
        less sensitive.
    offset_frames : int, optional
        Number of frames to include before and after a detected scene
        boundary.
    crop_ratio : float, optional
        Ratio for cropping borders before performing scene detection.

    Returns
    -------
    Any
        Whatever the remote ``/scenes_extraction`` endpoint responds with,
        passed through unchanged.
    """
    video_payload = {"video": handle_file(video_path)}
    return svision_client.predict(
        video_file=video_payload,
        threshold=threshold,
        offset_frames=offset_frames,
        crop_ratio=crop_ratio,
        api_name="/scenes_extraction",
    )


def keyframes_every_second_extraction(video_path: str):
    """
    Request keyframe extraction for a video from the remote Space
    VeuReu/svision.

    The video is wrapped with ``handle_file`` and sent to the
    ``/keyframes_every_second_extraction`` endpoint.

    Parameters
    ----------
    video_path : str
        Path to the input video file.

    Returns
    -------
    Any
        Whatever the remote ``/keyframes_every_second_extraction`` endpoint
        responds with, passed through unchanged.
    """
    video_payload = {"video": handle_file(video_path)}
    return svision_client.predict(
        video_path=video_payload,
        api_name="/keyframes_every_second_extraction",
    )


def add_ocr_and_faces(imagen_path: str, informacion_image: Dict[str, Any], face_col: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Send an image plus its metadata and face collection to the
    ``/add_ocr_and_faces`` endpoint of the remote Space VeuReu/svision.

    Both structured arguments are serialised to JSON strings before being
    sent; the image itself is wrapped with ``handle_file``.

    Parameters
    ----------
    imagen_path : str
        Path to the input image file.
    informacion_image : Dict[str, Any]
        Dictionary containing image-related metadata. Must be
        JSON-serialisable.
    face_col : List[Dict[str, Any]]
        List of dictionaries representing detected faces or face metadata.
        Must be JSON-serialisable.

    Returns
    -------
    Dict[str, Any]
        Response of the remote endpoint, passed through unchanged
        (described upstream as OCR results, face detection data, and
        annotations).
    """
    # Serialise first so that non-JSON-serialisable input fails before the
    # image file is touched.
    metadata_json = json.dumps(informacion_image)
    faces_json = json.dumps(face_col)

    image_payload = handle_file(imagen_path)
    return svision_client.predict(
        image=image_payload,
        informacion_image=metadata_json,
        face_col=faces_json,
        api_name="/add_ocr_and_faces",
    )


def extract_descripcion_escena(imagen_path: str) -> str:
    """
    Ask the remote Space VeuReu/svision for a textual description of an
    image via the ``/describe_images`` endpoint.

    The image is sent as a one-element list of ``{"image": ...}`` entries.
    A progress message is printed to stdout before the request is made.

    Parameters
    ----------
    imagen_path : str
        Path to the input image file.

    Returns
    -------
    str
        Response of the remote endpoint, passed through unchanged.
        NOTE(review): the request carries a list of images, so the
        response may be a collection rather than a single string --
        confirm against the endpoint.
    """
    print("Calling svision to describe the scene...")
    image_batch = [{"image": handle_file(imagen_path)}]
    return svision_client.predict(
        images=image_batch,
        api_name="/describe_images",
    )