import os
# Set CUDA_VISIBLE_DEVICES to "1" for this process, which limits CUDA-aware
# libraries to the GPU with index 1.
# NOTE(review): presumably assigned ahead of the remaining imports so the value
# is already in place if any of them initialises CUDA — confirm before moving.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
|
| from gradio_client import Client, handle_file |
| from typing import Any, Dict, List |
| from PIL import Image |
| import json |
|
|
| |
# Module-level gradio_client Client bound to the "VeuReu/asr" Space. It is
# constructed once, when this module is imported, and reused by the wrapper
# functions defined below.
asr_client = Client("VeuReu/asr")
|
|
|
|
def extract_audio_from_video(video_path: str) -> str:
    """
    Extract the audio track of a video through the remote VeuReu/asr Space.

    The video is wrapped with ``handle_file`` and passed, under the ``"video"``
    key, to the ``/extract_audio_ffmpeg`` endpoint via the shared ``asr_client``.

    Parameters
    ----------
    video_path : str
        Path to the input video file whose audio should be extracted.

    Returns
    -------
    str
        The endpoint's response, returned unchanged. Presumably a path or
        identifier of the extracted audio file — confirm against the Space API.
    """
    video_payload = {"video": handle_file(video_path)}
    return asr_client.predict(
        video_file=video_payload,
        api_name="/extract_audio_ffmpeg",
    )
|
|
|
|
def diarize_audio(audio_path: str) -> str:
    """
    Request speaker diarization from the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and sent to the
    ``/diaritzar_audio`` endpoint via the shared ``asr_client``.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to be diarized.

    Returns
    -------
    str
        The endpoint's response, returned unchanged. Presumably a JSON-like
        description of speaker segments and timings — confirm against the
        Space API.
    """
    wav_handle = handle_file(audio_path)
    return asr_client.predict(
        wav_archivo=wav_handle,
        api_name="/diaritzar_audio",
    )
|
|
|
|
def transcribe_long_audio(audio_path: str) -> str:
    """
    Transcribe a long recording through the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and sent to the
    ``/transcribe_long_audio`` endpoint via the shared ``asr_client``.

    Parameters
    ----------
    audio_path : str
        Path to the long audio file to be transcribed.

    Returns
    -------
    str
        The endpoint's response, returned unchanged (presumably the
        transcribed text — confirm against the Space API).
    """
    long_wav = handle_file(audio_path)
    return asr_client.predict(
        wav_path=long_wav,
        api_name="/transcribe_long_audio",
    )
|
|
|
|
def transcribe_short_audio(audio_path: str) -> str:
    """
    Transcribe a short audio sample through the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and sent to the
    ``/transcribe_wav`` endpoint via the shared ``asr_client``.

    Parameters
    ----------
    audio_path : str
        Path to the short audio file to be transcribed.

    Returns
    -------
    str
        The endpoint's response, returned unchanged (presumably the
        transcribed text — confirm against the Space API).
    """
    short_wav = handle_file(audio_path)
    return asr_client.predict(
        wav_path=short_wav,
        api_name="/transcribe_wav",
    )
|
|
|
|
def identificar_veu(clip_path: str, voice_col: List[Dict[str, Any]]):
    """
    Ask the remote VeuReu/asr Space which known voice appears in a clip.

    ``voice_col`` is serialized to a JSON string and sent, together with the
    clip wrapped by ``handle_file``, to the ``/identificar_veu`` endpoint via
    the shared ``asr_client``.

    Parameters
    ----------
    clip_path : str
        Path to the audio clip whose speaker is to be identified.
    voice_col : List[Dict[str, Any]]
        Collection of known-voice entries. It must be JSON-serializable,
        because it is passed through ``json.dumps`` before being sent.

    Returns
    -------
    Any
        The endpoint's response, returned unchanged.
    """
    # Serialize first so a non-serializable collection fails before the clip
    # is touched, exactly as in the original statement order.
    serialized_voices = json.dumps(voice_col)
    clip_handle = handle_file(clip_path)
    return asr_client.predict(
        wav_archivo=clip_handle,
        voice_col=serialized_voices,
        api_name="/identificar_veu",
    )
|
|