// jeanma's picture
// Omnilingual ASR transcription demo
// ae238b3 verified
// API service for transcription functionality
// Common API error handling and utilities
/**
 * Error raised for non-OK HTTP responses.
 *
 * Carries the HTTP status code (when known) and a flag telling callers
 * whether the failure was a "server busy" condition.
 */
class ApiError extends Error {
  public status?: number;
  public isServerBusy: boolean;

  /**
   * @param message Human-readable description of the failure.
   * @param status HTTP status code of the failed response, if available.
   * @param isServerBusy Whether the failure is a "server busy" condition; defaults to `false`.
   */
  constructor(message: string, status?: number, isServerBusy: boolean = false) {
    super(message);
    this.status = status;
    this.isServerBusy = isServerBusy;
    // Override the default "Error" name so logs identify this error type.
    this.name = 'ApiError';
  }
}
/**
 * Returns the server base URL read from the `VITE_SERVER_URL` environment
 * variable, or an empty string when it is unset/empty (endpoints built from
 * it then become relative paths such as `/status`).
 */
const getServerUrl = (): string => {
  const configuredUrl = import.meta.env.VITE_SERVER_URL;
  return configuredUrl || "";
};
/**
 * Centralized fetch wrapper with consistent error handling.
 *
 * @param endpoint URL to request.
 * @param options Options forwarded to `fetch` unchanged.
 * @param expectJson When `true` (default) the parsed JSON body is returned;
 *   when `false` the raw response object is returned.
 * @returns The parsed JSON body, or the raw response when `expectJson` is `false`.
 * @throws ApiError for any non-OK response. The message is the body's `error`
 *   field when the body is JSON and has one, otherwise a generic
 *   `HTTP error! status: N`. A 503 is flagged as server-busy and its message is
 *   prefixed with `Server busy: `. Failures of `fetch` itself propagate as-is.
 */
const fetchApi = async (
  endpoint: string,
  options: RequestInit = {},
  expectJson: boolean = true
): Promise<any> => {
  const response = await fetch(endpoint, options);

  // Happy path first: hand back either the decoded body or the raw response.
  if (response.ok) {
    return expectJson ? response.json() : response;
  }

  // Try to extract error details from the response; a non-JSON body simply
  // leaves the detail undefined so the defaults below apply.
  let serverDetail;
  try {
    const errorBody = await response.json();
    serverDetail = errorBody?.error;
  } catch {
    serverDetail = undefined;
  }

  const isServerBusy = response.status === 503;
  const errorMessage = isServerBusy
    ? `Server busy: ${serverDetail || 'Server is currently processing another request'}`
    : serverDetail || `HTTP error! status: ${response.status}`;

  throw new ApiError(errorMessage, response.status, isServerBusy);
};
/**
 * Builds a `FormData` object from a plain record, appending one field per
 * entry in the record's own enumeration order.
 *
 * @param data Field names mapped to string, `File`, or `Blob` values.
 * @returns A new `FormData` containing every entry of `data`.
 */
const createFormData = (data: Record<string, string | File | Blob>): FormData => {
  const form = new FormData();
  for (const [fieldName, fieldValue] of Object.entries(data)) {
    form.append(fieldName, fieldValue);
  }
  return form;
};
/**
 * One timed piece of transcribed text.
 *
 * NOTE(review): `start`, `end` and `duration` are presumably in seconds —
 * confirm against the server response.
 */
export interface AlignedSegment {
  /** Text of this segment. */
  text: string;
  /** Start position of the segment. */
  start: number;
  /** End position of the segment. */
  end: number;
  /** Length of the segment. */
  duration: number;
  /** Index of the chunk this segment belongs to, when provided. */
  chunk_index?: number;
  /** Index of the speech segment this segment belongs to, when provided. */
  speech_segment_index?: number;
  /** Merge history to allow intelligent splitting. */
  mergedFrom?: AlignedSegment[];
  /** The threshold used when this merge was created. */
  mergeThreshold?: number;
}
/**
 * Summary of one chunk of a long-form transcription.
 *
 * NOTE(review): time fields are presumably in seconds — confirm against the
 * server response.
 */
export interface ChunkInfo {
  /** Position of this chunk in the chunk list. */
  chunk_index: number;
  /** Transcribed text of this chunk. */
  transcription: string;
  /** Number of segments in this chunk. */
  num_segments: number;
  /** Where the chunk starts. */
  start_time: number;
  /** Where the chunk ends. */
  end_time: number;
  /** Length of the chunk. */
  duration: number;
}
/** Preprocessed audio payload attached to a transcription response. */
export interface PreprocessedAudio {
  /** Audio format, e.g. "wav". */
  format: string;
  /** Base64 encoded audio data. */
  data: string;
  /** Size of the audio in bytes. */
  size_bytes: number;
  /** Sample rate of the audio. */
  sample_rate: number;
  /** Length of the audio. NOTE(review): presumably seconds — confirm. */
  duration: number;
}
/** Shape of the JSON body returned by the `/transcribe` endpoint. */
export interface TranscriptionResponse {
  /** Outcome marker; `transcribeAudio` treats anything other than "success" as a failure. */
  status: string;
  /** Model identifier reported by the server. */
  model: string;
  /** Device reported by the server, when provided. */
  device?: string;

  /** Full transcription text. */
  transcription: string;
  /** Timed segments of the transcription. */
  aligned_segments: AlignedSegment[];
  /** Number of segments. */
  num_segments: number;
  /** Total length of the transcribed media. NOTE(review): presumably seconds — confirm. */
  total_duration: number;
  /** Whether alignment was available, when the server reports it. */
  alignment_available?: boolean;

  // Long-form specific fields
  /** Per-chunk summaries. */
  chunks?: ChunkInfo[];
  /** Number of chunks. */
  num_chunks?: number;

  /** Preprocessed audio data. */
  preprocessed_audio?: PreprocessedAudio;
}
/** Server activity snapshot, returned by `/status` and embedded in the health response. */
export interface ServerStatus {
  /** Whether the server is currently busy. */
  is_busy: boolean;
  /** Completed-operations counter. NOTE(review): exact meaning/scope not visible here — confirm with the server. */
  total_completed: number;
  /** Name of the operation in progress, when provided. */
  current_operation?: string;
  /** File name associated with the operation in progress, when provided. */
  current_filename?: string;
  /** Progress of the operation in progress. NOTE(review): scale (0–1 vs 0–100) not visible here — confirm. */
  progress?: number;
  /** A duration in seconds, when provided. */
  duration_seconds?: number;
}
/** Shape of the JSON body returned by the `/health` endpoint. */
export interface HealthResponse {
  // Service identity and overall state
  /** Health status string reported by the server. */
  status: string;
  /** Accompanying message. */
  message: string;
  /** Service name. */
  service: string;
  /** Service version. */
  version: string;

  // Runtime capabilities
  /** Device reported by the server. */
  device: string;
  /** Whether CUDA is available. */
  cuda_available: boolean;
  /** Whether ffmpeg is available. */
  ffmpeg_available: boolean;

  /** Current transcription activity. */
  transcription_status: ServerStatus;

  // Optional GPU details
  /** Number of GPUs. */
  gpu_count?: number;
  /** Index of the current device. */
  current_device?: number;
  /** Name of the GPU. */
  gpu_name?: string;
  /** GPU memory total, in MB. */
  gpu_memory_total_mb?: number;
  /** GPU memory allocated, in MB. */
  gpu_memory_allocated_mb?: number;
  /** GPU memory reserved, in MB. */
  gpu_memory_reserved_mb?: number;
  /** GPU memory free, in MB. */
  gpu_memory_free_mb?: number;
}
/**
 * Main transcription API function: uploads a media file to `/transcribe` and
 * returns the server's response.
 *
 * @param file Media file to upload; sent under the single `media` form field.
 * @param languageCode Language code; only sent when `scriptCode` is also given.
 * @param scriptCode Script code; combined with `languageCode` as
 *   `<languageCode>_<scriptCode>` in the `language` form field.
 * @param onVideoProcessing Optional callback invoked with `true` before the
 *   upload and `false` afterwards (success or failure), but only when the
 *   file's MIME type starts with `video/`.
 * @returns The server response when its `status` is "success".
 * @throws Error with message "Transcription failed" when the response status is
 *   not "success"; errors raised by `fetchApi` propagate unchanged.
 */
export const transcribeAudio = async (
  file: File,
  languageCode?: string | null,
  scriptCode?: string | null,
  onVideoProcessing?: (isProcessing: boolean) => void
): Promise<TranscriptionResponse> => {
  // Video uploads get UI feedback through the optional callback.
  const isVideoFile = file.type.startsWith("video/");
  if (isVideoFile) {
    onVideoProcessing?.(true);
    console.log("Processing video file on server side:", file.name);
  }

  try {
    // The server receives a language only when both codes are specified.
    const combinedLanguage =
      languageCode && scriptCode ? `${languageCode}_${scriptCode}` : null;

    // Single 'media' parameter for all file types.
    const formData = createFormData({ media: file });
    if (combinedLanguage !== null) {
      formData.append("language", combinedLanguage);
    }
    // Request preprocessed audio for waveform generation.
    formData.append("include_preprocessed", "true");

    console.log('transcribeAudio - About to make API call with formData:', {
      fileName: file.name,
      fileType: file.type,
      fileSize: file.size,
      hasLanguage: combinedLanguage !== null,
      combinedLanguage
    });

    // Debug: check that the File object is still valid and readable.
    if (!(file instanceof File)) {
      console.error('transcribeAudio - File is not a valid File object:', file);
    } else {
      console.log('transcribeAudio - File is valid File object');
      // Read a small portion to ensure the contents are accessible; a failure
      // here is only logged and does not abort the upload.
      try {
        const { byteLength } = await file.slice(0, 100).arrayBuffer();
        console.log('transcribeAudio - File slice readable, first 100 bytes length:', byteLength);
      } catch (readError) {
        console.error('transcribeAudio - File slice read failed:', readError);
      }
    }

    const endpoint = `${getServerUrl()}/transcribe`;
    const result = await fetchApi(endpoint, { method: "POST", body: formData });
    if (result.status === "success") {
      return result;
    }
    throw new Error("Transcription failed");
  } finally {
    // Always clear the video-processing indicator, even on failure.
    if (isVideoFile) {
      onVideoProcessing?.(false);
    }
  }
};
// Server status API functions

/**
 * Fetches the server's current status from the `/status` endpoint.
 *
 * @returns The parsed JSON body of the response.
 * @throws Whatever `fetchApi` throws for a failed request.
 */
export const getServerStatus = async (): Promise<ServerStatus> => {
  const statusUrl = `${getServerUrl()}/status`;
  return fetchApi(statusUrl);
};
/**
 * Fetches the server's health report from the `/health` endpoint.
 *
 * @returns The parsed JSON body of the response.
 * @throws Whatever `fetchApi` throws for a failed request.
 */
export const getServerHealth = async (): Promise<HealthResponse> => {
  const healthUrl = `${getServerUrl()}/health`;
  return fetchApi(healthUrl);
};
/**
 * Video + subtitles combination API function: posts a video and subtitle text
 * to `/combine-video-subtitles` and returns the response body as a `Blob`.
 *
 * @param videoFile Video file, sent as the `video` form field.
 * @param subtitleContent Subtitle text, sent as the `subtitles` form field.
 * @param language Value of the `language` form field; defaults to `'eng'`.
 * @param format Subtitle format, sent as `format`; defaults to `'srt'`.
 * @param outputFormat Requested container, sent as `output_format`; defaults to `'mp4'`.
 * @returns The response body read as a `Blob`.
 * @throws Whatever `fetchApi` throws for a failed request.
 */
export const combineVideoWithSubtitles = async (
  videoFile: File,
  subtitleContent: string,
  language: string = 'eng',
  format: 'srt' | 'webvtt' = 'srt',
  outputFormat: 'mp4' | 'mkv' = 'mp4'
): Promise<Blob> => {
  const payload = createFormData({
    video: videoFile,
    subtitles: subtitleContent,
    format,
    output_format: outputFormat,
    language
  });

  const endpoint = `${getServerUrl()}/combine-video-subtitles`;
  // Ask for the raw response (not JSON) so the body can be read as a blob.
  const rawResponse = await fetchApi(endpoint, { method: 'POST', body: payload }, false);
  return rawResponse.blob();
};
// Cache for supported languages: the resolved list, and the in-flight request
// (so concurrent callers share one fetch).
let supportedLanguagesCache: string[] | null = null;
let supportedLanguagesPromise: Promise<string[]> | null = null;

/**
 * Get supported languages with caching.
 *
 * The list is fetched from `/supported-languages` at most once per successful
 * load: later calls return the cached array, and calls made while a request is
 * in flight share that request's promise. A failed request clears the in-flight
 * promise so the next call retries.
 *
 * @returns The `supported_languages` array from the server response.
 * @throws Error when the response does not contain a `supported_languages`
 *   array; errors raised by `fetchApi` propagate unchanged.
 */
export const getSupportedLanguages = async (): Promise<string[]> => {
  // Return from cache if available
  if (supportedLanguagesCache) {
    return supportedLanguagesCache;
  }

  // Return existing promise if already in flight
  if (supportedLanguagesPromise) {
    return supportedLanguagesPromise;
  }

  // Create new promise and cache it
  supportedLanguagesPromise = (async () => {
    try {
      const response = await fetchApi(`${getServerUrl()}/supported-languages`);
      const languages: unknown = response?.supported_languages;

      // Reject malformed payloads instead of caching them: otherwise the
      // memoized promise would keep resolving to a non-array forever, with no
      // chance to retry.
      if (!Array.isArray(languages)) {
        throw new Error(
          "Invalid supported-languages response: 'supported_languages' is not an array"
        );
      }

      // Cache the result
      supportedLanguagesCache = languages;
      return languages;
    } catch (error) {
      // Reset promise on error so we can retry
      supportedLanguagesPromise = null;
      throw error;
    }
  })();

  return supportedLanguagesPromise;
};