Spaces:

leekwoon
/

Whisper-FastAPI

Sleeping

File size: 4,145 Bytes

6bac6fb

import os
import shutil
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import FileResponse, JSONResponse
from typing import Optional
from modules.whisper.whisper_factory import WhisperFactory
from modules.whisper.whisper_parameter import WhisperParameters

# ASGI application object; the route handlers in this file register on it.
app = FastAPI()

# Build the Whisper inference engine once, at import time, and keep it in the
# module-level name `whisper_inf` so request handlers can reuse it.
# Every directory below is a relative path, so it resolves against the
# process's current working directory.
whisper_inf = WhisperFactory.create_whisper_inference(
    whisper_type="faster-whisper",  # Choose between "whisper", "faster-whisper", "insanely-fast-whisper"
    whisper_model_dir=os.path.join("models", "Whisper"),
    faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
    insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
    output_dir=os.path.join("outputs"),  # single-component join: evaluates to just "outputs"
)

@app.post("/transcribe/")
async def transcribe_video(
    file: UploadFile = File(...),
    model_size: str = Form("large-v2"),
    language: str = Form("en"),
    translate: bool = Form(False),
    file_format: str = Form("SRT"),  # Options: "SRT", "WebVTT", "txt"
    add_timestamp: bool = Form(True)
):
    """
    Upload a video/audio file and get the generated subtitle file as a response.

    The upload is copied into the local ``temp`` directory, handed to the
    module-level ``whisper_inf`` engine, and removed again once the handler
    finishes, whether it succeeded or failed.

    Args:
        file: The uploaded media file (multipart form field).
        model_size: Forwarded as ``WhisperParameters.model_size``.
        language: Forwarded as ``WhisperParameters.lang``.
        translate: Forwarded as ``WhisperParameters.is_translate``.
        file_format: Output format passed to ``transcribe_file``.
        add_timestamp: Passed through to ``transcribe_file`` unchanged.

    Returns:
        A ``FileResponse`` serving the first file reported by
        ``transcribe_file`` as ``application/octet-stream``, or a
        ``JSONResponse`` with status 500 and a ``message`` field when no
        result file was produced or any exception was raised.

    NOTE(review): the body contains no ``await``, so the transcription call
    runs on the event loop for its whole duration.
    """
    temp_dir = "temp"
    # Bound before the try block so the finally clause can always inspect it,
    # even when a failure happens before the real path has been computed.
    input_file_path = None
    try:
        # Create temporary directories
        os.makedirs(temp_dir, exist_ok=True)

        # The filename is client-controlled: keep only its final component so
        # directory parts or an absolute path cannot escape temp_dir. Fall back
        # to a fixed name when it is missing or is a directory reference.
        safe_name = os.path.basename(file.filename or "")
        if safe_name in ("", ".", ".."):
            safe_name = "upload"

        # Save the uploaded file temporarily
        input_file_path = os.path.join(temp_dir, safe_name)
        with open(input_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Prepare whisper parameters
        whisper_params = WhisperParameters(
            model_size=model_size,
            lang=language,
            is_translate=translate,
            beam_size=5,
            log_prob_threshold=-1.0,
            no_speech_threshold=0.6,
            compute_type="float16",  # or "int8_float16", etc.
            best_of=5,
            patience=1.0,
            condition_on_previous_text=True,
            initial_prompt=None,
            temperature=0.0,
            compression_ratio_threshold=2.4,
            vad_filter=False,
            threshold=0.5,
            min_speech_duration_ms=250,
            max_speech_duration_s=9999,
            min_silence_duration_ms=2000,
            speech_pad_ms=400,
            chunk_length_s=None,
            batch_size=None,
            is_diarize=False,
            hf_token=None,
            diarization_device=None,
            length_penalty=1.0,
            repetition_penalty=1.0,
            no_repeat_ngram_size=0,
            prefix=None,
            suppress_blank=True,
            suppress_tokens="[-1]",
            max_initial_timestamp=1.0,
            word_timestamps=False,
            prepend_punctuations="\"'“¿([{-",
            append_punctuations="\"'.。,，!！?？:：”)]}、",
            max_new_tokens=None,
            chunk_length=None,
            hallucination_silence_threshold=None,
            hotwords=None,
            language_detection_threshold=None,
            language_detection_segments=1,
            prompt_reset_on_temperature=0.5
        )

        # Transcribe the file.
        # NOTE(review): although written last, the star-unpacked values are
        # bound positionally before the keyword arguments. If transcribe_file
        # declares files/input_folder_path/file_format/add_timestamp as its
        # leading positional parameters, this call raises TypeError
        # ("got multiple values for argument"). Its signature is not visible
        # from this file -- confirm.
        result_str, result_files = whisper_inf.transcribe_file(
            files=[input_file_path],
            input_folder_path="",
            file_format=file_format,
            add_timestamp=add_timestamp,
            *whisper_params.as_list()  # Expand whisper_params as individual arguments
        )

        # Check if transcription was successful
        if not result_files:
            return JSONResponse(status_code=500, content={"message": "Transcription failed."})

        # Return the first result file
        output_file_path = result_files[0]
        return FileResponse(
            path=output_file_path,
            filename=os.path.basename(output_file_path),
            media_type='application/octet-stream'
        )
    except Exception as e:
        return JSONResponse(status_code=500, content={"message": str(e)})
    finally:
        # Clean up the temporary upload. isfile() keeps os.remove from being
        # pointed at a directory; the None check covers failures that happened
        # before the path was computed.
        if input_file_path is not None and os.path.isfile(input_file_path):
            os.remove(input_file_path)