Spaces:

leekwoon
/

Whisper-FastAPI

Sleeping

App Files Files Community

dahyedahye committed on Sep 2, 2024

Commit

c2d3acc

1 Parent(s): ae3884d

Add application file

Browse files

Files changed (1) hide show

main.py +96 -22

main.py CHANGED Viewed

@@ -1,38 +1,112 @@
-from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import FileResponse
 import os
 import shutil
 from modules.whisper.whisper_factory import WhisperFactory
 app = FastAPI()
 # Initialize Whisper inference engine
 whisper_inf = WhisperFactory.create_whisper_inference(
-    whisper_type="faster-whisper",
     whisper_model_dir=os.path.join("models", "Whisper"),
     faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
     insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
     output_dir=os.path.join("outputs"),
 )
-@app.post("/upload-video/")
-async def upload_video(file: UploadFile = File(...)):
     """
-    Upload a video file and get the generated SRT file as a response.
     """
-    # Save the uploaded video file temporarily
-    input_video_path = os.path.join("temp", file.filename)
-    os.makedirs("temp", exist_ok=True)
-    with open(input_video_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    # Generate the subtitle file
-    output_srt_path = whisper_inf.transcribe_file(
-        input_video_path,
-        file_format="SRT",
-        add_timestamp=True
-    )
-    # Return the SRT file as a response
-    return FileResponse(path=output_srt_path, filename=os.path.basename(output_srt_path))

 import os
 import shutil
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import FileResponse, JSONResponse
+from typing import Optional
 from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperParameters
 app = FastAPI()
 # Initialize Whisper inference engine
 whisper_inf = WhisperFactory.create_whisper_inference(
+    whisper_type="faster-whisper",  # Choose between "whisper", "faster-whisper", "insanely-fast-whisper"
     whisper_model_dir=os.path.join("models", "Whisper"),
     faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
     insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
     output_dir=os.path.join("outputs"),
 )
@app.post("/transcribe/")
async def transcribe_video(
    file: UploadFile = File(...),
    model_size: str = Form("large-v2"),
    language: str = Form("en"),
    translate: bool = Form(False),
    file_format: str = Form("SRT"),  # Options: "SRT", "WebVTT", "txt"
    add_timestamp: bool = Form(True)
):
    """
    Upload a video/audio file and get the generated subtitle file as a response.

    The upload is written into a private, per-request directory under
    ``temp/``, passed to ``whisper_inf.transcribe_file`` and removed again once
    the handler finishes, whether it succeeded or failed.

    Args:
        file: The uploaded media file (multipart form field).
        model_size: Forwarded to ``WhisperParameters(model_size=...)``.
        language: Forwarded to ``WhisperParameters(lang=...)``.
        translate: Forwarded to ``WhisperParameters(is_translate=...)``.
        file_format: Forwarded to ``transcribe_file(file_format=...)``.
        add_timestamp: Forwarded to ``transcribe_file(add_timestamp=...)``.

    Returns:
        A ``FileResponse`` streaming the first file reported by
        ``transcribe_file``, or a ``JSONResponse`` with status 500 and a
        ``"message"`` key when no file was produced or any exception occurred.
    """
    # Function-scope import so this block does not depend on the file header.
    import tempfile

    temp_dir = "temp"
    # Bound before the ``try`` so the ``finally`` clause can never hit an
    # unbound name when an early step (e.g. ``os.makedirs``) fails.
    work_dir = None
    try:
        os.makedirs(temp_dir, exist_ok=True)

        # Each request gets its own directory: concurrent uploads that share a
        # filename can neither overwrite nor delete each other's input.
        work_dir = tempfile.mkdtemp(dir=temp_dir)

        # ``file.filename`` is client-controlled. Keep only its final path
        # component (for both "/" and "\\" separators) so it cannot escape
        # ``work_dir``; fall back to a fixed name when nothing usable is left.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            safe_name = "upload"

        # Save the uploaded file temporarily
        input_file_path = os.path.join(work_dir, safe_name)
        with open(input_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Prepare whisper parameters. Only model_size, lang and is_translate
        # come from the request; everything else is a fixed value.
        whisper_params = WhisperParameters(
            model_size=model_size,
            lang=language,
            is_translate=translate,
            beam_size=5,
            log_prob_threshold=-1.0,
            no_speech_threshold=0.6,
            compute_type="float16",  # or "int8_float16", etc.
            best_of=5,
            patience=1.0,
            condition_on_previous_text=True,
            initial_prompt=None,
            temperature=0.0,
            compression_ratio_threshold=2.4,
            vad_filter=False,
            threshold=0.5,
            min_speech_duration_ms=250,
            max_speech_duration_s=9999,
            min_silence_duration_ms=2000,
            speech_pad_ms=400,
            chunk_length_s=None,
            batch_size=None,
            is_diarize=False,
            hf_token=None,
            diarization_device=None,
            length_penalty=1.0,
            repetition_penalty=1.0,
            no_repeat_ngram_size=0,
            prefix=None,
            suppress_blank=True,
            suppress_tokens="[-1]",
            max_initial_timestamp=1.0,
            word_timestamps=False,
            prepend_punctuations="\"'“¿([{-",
            append_punctuations="\"'.。,，!！?？:：”)]}、",
            max_new_tokens=None,
            chunk_length=None,
            hallucination_silence_threshold=None,
            hotwords=None,
            language_detection_threshold=None,
            language_detection_segments=1,
            prompt_reset_on_temperature=0.5
        )

        # Transcribe the file.
        # NOTE(review): this is a synchronous call inside an ``async def``
        # handler, so it appears to occupy the event loop for the whole
        # transcription — confirm whether it should be offloaded to a worker
        # thread (and whether ``whisper_inf`` tolerates concurrent use).
        result_str, result_files = whisper_inf.transcribe_file(
            files=[input_file_path],
            input_folder_path="",
            file_format=file_format,
            add_timestamp=add_timestamp,
            whisper_params=whisper_params
        )

        # Check if transcription was successful
        if not result_files:
            return JSONResponse(status_code=500, content={"message": "Transcription failed."})

        # Return the first result file. Only the uploaded input is cleaned up
        # below; the generated output file is left in place for the response.
        output_file_path = result_files[0]
        return FileResponse(
            path=output_file_path,
            filename=os.path.basename(output_file_path),
            media_type='application/octet-stream'
        )
    except Exception as e:
        # Request boundary: report any failure as a JSON 500 instead of letting
        # it propagate out of the handler.
        return JSONResponse(status_code=500, content={"message": str(e)})
    finally:
        # Clean up temporary files (best effort; a cleanup failure must not
        # replace the response that was already chosen above).
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)