Spaces:

leekwoon
/

Whisper-FastAPI

Sleeping

App Files Community

dahyedahye committed on Sep 2, 2024

Commit

6bac6fb

1 Parent(s): 13fcab8

Add application file

Browse files

Files changed (4) hide show

main.py +4 -4
main_backup.py +112 -0
ui/__init__.py +0 -0
ui/htmls.py +97 -0

main.py CHANGED Viewed

@@ -84,12 +84,12 @@ async def transcribe_video(
             prompt_reset_on_temperature=0.5
         )
         # Transcribe the file
         result_str, result_files = whisper_inf.transcribe_file(
-            files=[input_file_path],
-            input_folder_path="",
-            file_format=file_format,
-            add_timestamp=add_timestamp,
             *whisper_params.as_list()  # Expand whisper_params as individual arguments
         )

             prompt_reset_on_temperature=0.5
         )
+        # Prepare params and whisper parameters as a single list
+        params = [input_file_path, "", file_format, add_timestamp]
         # Transcribe the file
         result_str, result_files = whisper_inf.transcribe_file(
+            *params,  # Expand the params list
             *whisper_params.as_list()  # Expand whisper_params as individual arguments
         )

main_backup.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import os
+import shutil
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import FileResponse, JSONResponse
+from typing import Optional
+from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperParameters
+app = FastAPI()
+# Initialize Whisper inference engine
+whisper_inf = WhisperFactory.create_whisper_inference(
+    whisper_type="faster-whisper",  # Choose between "whisper", "faster-whisper", "insanely-fast-whisper"
+    whisper_model_dir=os.path.join("models", "Whisper"),
+    faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
+    insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
+    output_dir=os.path.join("outputs"),
+)
+@app.post("/transcribe/")
+async def transcribe_video(
+    file: UploadFile = File(...),
+    model_size: str = Form("large-v2"),
+    language: str = Form("en"),
+    translate: bool = Form(False),
+    file_format: str = Form("SRT"),  # Options: "SRT", "WebVTT", "txt"
+    add_timestamp: bool = Form(True)
+):
+    """
+    Upload a video/audio file and get the generated subtitle file as a response.
+    """
+    try:
+        # Create temporary directories
+        temp_dir = "temp"
+        os.makedirs(temp_dir, exist_ok=True)
+        # Save the uploaded file temporarily
+        input_file_path = os.path.join(temp_dir, file.filename)
+        with open(input_file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+        # Prepare whisper parameters
+        whisper_params = WhisperParameters(
+            model_size=model_size,
+            lang=language,
+            is_translate=translate,
+            beam_size=5,
+            log_prob_threshold=-1.0,
+            no_speech_threshold=0.6,
+            compute_type="float16",  # or "int8_float16", etc.
+            best_of=5,
+            patience=1.0,
+            condition_on_previous_text=True,
+            initial_prompt=None,
+            temperature=0.0,
+            compression_ratio_threshold=2.4,
+            vad_filter=False,
+            threshold=0.5,
+            min_speech_duration_ms=250,
+            max_speech_duration_s=9999,
+            min_silence_duration_ms=2000,
+            speech_pad_ms=400,
+            chunk_length_s=None,
+            batch_size=None,
+            is_diarize=False,
+            hf_token=None,
+            diarization_device=None,
+            length_penalty=1.0,
+            repetition_penalty=1.0,
+            no_repeat_ngram_size=0,
+            prefix=None,
+            suppress_blank=True,
+            suppress_tokens="[-1]",
+            max_initial_timestamp=1.0,
+            word_timestamps=False,
+            prepend_punctuations="\"'“¿([{-",
+            append_punctuations="\"'.。,，!！?？:：”)]}、",
+            max_new_tokens=None,
+            chunk_length=None,
+            hallucination_silence_threshold=None,
+            hotwords=None,
+            language_detection_threshold=None,
+            language_detection_segments=1,
+            prompt_reset_on_temperature=0.5
+        )
+        # Transcribe the file
+        result_str, result_files = whisper_inf.transcribe_file(
+            files=[input_file_path],
+            input_folder_path="",
+            file_format=file_format,
+            add_timestamp=add_timestamp,
+            *whisper_params.as_list()  # Expand whisper_params as individual arguments
+        )
+        # Check if transcription was successful
+        if not result_files:
+            return JSONResponse(status_code=500, content={"message": "Transcription failed."})
+        # Return the first result file
+        output_file_path = result_files[0]
+        return FileResponse(
+            path=output_file_path,
+            filename=os.path.basename(output_file_path),
+            media_type='application/octet-stream'
+        )
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"message": str(e)})
+    finally:
+        # Clean up temporary files
+        if os.path.exists(input_file_path):
+            os.remove(input_file_path)

ui/__init__.py ADDED Viewed

File without changes

ui/htmls.py ADDED Viewed

	@@ -0,0 +1,97 @@

+CSS = """
+.bmc-button {
+    padding: 2px 5px;
+    border-radius: 5px;
+    background-color: #FF813F;
+    color: white;
+    box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.3);
+    text-decoration: none;
+    display: inline-block;
+    font-size: 20px;
+    margin: 2px;
+    cursor: pointer;
+    -webkit-transition: background-color 0.3s ease;
+    -ms-transition: background-color 0.3s ease;
+    transition: background-color 0.3s ease;
+}
+.bmc-button:hover,
+.bmc-button:active,
+.bmc-button:focus {
+    background-color: #FF5633;
+}
+.markdown {
+    margin-bottom: 0;
+    padding-bottom: 0;
+}
+.tabs {
+    margin-top: 0;
+    padding-top: 0;
+}
+#md_project a {
+  color: black;
+  text-decoration: none;
+}
+#md_project a:hover {
+  text-decoration: underline;
+}
+"""
+MARKDOWN = """
+### [Whisper Web-UI](https://github.com/jhj0517/Whsiper-WebUI)
+"""
+NLLB_VRAM_TABLE = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <style>
+    table {
+      border-collapse: collapse;
+      width: 100%;
+    }
+    th, td {
+      border: 1px solid #dddddd;
+      text-align: left;
+      padding: 8px;
+    }
+    th {
+      background-color: #f2f2f2;
+    }
+  </style>
+</head>
+<body>
+<details>
+  <summary>VRAM usage for each model</summary>
+  <table>
+    <thead>
+      <tr>
+        <th>Model name</th>
+        <th>Required VRAM</th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td>nllb-200-3.3B</td>
+        <td>~16GB</td>
+      </tr>
+      <tr>
+        <td>nllb-200-1.3B</td>
+        <td>~8GB</td>
+      </tr>
+      <tr>
+        <td>nllb-200-distilled-600M</td>
+        <td>~4GB</td>
+      </tr>
+    </tbody>
+  </table>
+  <p><strong>Note:</strong> Be mindful of your VRAM! The table above provides an approximate VRAM usage for each model.</p>
+</details>
+</body>
+</html>
+"""