dahyedahye committed on
Commit
6bac6fb
·
1 Parent(s): 13fcab8

Add application file

Browse files
Files changed (4) hide show
  1. main.py +4 -4
  2. main_backup.py +112 -0
  3. ui/__init__.py +0 -0
  4. ui/htmls.py +97 -0
main.py CHANGED
@@ -84,12 +84,12 @@ async def transcribe_video(
84
  prompt_reset_on_temperature=0.5
85
  )
86
 
 
 
 
87
  # Transcribe the file
88
  result_str, result_files = whisper_inf.transcribe_file(
89
- files=[input_file_path],
90
- input_folder_path="",
91
- file_format=file_format,
92
- add_timestamp=add_timestamp,
93
  *whisper_params.as_list() # Expand whisper_params as individual arguments
94
  )
95
 
 
84
  prompt_reset_on_temperature=0.5
85
  )
86
 
87
+ # Prepare params and whisper parameters as a single list
88
+ params = [input_file_path, "", file_format, add_timestamp]
89
+
90
  # Transcribe the file
91
  result_str, result_files = whisper_inf.transcribe_file(
92
+ *params, # Expand the params list
 
 
 
93
  *whisper_params.as_list() # Expand whisper_params as individual arguments
94
  )
95
 
main_backup.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import shutil
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import FileResponse, JSONResponse
from typing import Optional
from modules.whisper.whisper_factory import WhisperFactory
from modules.whisper.whisper_parameter import WhisperParameters

# FastAPI application exposing the /transcribe/ endpoint defined below.
app = FastAPI()

# Initialize the Whisper inference engine once at import time so every request
# reuses the same (expensive-to-load) model wrapper instead of re-creating it.
whisper_inf = WhisperFactory.create_whisper_inference(
    whisper_type="faster-whisper",  # Choose between "whisper", "faster-whisper", "insanely-fast-whisper"
    whisper_model_dir=os.path.join("models", "Whisper"),
    faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
    insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
    output_dir=os.path.join("outputs"),  # transcription results are written here
)
19
+
20
+ @app.post("/transcribe/")
21
+ async def transcribe_video(
22
+ file: UploadFile = File(...),
23
+ model_size: str = Form("large-v2"),
24
+ language: str = Form("en"),
25
+ translate: bool = Form(False),
26
+ file_format: str = Form("SRT"), # Options: "SRT", "WebVTT", "txt"
27
+ add_timestamp: bool = Form(True)
28
+ ):
29
+ """
30
+ Upload a video/audio file and get the generated subtitle file as a response.
31
+ """
32
+ try:
33
+ # Create temporary directories
34
+ temp_dir = "temp"
35
+ os.makedirs(temp_dir, exist_ok=True)
36
+
37
+ # Save the uploaded file temporarily
38
+ input_file_path = os.path.join(temp_dir, file.filename)
39
+ with open(input_file_path, "wb") as buffer:
40
+ shutil.copyfileobj(file.file, buffer)
41
+
42
+ # Prepare whisper parameters
43
+ whisper_params = WhisperParameters(
44
+ model_size=model_size,
45
+ lang=language,
46
+ is_translate=translate,
47
+ beam_size=5,
48
+ log_prob_threshold=-1.0,
49
+ no_speech_threshold=0.6,
50
+ compute_type="float16", # or "int8_float16", etc.
51
+ best_of=5,
52
+ patience=1.0,
53
+ condition_on_previous_text=True,
54
+ initial_prompt=None,
55
+ temperature=0.0,
56
+ compression_ratio_threshold=2.4,
57
+ vad_filter=False,
58
+ threshold=0.5,
59
+ min_speech_duration_ms=250,
60
+ max_speech_duration_s=9999,
61
+ min_silence_duration_ms=2000,
62
+ speech_pad_ms=400,
63
+ chunk_length_s=None,
64
+ batch_size=None,
65
+ is_diarize=False,
66
+ hf_token=None,
67
+ diarization_device=None,
68
+ length_penalty=1.0,
69
+ repetition_penalty=1.0,
70
+ no_repeat_ngram_size=0,
71
+ prefix=None,
72
+ suppress_blank=True,
73
+ suppress_tokens="[-1]",
74
+ max_initial_timestamp=1.0,
75
+ word_timestamps=False,
76
+ prepend_punctuations="\"'“¿([{-",
77
+ append_punctuations="\"'.。,,!!??::”)]}、",
78
+ max_new_tokens=None,
79
+ chunk_length=None,
80
+ hallucination_silence_threshold=None,
81
+ hotwords=None,
82
+ language_detection_threshold=None,
83
+ language_detection_segments=1,
84
+ prompt_reset_on_temperature=0.5
85
+ )
86
+
87
+ # Transcribe the file
88
+ result_str, result_files = whisper_inf.transcribe_file(
89
+ files=[input_file_path],
90
+ input_folder_path="",
91
+ file_format=file_format,
92
+ add_timestamp=add_timestamp,
93
+ *whisper_params.as_list() # Expand whisper_params as individual arguments
94
+ )
95
+
96
+ # Check if transcription was successful
97
+ if not result_files:
98
+ return JSONResponse(status_code=500, content={"message": "Transcription failed."})
99
+
100
+ # Return the first result file
101
+ output_file_path = result_files[0]
102
+ return FileResponse(
103
+ path=output_file_path,
104
+ filename=os.path.basename(output_file_path),
105
+ media_type='application/octet-stream'
106
+ )
107
+ except Exception as e:
108
+ return JSONResponse(status_code=500, content={"message": str(e)})
109
+ finally:
110
+ # Clean up temporary files
111
+ if os.path.exists(input_file_path):
112
+ os.remove(input_file_path)
ui/__init__.py ADDED
File without changes
# UI fragments for the Gradio/web front-end: stylesheet, header markdown,
# and a static HTML table of approximate NLLB model VRAM requirements.

# Custom CSS: "buy me a coffee"-style button, tighter markdown/tab spacing,
# and link styling for the project header (#md_project).
CSS = """
.bmc-button {
    padding: 2px 5px;
    border-radius: 5px;
    background-color: #FF813F;
    color: white;
    box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.3);
    text-decoration: none;
    display: inline-block;
    font-size: 20px;
    margin: 2px;
    cursor: pointer;
    -webkit-transition: background-color 0.3s ease;
    -ms-transition: background-color 0.3s ease;
    transition: background-color 0.3s ease;
}
.bmc-button:hover,
.bmc-button:active,
.bmc-button:focus {
    background-color: #FF5633;
}
.markdown {
    margin-bottom: 0;
    padding-bottom: 0;
}
.tabs {
    margin-top: 0;
    padding-top: 0;
}

#md_project a {
    color: black;
    text-decoration: none;
}
#md_project a:hover {
    text-decoration: underline;
}
"""

# Project header link. Fixed broken URL: the repository is spelled
# "Whisper-WebUI", not "Whsiper-WebUI".
MARKDOWN = """
### [Whisper Web-UI](https://github.com/jhj0517/Whisper-WebUI)
"""


# Collapsible HTML table listing approximate VRAM usage per NLLB model.
NLLB_VRAM_TABLE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
    table {
        border-collapse: collapse;
        width: 100%;
    }
    th, td {
        border: 1px solid #dddddd;
        text-align: left;
        padding: 8px;
    }
    th {
        background-color: #f2f2f2;
    }
    </style>
</head>
<body>

<details>
    <summary>VRAM usage for each model</summary>
    <table>
    <thead>
        <tr>
        <th>Model name</th>
        <th>Required VRAM</th>
        </tr>
    </thead>
    <tbody>
        <tr>
        <td>nllb-200-3.3B</td>
        <td>~16GB</td>
        </tr>
        <tr>
        <td>nllb-200-1.3B</td>
        <td>~8GB</td>
        </tr>
        <tr>
        <td>nllb-200-distilled-600M</td>
        <td>~4GB</td>
        </tr>
    </tbody>
    </table>
    <p><strong>Note:</strong> Be mindful of your VRAM! The table above provides an approximate VRAM usage for each model.</p>
</details>

</body>
</html>
"""