dahyedahye committed on
Commit
c2d3acc
·
1 Parent(s): ae3884d

Add application file

Browse files
Files changed (1) hide show
  1. main.py +96 -22
main.py CHANGED
@@ -1,38 +1,112 @@
1
- from fastapi import FastAPI, File, UploadFile
2
- from fastapi.responses import FileResponse
3
  import os
4
  import shutil
 
 
 
5
  from modules.whisper.whisper_factory import WhisperFactory
 
6
 
7
  app = FastAPI()
8
 
9
  # Initialize Whisper inference engine
10
  whisper_inf = WhisperFactory.create_whisper_inference(
11
- whisper_type="faster-whisper",
12
  whisper_model_dir=os.path.join("models", "Whisper"),
13
  faster_whisper_model_dir=os.path.join("models", "Whisper", "faster-whisper"),
14
  insanely_fast_whisper_model_dir=os.path.join("models", "Whisper", "insanely-fast-whisper"),
15
  output_dir=os.path.join("outputs"),
16
  )
17
 
18
- @app.post("/upload-video/")
19
- async def upload_video(file: UploadFile = File(...)):
 
 
 
 
 
 
 
20
  """
21
- Upload a video file and get the generated SRT file as a response.
22
  """
23
- # Save the uploaded video file temporarily
24
- input_video_path = os.path.join("temp", file.filename)
25
- os.makedirs("temp", exist_ok=True)
26
-
27
- with open(input_video_path, "wb") as buffer:
28
- shutil.copyfileobj(file.file, buffer)
29
-
30
- # Generate the subtitle file
31
- output_srt_path = whisper_inf.transcribe_file(
32
- input_video_path,
33
- file_format="SRT",
34
- add_timestamp=True
35
- )
36
-
37
- # Return the SRT file as a response
38
- return FileResponse(path=output_srt_path, filename=os.path.basename(output_srt_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import shutil
3
+ from fastapi import FastAPI, File, UploadFile, Form
4
+ from fastapi.responses import FileResponse, JSONResponse
5
+ from typing import Optional
6
  from modules.whisper.whisper_factory import WhisperFactory
7
+ from modules.whisper.whisper_parameter import WhisperParameters
8
 
# ASGI application object; the route decorators below register on it.
app = FastAPI()

# Root folder shared by every Whisper backend's model directory.
_WHISPER_MODEL_ROOT = os.path.join("models", "Whisper")

# Build the Whisper inference engine once, at import time, so that all
# requests reuse the same instance.
whisper_inf = WhisperFactory.create_whisper_inference(
    # Backend selector: "whisper", "faster-whisper" or "insanely-fast-whisper".
    whisper_type="faster-whisper",
    whisper_model_dir=_WHISPER_MODEL_ROOT,
    faster_whisper_model_dir=os.path.join(_WHISPER_MODEL_ROOT, "faster-whisper"),
    insanely_fast_whisper_model_dir=os.path.join(
        _WHISPER_MODEL_ROOT, "insanely-fast-whisper"
    ),
    output_dir=os.path.join("outputs"),
)
19
 
def _sanitize_upload_name(filename: Optional[str]) -> Optional[str]:
    """Reduce a client-supplied upload name to a bare file name.

    Args:
        filename: The name sent by the client; may be ``None`` or empty and
            may contain directory components.

    Returns:
        The final path component of *filename*, or ``None`` when nothing
        usable remains (missing name, empty name, ``"."`` or ``".."``).
    """
    if not filename:
        return None
    # Treat backslashes as separators too, so Windows-style paths such as
    # "..\\..\\x" are stripped no matter which OS the server runs on.
    name = os.path.basename(filename.replace("\\", "/"))
    if name in ("", ".", ".."):
        return None
    return name


@app.post("/transcribe/")
async def transcribe_video(
    file: UploadFile = File(...),
    model_size: str = Form("large-v2"),
    language: str = Form("en"),
    translate: bool = Form(False),
    file_format: str = Form("SRT"),  # Options: "SRT", "WebVTT", "txt"
    add_timestamp: bool = Form(True),
):
    """
    Upload a video/audio file and get the generated subtitle file as a response.

    The upload is written to a private scratch directory under ``temp/``,
    transcribed with the module-level ``whisper_inf`` engine, and the first
    file reported by the engine is sent back as a download. The scratch
    directory is removed before the function returns, whatever the outcome.

    Args:
        file: The uploaded media file.
        model_size: Forwarded to ``WhisperParameters(model_size=...)``.
        language: Forwarded to ``WhisperParameters(lang=...)``.
        translate: Forwarded to ``WhisperParameters(is_translate=...)``.
        file_format: Forwarded to ``transcribe_file(file_format=...)``.
        add_timestamp: Forwarded to ``transcribe_file(add_timestamp=...)``.

    Returns:
        A ``FileResponse`` with the first result file on success; a
        ``JSONResponse`` with status 400 when the upload has no usable file
        name; a ``JSONResponse`` with status 500 when no result file is
        produced or any exception is raised.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # The file name is client-controlled: never let directory components
    # ("../", absolute paths) reach the filesystem.
    safe_name = _sanitize_upload_name(file.filename)
    if safe_name is None:
        return JSONResponse(
            status_code=400,
            content={"message": "Invalid or missing file name."},
        )

    # Bound before the try block so the cleanup in `finally` can never hit an
    # unbound name if directory creation itself fails.
    work_dir = None
    try:
        # Create temporary directories
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)
        # One scratch directory per request: concurrent uploads with the same
        # file name cannot overwrite or delete each other's input, and the
        # original base name is preserved for the transcription engine.
        work_dir = tempfile.mkdtemp(dir=temp_dir)

        # Save the uploaded file temporarily
        input_file_path = os.path.join(work_dir, safe_name)
        with open(input_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Prepare whisper parameters
        whisper_params = WhisperParameters(
            model_size=model_size,
            lang=language,
            is_translate=translate,
            beam_size=5,
            log_prob_threshold=-1.0,
            no_speech_threshold=0.6,
            compute_type="float16",  # or "int8_float16", etc.
            best_of=5,
            patience=1.0,
            condition_on_previous_text=True,
            initial_prompt=None,
            temperature=0.0,
            compression_ratio_threshold=2.4,
            vad_filter=False,
            threshold=0.5,
            min_speech_duration_ms=250,
            max_speech_duration_s=9999,
            min_silence_duration_ms=2000,
            speech_pad_ms=400,
            chunk_length_s=None,
            batch_size=None,
            is_diarize=False,
            hf_token=None,
            diarization_device=None,
            length_penalty=1.0,
            repetition_penalty=1.0,
            no_repeat_ngram_size=0,
            prefix=None,
            suppress_blank=True,
            suppress_tokens="[-1]",
            max_initial_timestamp=1.0,
            word_timestamps=False,
            prepend_punctuations="\"'“¿([{-",
            append_punctuations="\"'.。,,!!??::”)]}、",
            max_new_tokens=None,
            chunk_length=None,
            hallucination_silence_threshold=None,
            hotwords=None,
            language_detection_threshold=None,
            language_detection_segments=1,
            prompt_reset_on_temperature=0.5,
        )

        # Transcribe the file
        # NOTE(review): this is a synchronous call inside an async handler, so
        # it presumably blocks the event loop for the whole transcription —
        # confirm whether that serialisation is intended before changing it.
        result_str, result_files = whisper_inf.transcribe_file(
            files=[input_file_path],
            input_folder_path="",
            file_format=file_format,
            add_timestamp=add_timestamp,
            whisper_params=whisper_params,
        )

        # Check if transcription was successful
        if not result_files:
            return JSONResponse(
                status_code=500, content={"message": "Transcription failed."}
            )

        # Return the first result file
        output_file_path = result_files[0]
        return FileResponse(
            path=output_file_path,
            filename=os.path.basename(output_file_path),
            media_type='application/octet-stream',
        )
    except Exception as e:
        return JSONResponse(status_code=500, content={"message": str(e)})
    finally:
        # Clean up temporary files (the whole per-request scratch directory).
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)