rafat0421 committed on
Commit
b74bc53
·
1 Parent(s): 22ee735

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -23,7 +23,7 @@ def get_timestamp(seconds):
23
  seconds = int(seconds % 60)
24
  return f"{str(minutes).zfill(2)}:{str(seconds).zfill(2)}"
25
 
26
- def divide_into_30s_segments(audio_fpath, seconds_max):
27
  """
28
  Divides the audio file into 30s segments and returns the paths to the segments and the start times of the segments.
29
  :param audio_fpath: Path to the audio file.
@@ -82,7 +82,7 @@ def transcribe(audio, url, seconds_max):
82
  """
83
  if url:
84
  fpath = download_from_youtube(url)
85
- segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
86
 
87
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
88
  pred = pipe(audio_dataset["audio"])
@@ -106,8 +106,8 @@ iface = gr.Interface(
106
  gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe from YouTube URL")
107
  ],
108
  outputs="text",
109
- title="Whisper Small Swedish",
110
- description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
111
  )
112
 
113
  iface.launch()
 
23
  seconds = int(seconds % 60)
24
  return f"{str(minutes).zfill(2)}:{str(seconds).zfill(2)}"
25
 
26
+ def create_segments(audio_fpath, seconds_max):
27
  """
28
  Divides the audio file into 30s segments and returns the paths to the segments and the start times of the segments.
29
  :param audio_fpath: Path to the audio file.
 
82
  """
83
  if url:
84
  fpath = download_from_youtube(url)
85
+ segment_paths, segment_start_times = create_segments(fpath, seconds_max)
86
 
87
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
88
  pred = pipe(audio_dataset["audio"])
 
106
  gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe from YouTube URL")
107
  ],
108
  outputs="text",
109
+ title="Whisper: transcribe Swedish language audio to text",
110
+ description="Swedish Text Transcription using Transformers.",
111
  )
112
 
113
  iface.launch()