liuyang committed on
Commit
c97acaf
·
1 Parent(s): 6c3a671

modify params

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -948,11 +948,11 @@ class WhisperTranscriber:
948
  # Step 1: Preprocess per chunk JSON
949
  print("Preprocessing chunk JSON...")
950
  pre_meta = self.preprocess_from_task_json(task_json)
951
- transcribe_options = pre_meta.get("options", None)
952
  if isinstance(pre_meta, list):
953
- return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
954
  elif isinstance(pre_meta, dict) and "chunk" in pre_meta:
955
- return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
956
  except Exception as e:
957
  import traceback
958
  traceback.print_exc()
@@ -961,12 +961,13 @@ class WhisperTranscriber:
961
 
962
  @spaces.GPU
963
  def transcribe_chunk(self, pre_meta, language=None,
964
- translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
965
  """Main processing function with diarization using task JSON for a single chunk.
966
 
967
  Transcribes full (preprocessed) audio once, performs diarization, merges speakers into transcription.
968
  """
969
  try:
 
970
  print("Transcribing chunk...")
971
  # Step 1: Preprocess per chunk JSON
972
  if pre_meta["chunk"].get("skip"):
@@ -1008,7 +1009,7 @@ class WhisperTranscriber:
1008
 
1009
  @spaces.GPU
1010
  def transcribe_segments(self, pre_metas, language=None,
1011
- translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
1012
  """Main processing function with diarization using task JSON for a single chunk.
1013
 
1014
  Transcribes full (preprocessed) audio once, performs diarization, merges speakers into transcription.
@@ -1018,6 +1019,7 @@ class WhisperTranscriber:
1018
  transcription_results = []
1019
  # Step 1: Preprocess per chunk JSON
1020
  for pre_meta in pre_metas:
 
1021
  chunk = pre_meta["chunk"]
1022
  if chunk.get("skip"):
1023
  return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}
 
948
  # Step 1: Preprocess per chunk JSON
949
  print("Preprocessing chunk JSON...")
950
  pre_meta = self.preprocess_from_task_json(task_json)
951
+ #transcribe_options = pre_meta.get("options", None)
952
  if isinstance(pre_meta, list):
953
+ return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name)
954
  elif isinstance(pre_meta, dict) and "chunk" in pre_meta:
955
+ return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name)
956
  except Exception as e:
957
  import traceback
958
  traceback.print_exc()
 
961
 
962
  @spaces.GPU
963
  def transcribe_chunk(self, pre_meta, language=None,
964
+ translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL):
965
  """Main processing function with diarization using task JSON for a single chunk.
966
 
967
  Transcribes full (preprocessed) audio once, performs diarization, merges speakers into transcription.
968
  """
969
  try:
970
+ transcribe_options = pre_meta.get("options", None)
971
  print("Transcribing chunk...")
972
  # Step 1: Preprocess per chunk JSON
973
  if pre_meta["chunk"].get("skip"):
 
1009
 
1010
  @spaces.GPU
1011
  def transcribe_segments(self, pre_metas, language=None,
1012
+ translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL):
1013
  """Main processing function with diarization using task JSON for a single chunk.
1014
 
1015
  Transcribes full (preprocessed) audio once, performs diarization, merges speakers into transcription.
 
1019
  transcription_results = []
1020
  # Step 1: Preprocess per chunk JSON
1021
  for pre_meta in pre_metas:
1022
+ transcribe_options = pre_meta.get("options", None)
1023
  chunk = pre_meta["chunk"]
1024
  if chunk.get("skip"):
1025
  return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}