FFomy committed on
Commit
d666310
·
verified ·
1 Parent(s): 061cbc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -21
app.py CHANGED
@@ -438,8 +438,7 @@ def get_model_options(pipeline_type):
438
  # else:
439
  # return []
440
 
441
- # Dictionary to store loaded models
442
- loaded_models = {}
443
 
444
  @spaces.GPU(duration=40)
445
  def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, download_method, start_time=None, end_time=None, verbose=False):
@@ -511,24 +510,18 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
511
  if verbose:
512
  yield verbose_messages, "", None
513
 
514
- # Model caching
515
- model_key = (pipeline_type, model_id)
516
- if model_key in loaded_models:
517
- model = loaded_models[model_key]
518
- logging.info("Loaded model from cache")
 
 
519
  else:
520
- if pipeline_type == "fun-asr-nano":
521
- model = MODEL_FUN_ASR
522
- logging.info("Using pre-initialized Fun-ASR-Nano model")
523
- elif pipeline_type == "sensevoice":
524
- model = MODEL_SENSE_VOICE
525
- logging.info("Using pre-initialized SenseVoice model")
526
- else:
527
- error_msg = "Invalid pipeline type. Only 'sensevoice' is supported."
528
- logging.error(error_msg)
529
- yield verbose_messages + error_msg, "", None
530
- return
531
- loaded_models[model_key] = model
532
 
533
  # Perform the transcription
534
  start_time_perf = time.time()
@@ -538,8 +531,6 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
538
  input=[audio_path],
539
  use_itn=True,
540
  batch_size=1,
541
- merge_vad=True,
542
- merge_length_s=15,
543
  )
544
  elif pipeline_type == "sensevoice":
545
  res = model.generate(
 
438
  # else:
439
  # return []
440
 
441
+
 
442
 
443
  @spaces.GPU(duration=40)
444
  def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, download_method, start_time=None, end_time=None, verbose=False):
 
510
  if verbose:
511
  yield verbose_messages, "", None
512
 
513
+
514
+ if pipeline_type == "fun-asr-nano":
515
+ model = MODEL_FUN_ASR
516
+ logging.info("Using pre-initialized Fun-ASR-Nano model")
517
+ elif pipeline_type == "sensevoice":
518
+ model = MODEL_SENSE_VOICE
519
+ logging.info("Using pre-initialized SenseVoice model")
520
  else:
521
+ error_msg = "Invalid pipeline type. Only 'sensevoice' is supported."
522
+ logging.error(error_msg)
523
+ yield verbose_messages + error_msg, "", None
524
+ return
 
 
 
 
 
 
 
 
525
 
526
  # Perform the transcription
527
  start_time_perf = time.time()
 
531
  input=[audio_path],
532
  use_itn=True,
533
  batch_size=1,
 
 
534
  )
535
  elif pipeline_type == "sensevoice":
536
  res = model.generate(