FFomy committed on
Commit
27e6518
·
verified ·
1 Parent(s): b54b744

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -30
app.py CHANGED
@@ -1,22 +1,31 @@
1
  import os
2
- # from huggingface_hub import snapshot_download
3
- from modelscope.hub.snapshot_download import snapshot_download
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  # 1. 定义本地路径和远程仓库ID
7
  FUN_ASR_NANO_LOCAL_PATH = "./Fun-ASR/model"
8
- FUN_ASR_NANO_REPO_ID = "FunAudioLLM/Fun-ASR-Nano-2512"
9
  SENSE_VOICE_SMALL_LOCAL_PATH = "./Fun-ASR/model/SenseVoiceSmall"
10
- # SENSE_VOICE_SMALL_REPO_ID = "FunAudioLLM/SenseVoiceSmall"
11
- # REPO_TYPE = "hf" # "hf" for Hugging Face, "ms" for ModelScope
12
- SENSE_VOICE_SMALL_REPO_ID = "iic/SenseVoiceSmall"
13
- REPO_TYPE = "ms"
 
 
 
 
14
 
15
  # 2. 检查本地是否存在,不存在则下载
16
  if not os.path.exists(FUN_ASR_NANO_LOCAL_PATH):
17
- from modelscope import HubApi
18
- api= HubApi()
19
- api.login(os.getenv("MODELSCOPE_TOKEN"))
20
  print(f"正在下载模型 Fun-ASR-Nano 到 {FUN_ASR_NANO_LOCAL_PATH} ...")
21
  snapshot_download(
22
  repo_id=FUN_ASR_NANO_REPO_ID,
@@ -60,14 +69,10 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess
60
  # Model configurations for Hugging Face deployment
61
  FUN_ASR_NANO_MODEL_PATH_LIST = [
62
  "Fun-ASR/model", # local path, ms
63
- "FunAudioLLM/fun-asr-nano", # huggingface model repo, hf
64
- "FunAudioLLM/fun-asr-nano" # ModelScope model repo, ms
65
  ]
66
 
67
  SENSEVOICE_MODEL_PATH_LIST = [
68
  "Fun-ASR/model/SenseVoiceSmall", # local path together with this hf space
69
- "FunAudioLLM/SenseVoiceSmall", # huggingface model repo
70
- "iic/SenseVoiceSmall" # ModelScope model repo
71
  ]
72
 
73
  class LogCapture(io.StringIO):
@@ -401,6 +406,7 @@ def get_model_options(pipeline_type):
401
  # Dictionary to store loaded models
402
  loaded_models = {}
403
 
 
404
  def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, download_method, start_time=None, end_time=None, verbose=False):
405
  """
406
  Transcribes audio from a given source using SenseVoice.
@@ -485,7 +491,7 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
485
  vad_kwargs={"max_single_segment_time": 30000},
486
  device=device,
487
  disable_update=True,
488
- hub=REPO_TYPE,
489
  )
490
  elif pipeline_type == "sensevoice":
491
  model = AutoModel(
@@ -495,7 +501,7 @@ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_pa
495
  vad_kwargs={"max_single_segment_time": 30000},
496
  device=device,
497
  disable_update=True,
498
- hub=REPO_TYPE,
499
  )
500
  else:
501
  error_msg = "Invalid pipeline type. Only 'sensevoice' is supported."
@@ -650,20 +656,7 @@ with gr.Blocks() as iface:
650
  2. **Select Pipeline Type**: Choose from available pipelines:
651
  - **Fun-ASR-Nano** (default) - Large language model based ASR model
652
  - **SenseVoice** - CTC-based ASR model with VAD
653
-
654
- 3. **Available Model Options**:
655
-
656
- **For Fun-ASR-Nano:**
657
- - `Fun-ASR/model` (local path, default)
658
- - `FunAudioLLM/fun-asr-nano` (HuggingFace)
659
- - `FunAudioLLM/fun-asr-nano` (ModelScope)
660
-
661
- **For SenseVoice:**
662
- - `Fun-ASR/model/SenseVoiceSmall` (local path, default for this pipeline)
663
- - `FunAudioLLM/SenseVoiceSmall` (HuggingFace)
664
- - `iic/SenseVoiceSmall` (ModelScope)
665
-
666
- 4. **Local Testing**: For development, you can use local paths as shown above
667
 
668
  Supported languages:
669
  - Fun-ASR-Nano: more than 50 languages and Chinese dialects.
 
1
  import os
2
+ import spaces
3
+
4
+ REPO_TYPE = "hf"
5
+ if REPO_TYPE not in ["hf", "ms"]:
6
+ raise ValueError("REPO_TYPE must be either 'hf' for Hugging Face or 'ms' for ModelScope.")
7
+
8
+ if REPO_TYPE == "hf":
9
+ from huggingface_hub import snapshot_download
10
+ else:
11
+ from modelscope.hub.snapshot_download import snapshot_download
12
+
13
 
14
 
15
  # 1. 定义本地路径和远程仓库ID
16
  FUN_ASR_NANO_LOCAL_PATH = "./Fun-ASR/model"
 
17
  SENSE_VOICE_SMALL_LOCAL_PATH = "./Fun-ASR/model/SenseVoiceSmall"
18
+
19
+
20
+ if REPO_TYPE == "ms":
21
+ FUN_ASR_NANO_REPO_ID = "FunAudioLLM/Fun-ASR-Nano-2512"
22
+ SENSE_VOICE_SMALL_REPO_ID = "iic/SenseVoiceSmall"
23
+ else:
24
+ FUN_ASR_NANO_REPO_ID = "FunAudioLLM/Fun-ASR-Nano-2512"
25
+ SENSE_VOICE_SMALL_REPO_ID = "FunAudioLLM/SenseVoiceSmall"
26
 
27
  # 2. 检查本地是否存在,不存在则下载
28
  if not os.path.exists(FUN_ASR_NANO_LOCAL_PATH):
 
 
 
29
  print(f"正在下载模型 Fun-ASR-Nano 到 {FUN_ASR_NANO_LOCAL_PATH} ...")
30
  snapshot_download(
31
  repo_id=FUN_ASR_NANO_REPO_ID,
 
69
  # Model configurations for Hugging Face deployment
70
  FUN_ASR_NANO_MODEL_PATH_LIST = [
71
  "Fun-ASR/model", # local path, ms
 
 
72
  ]
73
 
74
  SENSEVOICE_MODEL_PATH_LIST = [
75
  "Fun-ASR/model/SenseVoiceSmall", # local path together with this hf space
 
 
76
  ]
77
 
78
  class LogCapture(io.StringIO):
 
406
  # Dictionary to store loaded models
407
  loaded_models = {}
408
 
409
+ @spaces.GPU()
410
  def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, download_method, start_time=None, end_time=None, verbose=False):
411
  """
412
  Transcribes audio from a given source using SenseVoice.
 
491
  vad_kwargs={"max_single_segment_time": 30000},
492
  device=device,
493
  disable_update=True,
494
+ hub='ms',
495
  )
496
  elif pipeline_type == "sensevoice":
497
  model = AutoModel(
 
501
  vad_kwargs={"max_single_segment_time": 30000},
502
  device=device,
503
  disable_update=True,
504
+ hub='ms',
505
  )
506
  else:
507
  error_msg = "Invalid pipeline type. Only 'sensevoice' is supported."
 
656
  2. **Select Pipeline Type**: Choose from available pipelines:
657
  - **Fun-ASR-Nano** (default) - Large language model based ASR model
658
  - **SenseVoice** - CTC-based ASR model with VAD
659
+ 3. **Local Testing**: For development, you can use local paths as shown above
 
 
 
 
 
 
 
 
 
 
 
 
 
660
 
661
  Supported languages:
662
  - Fun-ASR-Nano: more than 50 languages and Chinese dialects.