fdugyt committed on
Commit
ef285e9
·
verified ·
1 Parent(s): 4d05c28

Upload MOSS Audio Tokenizer v2

Browse files
.gitattributes CHANGED
@@ -46,4 +46,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
46
  *.wasm filter=lfs diff=lfs merge=lfs -text
47
  *.wav filter=lfs diff=lfs merge=lfs -text
48
  *.zst filter=lfs diff=lfs merge=lfs -text
49
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
46
  *.wasm filter=lfs diff=lfs merge=lfs -text
47
  *.wav filter=lfs diff=lfs merge=lfs -text
48
  *.zst filter=lfs diff=lfs merge=lfs -text
49
+ *tfevents* filter=lfs diff=lfs merge=lfs -text__pycache__/modeling_moss_audio_tokenizer.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  dev/*
2
  demo/demo_rec*.wav
 
 
1
  dev/*
2
  demo/demo_rec*.wav
3
+ __pycache__
__pycache__/configuration_moss_audio_tokenizer.cpython-312.pyc ADDED
Binary file (10.1 kB). View file
 
__pycache__/modeling_moss_audio_tokenizer.cpython-312.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67855dab364c1051dfb49491332e58e33b76db9151bf83a29af67b1cff8901aa
3
+ size 131137
configuration_moss_audio_tokenizer.py CHANGED
@@ -32,7 +32,7 @@ class MossAudioTokenizerConfig(PreTrainedConfig):
32
  MossAudioTokenizer model according to the specified arguments, defining the model architecture.
33
 
34
  Instantiating a configuration with the defaults will yield a similar configuration to that of the
35
- [VoiceAgentGroup/moss_audio_tokenizer](https://huggingface.co/VoiceAgentGroup/moss_audio_tokenizer) architecture.
36
 
37
  Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
38
  documentation from [`PreTrainedConfig`] for more information.
 
32
  MossAudioTokenizer model according to the specified arguments, defining the model architecture.
33
 
34
  Instantiating a configuration with the defaults will yield a similar configuration to that of the
35
+ [OpenMOSS-Team/MOSS-Audio-Tokenizer-v2](https://huggingface.co/OpenMOSS-Team/MOSS-Audio-Tokenizer-v2) architecture.
36
 
37
  Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
38
  documentation from [`PreTrainedConfig`] for more information.
modeling_moss_audio_tokenizer.py CHANGED
@@ -2472,7 +2472,7 @@ class MossAudioTokenizerModel(MossAudioTokenizerPreTrainedModel):
2472
  >>> model = MossAudioTokenizerModel.from_pretrained("OpenMOSS-Team/MOSS-Audio-Tokenizer-v2/")
2473
 
2474
  >>> # Create dummy audio input
2475
- >>> audio = torch.randn(1, 1, 24000) # 1 second of audio at 24kHz
2476
 
2477
  >>> outputs = model(input_values=audio)
2478
  >>> audio_codes = outputs.audio_codes
 
2472
  >>> model = MossAudioTokenizerModel.from_pretrained("OpenMOSS-Team/MOSS-Audio-Tokenizer-v2/")
2473
 
2474
  >>> # Create dummy audio input
2475
+ >>> audio = torch.randn(1, 2, 48000) # 1 second of audio at 48kHz stereo
2476
 
2477
  >>> outputs = model(input_values=audio)
2478
  >>> audio_codes = outputs.audio_codes