shethjenil committed on Jul 26, 2025

Commit

0013177

verified ·

1 Parent(s): 7a459a0

Upload folder using huggingface_hub

Browse files

Files changed (41) hide show

.gitattributes +10 -0
omnizart/beat/beat_blstm/configurations.yaml +58 -0
omnizart/beat/beat_blstm/saved_model.pb +3 -0
omnizart/beat/beat_blstm/variables/variables.data-00000-of-00001 +3 -0
omnizart/beat/beat_blstm/variables/variables.index +0 -0
omnizart/chord/chord_v1/configurations.yaml +119 -0
omnizart/chord/chord_v1/saved_model.pb +3 -0
omnizart/chord/chord_v1/variables/variables.data-00000-of-00001 +3 -0
omnizart/chord/chord_v1/variables/variables.index +0 -0
omnizart/drum/drum_keras/configurations.yaml +115 -0
omnizart/drum/drum_keras/saved_model.pb +3 -0
omnizart/drum/drum_keras/variables/variables.data-00000-of-00001 +3 -0
omnizart/drum/drum_keras/variables/variables.index +0 -0
omnizart/music/music_note_stream/configurations.yaml +86 -0
omnizart/music/music_note_stream/saved_model.pb +3 -0
omnizart/music/music_note_stream/variables/variables.data-00000-of-00001 +3 -0
omnizart/music/music_note_stream/variables/variables.index +0 -0
omnizart/music/music_piano-v2/configurations.yaml +88 -0
omnizart/music/music_piano-v2/saved_model.pb +3 -0
omnizart/music/music_piano-v2/variables/variables.data-00000-of-00001 +3 -0
omnizart/music/music_piano-v2/variables/variables.index +0 -0
omnizart/music/music_piano/configurations.yaml +166 -0
omnizart/music/music_piano/saved_model.pb +3 -0
omnizart/music/music_piano/variables/variables.data-00000-of-00001 +3 -0
omnizart/music/music_piano/variables/variables.index +0 -0
omnizart/music/music_pop/configurations.yaml +88 -0
omnizart/music/music_pop/saved_model.pb +3 -0
omnizart/music/music_pop/variables/variables.data-00000-of-00001 +3 -0
omnizart/music/music_pop/variables/variables.index +0 -0
omnizart/patch_cnn/patch_cnn_melody/configurations.yaml +118 -0
omnizart/patch_cnn/patch_cnn_melody/saved_model.pb +3 -0
omnizart/patch_cnn/patch_cnn_melody/variables/variables.data-00000-of-00001 +3 -0
omnizart/patch_cnn/patch_cnn_melody/variables/variables.index +0 -0
omnizart/vocal/vocal_contour/configurations.yaml +42 -0
omnizart/vocal/vocal_contour/saved_model.pb +3 -0
omnizart/vocal/vocal_contour/variables/variables.data-00000-of-00001 +3 -0
omnizart/vocal/vocal_contour/variables/variables.index +0 -0
omnizart/vocal/vocal_semi/configurations.yaml +150 -0
omnizart/vocal/vocal_semi/saved_model.pb +3 -0
omnizart/vocal/vocal_semi/variables/variables.data-00000-of-00001 +3 -0
omnizart/vocal/vocal_semi/variables/variables.index +0 -0

.gitattributes CHANGED Viewed

@@ -35,3 +35,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 basicpitch/nmp/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 basicpitch/nmp_vamp_plugin/ircambasicpitch.so filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 basicpitch/nmp/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 basicpitch/nmp_vamp_plugin/ircambasicpitch.so filter=lfs diff=lfs merge=lfs -text
+omnizart/beat/beat_blstm/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/chord/chord_v1/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/drum/drum_keras/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/music/music_note_stream/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/music/music_piano/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/music/music_piano-v2/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/music/music_pop/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/patch_cnn/patch_cnn_melody/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/vocal/vocal_contour/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+omnizart/vocal/vocal_semi/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text

omnizart/beat/beat_blstm/configurations.yaml ADDED Viewed

	@@ -0,0 +1,58 @@

+General:
+  CheckpointPath:
+    Value:
+      BLSTM: checkpoints/beat/beat_blstm
+  Dataset:
+    Settings:
+      FeatureSavePath:
+        Value: +
+      SavePath:
+        Value: ./
+  Feature:
+    Settings:
+      TimeUnit:
+        Value: 0.01
+  Inference:
+    Settings:
+      BeatThreshold:
+        Value: 0.5
+      DownBeatThreshold:
+        Value: 0.3
+      MinDistance:
+        Value: 0.3
+  Model:
+    Settings:
+      AttnHiddenDim:
+        Value: 256
+      LstmHiddenDim:
+        Value: 25
+      ModelType:
+        Value: blstm
+      NumLstmLayers:
+        Value: 2
+      SavePath:
+        Value: ./checkpoints/beat
+      SavePrefix:
+        Value: beat
+      Timesteps:
+        Value: 1000
+  Training:
+    Settings:
+      BatchSize:
+        Value: 64
+      DownBeatWeight:
+        Value: 5
+      EarlyStop:
+        Value: 4
+      Epoch:
+        Value: 10
+      InitLearningRate:
+        Value: 0.001
+      Steps:
+        Value: 2000
+      ValBatchSize:
+        Value: 64
+      ValSteps:
+        Value: 200
+  TranscriptionMode:
+    Value: BLSTM

omnizart/beat/beat_blstm/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0bff208bd559b49df510f2e5177e28b755a29ae36ac690377e00529971417b
+size 5077079

omnizart/beat/beat_blstm/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:673692c2d4cff145bcaad5396d61e480aa442c86f1ac928be2da18568a306057
+size 400241555

omnizart/beat/beat_blstm/variables/variables.index ADDED Viewed

Binary file (1.23 kB). View file

omnizart/chord/chord_v1/configurations.yaml ADDED Viewed

	@@ -0,0 +1,119 @@

+General:
+    TranscriptionMode:
+        Description: Mode of transcription by executing the `omnizart_fix chord transcribe` command.
+        Type: String
+        Value: ChordV1
+    CheckpointPath:
+        Description: Path to the pre-trained models.
+        Type: Map
+        SubType: [String, String]
+        Value:
+            ChordV1: checkpoints/chord/chord_v1
+    Feature:
+        Description: Default settings of feature extraction for chord transcription.
+        Settings:
+            SegmentWidth:
+                Description: Width of segments. Each frame lasts for 0.046 seconds, and thus each segment would last for around 0.5 seconds.
+                Type: Integer
+                Value: 21
+            SegmentHop:
+                Description: Hop size of the segment.
+                Type: Integer
+                Value: 5
+            NumSteps:
+                Description: Number of total steps. Default setting would have around 23 seconds.
+                Type: Integer
+                Value: 100
+    Dataset:
+        Description: Settings of datasets.
+        Settings:
+            SavePath:
+                Description: Path for storing the downloaded datasets.
+                Type: String
+                Value: ./
+            FeatureSavePath:
+                Description: Path for storing the extracted feature. Default to the path under the dataset folder.
+                Type: String
+                Value: +
+    Model:
+        Description: Default settings of training / testing the model.
+        Settings:
+            SavePrefix:
+                Description: Prefix of the trained model's name to be saved.
+                Type: String
+                Value: chord
+            SavePath:
+                Description: Path to save the trained model.
+                Type: String
+                Value: ./checkpoints/chord
+            NumEncAttnBlocks:
+                Description: Number of attention blocks for encoder.
+                Type: Integer
+                Value: 2
+            NumDecAttnBlocks:
+                Description: Number of attention blocks for decoder.
+                Type: Integer
+                Value: 2
+            FreqSize:
+                Description: Available size on the frequency axis to be seen.
+                Type: Integer
+                Value: 24
+            EncInputEmbSize:
+                Description: Embedding size of the encoder's input.
+                Type: Integer
+                Value: 512
+            DecInputEmbSize:
+                Description: Embedding size of the decoder's input.
+                Type: Integer
+                Value: 512
+            DropoutRate:
+                Description: Dropout rate of all dropout layers.
+                Type: Float
+                Value: 0.6
+            AnnealingRate:
+                Description: To be added...
+                Type: Float
+                Value: 1.1
+    Inference:
+        Description: Default settings when inferring notes.
+        Settings:
+            MinDura:
+                Description: Minimum duration (in seconds) for each chord. If shorter than expected, will append the duration to the previous chord.
+                Type: Float
+                Value: 0.1
+    Training:
+        Description: Hyper parameters for training
+        Settings:
+            Epoch:
+                Description: Maximum number of epochs for training.
+                Type: Integer
+                Value: 10
+            Steps:
+                Description: Number of training steps for each epoch.
+                Type: Integer
+                Value: 1000
+            ValSteps:
+                Description: Number of validation steps after each training epoch.
+                Type: Integer
+                Value: 500
+            BatchSize:
+                Description: Batch size of each training step.
+                Type: Integer
+                Value: 32
+            ValBatchSize:
+                Description: Batch size of each validation step.
+                Type: Integer
+                Value: 32
+            EarlyStop:
+                Description: Terminate the training if the validation performance doesn't improve after n epochs.
+                Type: Integer
+                Value: 4
+            InitLearningRate:
+                Description: Initial learning rate.
+                Type: Float
+                Value: 0.0001
+            LearningRateDecay:
+                Description: Decaying rate of learning rate per epoch.
+                Type: Float
+                Value: 0.96

omnizart/chord/chord_v1/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:489a3db29d528d67d1b7743a0301a4ab81f0803bfcbb0e819615eb64ec67203b
+size 23098326

omnizart/chord/chord_v1/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba35defa2a043c3cf9d3ef9512a83a6fe6bb62cd247026de50f58bdc0e71317d
+size 44242072

omnizart/chord/chord_v1/variables/variables.index ADDED Viewed

Binary file (7.13 kB). View file

omnizart/drum/drum_keras/configurations.yaml ADDED Viewed

	@@ -0,0 +1,115 @@

+General:
+    TranscriptionMode:
+        Description: Mode of transcription by executing the `omnizart_fix drum transcribe` command.
+        Type: String
+        Value: keras
+    CheckpointPath:
+        Description: Path to the pre-trained models.
+        Type: Map
+        SubType: [String, String]
+        Value:
+            keras: ./checkpoints/drum/drum_keras
+    Feature:
+        Description: Default settings of feature extraction for drum transcription.
+        Settings:
+            SamplingRate:
+                Description: Adjust input sampling rate to this value.
+                Type: Integer
+                Value: 44100
+            PaddingSeconds:
+                Description: Padding length to the begin and the end of the raw audio data.
+                Type: Float
+                Value: 1.0
+            LowestNote:
+                Description: Lowest MIDI note number to be considered.
+                Type: Integer
+                Value: 16
+            NumberOfNotes:
+                Description: Number of total notes to extract.
+                Type: Integer
+                Value: 120
+            HopSize:
+                Description: Hop size for computing CQT feature.
+                Type: Integer
+                Value: 256
+            MiniBeatPerBar:
+                Description: Number of mini beats in a single 4/4 measure.
+                Type: Integer
+                Value: 32
+            MiniBeatPerSegment:
+                Description: Number of mini beats in a single segment.
+                Type: Integer
+                Value: 4
+    Dataset:
+        Description: Settings of datasets.
+        Settings:
+            SavePath:
+                Description: Path for storing the downloaded datasets.
+                Type: String
+                Value: ./
+            FeatureSavePath:
+                Description: Path for storing the extracted feature. Default to the path under the dataset folder.
+                Type: String
+                Value: +
+    Model:
+        Description: Default settings of training / testing the model.
+        Settings:
+            SavePrefix:
+                Description: Prefix of the trained model's name to be saved.
+                Type: String
+                Value: drum
+            SavePath:
+                Description: Path to save the trained model.
+                Type: String
+                Value: ./checkpoints/drum
+    Inference:
+        Description: Default settings when inferring notes.
+        Settings:
+            BassDrumTh:
+                Description: Threshold for the bass drum.
+                Type: Float
+                Value: 0.85
+            SnareTh:
+                Description: Threshold for the snare.
+                Type: Float
+                Value: 1.2
+            HihatTh:
+                Description: Threshold for the hihat.
+                Type: Float
+                Value: 0.17
+    Training:
+        Description: Hyper parameters for training
+        Settings:
+            Epoch:
+                Description: Maximum number of epochs for training.
+                Type: Integer
+                Value: 50
+            Steps:
+                Description: Number of training steps for each epoch.
+                Type: Integer
+                Value: 1000
+            ValSteps:
+                Description: Number of validation steps after each training epoch.
+                Type: Integer
+                Value: 100
+            BatchSize:
+                Description: Batch size of each training step.
+                Type: Integer
+                Value: 32
+            ValBatchSize:
+                Description: Batch size of each validation step.
+                Type: Integer
+                Value: 32
+            EarlyStop:
+                Description: Terminate the training if the validation performance doesn't improve after n epochs.
+                Type: Integer
+                Value: 6
+            InitLearningRate:
+                Description: Initial learning rate.
+                Type: Float
+                Value: 0.00002
+            ResBlockNum:
+                Description: Number of residual blocks.
+                Type: Integer
+                Value: 3

omnizart/drum/drum_keras/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6536f2fd67472cf8fcaea89888410eb6c09b4e3ac72664cb9fffe7441aa6559b
+size 1829175

omnizart/drum/drum_keras/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e5105215ec8bdee5ebba57c2eef8655a310de99da9f3169b3f857e2185ab464
+size 31090686

omnizart/drum/drum_keras/variables/variables.index ADDED Viewed

Binary file (5.02 kB). View file

omnizart/music/music_note_stream/configurations.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+General:
+  CheckpointPath:
+    Value:
+      Piano: ./checkpoints/music/music_piano
+      Pop: ./checkpoints/music/music_pop
+  Dataset:
+    Settings:
+      FeatureSavePath:
+        Value: +
+      FeatureType:
+        Value: CFP
+      SavePath:
+        Value: ./
+  Feature:
+    Settings:
+      BinsPerOctave:
+        Value: 48
+      FrequencyCenter:
+        Value: 27.5
+      FrequencyResolution:
+        Value: 2.0
+      Gamma:
+        Value:
+        - 0.24
+        - 0.6
+        - 1.0
+      Harmonic:
+        Value: false
+      HarmonicNumber:
+        Value: 6
+      HopSize:
+        Value: 0.02
+      SamplingRate:
+        Value: 44100
+      TimeCenter:
+        Value: 0.00022287
+      WindowSize:
+        Value: 7939
+  Inference:
+    Settings:
+      DuraTh:
+        Value: 0.5
+      FrameTh:
+        Value: 0.5
+      InstTh:
+        Value: 1.1
+      MinLength:
+        Value: 0.05
+      OnsetTh:
+        Value: 6.0
+  Model:
+    Settings:
+      ModelType:
+        Value: attn
+      SavePath:
+        Value: ./checkpoints/music
+      SavePrefix:
+        Value: music
+  Training:
+    Settings:
+      BatchSize:
+        Value: 8
+      Channels:
+        Value:
+        - Spec
+        - Ceps
+      EarlyStop:
+        Value: 6
+      Epoch:
+        Value: 20
+      LabelType:
+        Value: note-stream
+      LossFunction:
+        Value: smooth
+      Steps:
+        Value: 3000
+      Timesteps:
+        Value: 128
+      ValBatchSize:
+        Value: 8
+      ValSteps:
+        Value: 500
+      FeatureNum:
+        Value: 352
+  TranscriptionMode:
+    Value: Stream

omnizart/music/music_note_stream/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b629d1fe06db9bb056d74857376616cdec7d67884bec4ba08aa48235989e7ea1
+size 4448469

omnizart/music/music_note_stream/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f1dce36c255b2e34d69e1faba1e0822723240880b5fdd444613fdb2f29eb15
+size 33495027

omnizart/music/music_note_stream/variables/variables.index ADDED Viewed

Binary file (18.4 kB). View file

omnizart/music/music_piano-v2/configurations.yaml ADDED Viewed

	@@ -0,0 +1,88 @@

+General:
+  CheckpointPath:
+    Value:
+      Piano: checkpoints/music/music_piano
+      Pop: checkpoints/music/music_pop
+      Stream: checkpoints/music/music_note_stream
+  Dataset:
+    Settings:
+      FeatureSavePath:
+        Value: +
+      FeatureType:
+        Value: CFP
+      SavePath:
+        Value: ./
+  Feature:
+    Settings:
+      BinsPerOctave:
+        Value: 48
+      FrequencyCenter:
+        Value: 27.5
+      FrequencyResolution:
+        Value: 2.0
+      Gamma:
+        Value:
+        - 0.24
+        - 0.6
+        - 1.0
+      Harmonic:
+        Value: false
+      HarmonicNumber:
+        Value: 6
+      HopSize:
+        Value: 0.02
+      SamplingRate:
+        Value: 44100
+      TimeCenter:
+        Value: 0.00022287
+      WindowSize:
+        Value: 7939
+  Inference:
+    Settings:
+      DuraTh:
+        Value: 0.5
+      FrameTh:
+        Value: 0.5
+      InstTh:
+        Value: 1.1
+      MinLength:
+        Value: 0.05
+      OnsetTh:
+        Value: 4
+  Model:
+    Settings:
+      ModelType:
+        Value: aspp
+      SavePath:
+        Value: ./checkpoints/music
+      SavePrefix:
+        Value: music
+  Training:
+    Settings:
+      BatchSize:
+        Value: 16
+      Channels:
+        Value:
+        - Spec
+        - Ceps
+        - GCoS
+      EarlyStop:
+        Value: 6
+      Epoch:
+        Value: 30
+      FeatureNum:
+        Value: 352
+      LabelType:
+        Value: note
+      LossFunction:
+        Value: smooth
+      Steps:
+        Value: 1500
+      Timesteps:
+        Value: 256
+      ValBatchSize:
+        Value: 8
+      ValSteps:
+        Value: 150
+  TranscriptionMode:
+    Value: Piano

omnizart/music/music_piano-v2/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2244e3929e88ae32ad32ad46541eab7ce6803d34c6c37cb9445b50ffc89e9281
+size 4402822

omnizart/music/music_piano-v2/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3316d5fed8054bb7f6aa18021d6787484808894a1eceacea0f9a9f2fe1fb684
+size 84656908

omnizart/music/music_piano-v2/variables/variables.index ADDED Viewed

Binary file (21.1 kB). View file

omnizart/music/music_piano/configurations.yaml ADDED Viewed

	@@ -0,0 +1,166 @@

+# Self-documented configurable settings, with description, type hint, and available
+# options. All the parameters can be overridden by another specified configuration file
+# with selected parameters.
+General:
+    TranscriptionMode:
+        Description: Mode of transcription by executing the `omnizart_fix music transcribe` command.
+        Type: String
+        Value: Piano
+    CheckpointPath:
+        Description: Path to the pre-trained models.
+        Type: Map
+        SubType: [String, String]
+        Value:
+            Piano: ./checkpoints/music/music_piano
+            Pop: ./checkpoints/music/music_pop
+            Stream: ./checkpoints/music/music_note_stream
+    Feature:
+        Description: Default settings of feature extraction
+        Settings:
+            HopSize:
+                Description: Hop size in seconds with respect to sampling rate.
+                Type: Float
+                Value: 0.02
+            SamplingRate:
+                Description: Adjust input sampling rate to this value.
+                Type: Integer
+                Value: 44100
+            WindowSize:
+                Type: Integer
+                Value: 7939
+            FrequencyResolution:
+                Type: Float
+                Value: 2.0
+            FrequencyCenter:
+                Description: Lowest frequency to extract.
+                Type: Float
+                Value: 27.5
+            TimeCenter:
+                Description: Highest frequency to extract (1/time_center).
+                Type: Float
+                Value: 0.00022287
+            Gamma:
+                Type: List
+                SubType: Float
+                Value: [0.24, 0.6, 1.0]
+            BinsPerOctave:
+                Description: Number of bins for each octave.
+                Type: Integer
+                Value: 48
+            HarmonicNumber:
+                Description: Number of harmonic bins of HCFP feature.
+                Type: Integer
+                Value: 6
+            Harmonic:
+                Description: Whether to use harmonic version of the input feature for training.
+                Type: Bool
+                Value: False
+    Dataset:
+        Description: Settings of datasets.
+        Settings:
+            SavePath:
+                Description: Path for storing the downloaded datasets.
+                Type: String
+                Value: ./
+            FeatureType:
+                Description: Type of feature to extract.
+                Type: String
+                Value: CFP
+                Choices: ["CFP", "HCFP"]
+            FeatureSavePath:
+                Description: Path for storing the extracted feature. Default to the path under the dataset folder.
+                Type: String
+                Value: +
+    Model:
+        Description: Default settings of training / testing the model.
+        Settings:
+            SavePrefix:
+                Description: Prefix of the trained model's name to be saved.
+                Type: String
+                Value: music
+            SavePath:
+                Description: Path to save the trained model.
+                Type: String
+                Value: ./checkpoints/music
+            ModelType:
+                Description: Default model type to be used for training
+                Type: String
+                Value: attn
+                Choices: ["aspp", "attn"]
+    Inference:
+        Description: Default settings when inferring notes.
+        Settings:
+            MinLength:
+                Description: Minimum length of a note in seconds.
+                Type: Float
+                Value: 0.05
+            InstTh:
+                Description: Threshold for filtering instruments.
+                Type: Float
+                Value: 1.1
+            OnsetTh:
+                Description: Threshold of predicted onset channel.
+                Type: Float
+                Value: 4
+            DuraTh:
+                Description: Threshold of predicted duration channel.
+                Type: Float
+                Value: 0.5
+            FrameTh:
+                Description: Threshold of frame-level predictions.
+                Type: Float
+                Value: 0.5
+    Training:
+        Description: Parameters for training
+        Settings:
+            Epoch:
+                Description: Maximum number of epochs for training.
+                Type: Integer
+                Value: 20
+            Steps:
+                Description: Number of training steps for each epoch.
+                Type: Integer
+                Value: 3000
+            ValSteps:
+                Description: Number of validation steps after each training epoch.
+                Type: Integer
+                Value: 500
+            BatchSize:
+                Description: Batch size of each training step.
+                Type: Integer
+                Value: 8
+            ValBatchSize:
+                Description: Batch size of each validation step.
+                Type: Integer
+                Value: 8
+            EarlyStop:
+                Description: Terminate the training if the validation performance doesn't improve after n epochs.
+                Type: Integer
+                Value: 6
+            LossFunction:
+                Description: Loss function for computing the objectives.
+                Type: String
+                Value: smooth
+                Choices: ["smooth", "focal", "bce"]
+            LabelType:
+                Description: Determines the training target to be single- or multi-instrument scenario, and more options.
+                Type: String
+                Value: note
+                Choices: ["note-stream", "frame-stream", "note", "frame"]
+            Channels:
+                Description: Use different types of feature for training.
+                Type: List
+                SubType: String
+                Value: ["Spec", "Ceps"]
+                Choices: ["Spec", "GCoS", "Ceps"]
+            Timesteps:
+                Description: Length of time axis of the input feature.
+                Type: Integer
+                Value: 128
+            FeatureNum:
+                Description: The target size of feature dimension.
+                Type: Integer
+                Value: 384

omnizart/music/music_piano/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f90191ba7c4dd251db9600119581a7a71daf35266da57097694861b325429e23
+size 4027989

omnizart/music/music_piano/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd93da1d0b4e2a59f8c473580380d2388df7ec86d0c8d7803a2863c530d005bf
+size 50407445

omnizart/music/music_piano/variables/variables.index ADDED Viewed

Binary file (19.2 kB). View file

omnizart/music/music_pop/configurations.yaml ADDED Viewed

	@@ -0,0 +1,88 @@

+General:
+  CheckpointPath:
+    Value:
+      Piano: checkpoints/music/music_piano
+      Pop: checkpoints/music/music_pop
+      Stream: checkpoints/music/music_note_stream
+  Dataset:
+    Settings:
+      FeatureSavePath:
+        Value: +
+      FeatureType:
+        Value: CFP
+      SavePath:
+        Value: ./
+  Feature:
+    Settings:
+      BinsPerOctave:
+        Value: 48
+      FrequencyCenter:
+        Value: 27.5
+      FrequencyResolution:
+        Value: 2.0
+      Gamma:
+        Value:
+        - 0.24
+        - 0.6
+        - 1.0
+      Harmonic:
+        Value: false
+      HarmonicNumber:
+        Value: 6
+      HopSize:
+        Value: 0.02
+      SamplingRate:
+        Value: 44100
+      TimeCenter:
+        Value: 0.00022287
+      WindowSize:
+        Value: 7939
+  Inference:
+    Settings:
+      DuraTh:
+        Value: 0.5
+      FrameTh:
+        Value: 0.5
+      InstTh:
+        Value: 1.1
+      MinLength:
+        Value: 0.05
+      OnsetTh:
+        Value: 6.0
+  Model:
+    Settings:
+      ModelType:
+        Value: attn
+      SavePath:
+        Value: ./checkpoints/music
+      SavePrefix:
+        Value: music
+  Training:
+    Settings:
+      BatchSize:
+        Value: 8
+      Channels:
+        Value:
+        - Spec
+        - Ceps
+        - GCoS
+      EarlyStop:
+        Value: 6
+      Epoch:
+        Value: 20
+      FeatureNum:
+        Value: 352
+      LabelType:
+        Value: pop-note-stream
+      LossFunction:
+        Value: smooth
+      Steps:
+        Value: 3000
+      Timesteps:
+        Value: 128
+      ValBatchSize:
+        Value: 8
+      ValSteps:
+        Value: 500
+  TranscriptionMode:
+    Value: Pop

omnizart/music/music_pop/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80374501dc49c07296419ca8850bbf8008d6fd76aa3f447ad91add482a9a42a7
+size 4448052

omnizart/music/music_pop/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf77d6b5e95f4034d824049104cc93830a8a2c40399eb45dc8488ad072aeeb4
+size 33498559

omnizart/music/music_pop/variables/variables.index ADDED Viewed

Binary file (18.4 kB). View file

omnizart/patch_cnn/patch_cnn_melody/configurations.yaml ADDED Viewed

	@@ -0,0 +1,118 @@

+General:
+    TranscriptionMode:
+        Description: Mode of transcription by executing the `omnizart_fix patch-cnn transcribe` command.
+        Type: String
+        Value: Melody
+    CheckpointPath:
+        Description: Path to the pre-trained models.
+        Type: Map
+        SubType: [String, String]
+        Value:
+            Melody: checkpoints/patch_cnn/patch_cnn_melody
+    Feature:
+        Description: Default settings of feature extraction
+        Settings:
+            PatchSize:
+                Description: Input size of feature dimension.
+                Type: Integer
+                Value: 25
+            PeakThreshold:
+                Description: Threshold used to filter out peaks with small value.
+                Type: Float
+                Value: 0.5
+            HopSize:
+                Description: Hop size in seconds with respect to sampling rate.
+                Type: Float
+                Value: 0.02
+            SamplingRate:
+                Description: Adjust input sampling rate to this value.
+                Type: Integer
+                Value: 16000
+            WindowSize:
+                Type: Integer
+                Value: 2049
+            FrequencyResolution:
+                Type: Float
+                Value: 2.0
+            FrequencyCenter:
+                Description: Lowest frequency to extract.
+                Type: Float
+                Value: 80
+            TimeCenter:
+                Description: Highest frequency to extract (1/time_center).
+                Type: Float
+                Value: 0.001
+            Gamma:
+                Type: List
+                SubType: Float
+                Value: [0.24, 0.6, 1.0]
+            BinsPerOctave:
+                Description: Number of bins for each octave.
+                Type: Integer
+                Value: 48
+    Model:
+        Description: Default settings of training / testing the model.
+        Settings:
+            SavePrefix:
+                Description: Prefix of the trained model's name to be saved.
+                Type: String
+                Value: patch_cnn
+            SavePath:
+                Description: Path to save the trained model.
+                Type: String
+                Value: ./checkpoints/patch_cnn
+    Dataset:
+        Description: Settings of datasets.
+        Settings:
+            SavePath:
+                Description: Path for storing the downloaded datasets.
+                Type: String
+                Value: ./
+            FeatureSavePath:
+                Description: Path for storing the extracted feature. Default to the path under the dataset folder.
+                Type: String
+                Value: +
+    Inference:
+        Description: Default settings when inferring notes.
+        Settings:
+            Threshold:
+                Description: Threshold of the prediction value.
+                Type: Float
+                Value: 0.5
+            MaxMethod:
+                Description: Method of determining the position of the max prediction value.
+                Type: String
+                Value: posterior
+                Choices: ["posterior", "prior"]
+    Training:
+        Description: Hyper parameters for training
+        Settings:
+            Epoch:
+                Description: Maximum number of epochs for training.
+                Type: Integer
+                Value: 10
+            Steps:
+                Description: Number of training steps for each epoch.
+                Type: Integer
+                Value: 1000
+            ValSteps:
+                Description: Number of validation steps after each training epoch.
+                Type: Integer
+                Value: 500
+            BatchSize:
+                Description: Batch size of each training step.
+                Type: Integer
+                Value: 32
+            ValBatchSize:
+                Description: Batch size of each validation step.
+                Type: Integer
+                Value: 32
+            EarlyStop:
+                Description: Terminate the training if the validation performance doesn't improve after n epochs.
+                Type: Integer
+                Value: 4
+            InitLearningRate:
+                Description: Initial learning rate.
+                Type: Float
+                Value: 0.0001

omnizart/patch_cnn/patch_cnn_melody/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78a90299f4b24484dbf4638df16f4bc25af3f2f2b36d37b5197f7de72525f720
+size 155120

omnizart/patch_cnn/patch_cnn_melody/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbbd11fc0b60a5f3986c39f7bb3985205d719031f70f916edc560f8b4d01b51a
+size 708098

omnizart/patch_cnn/patch_cnn_melody/variables/variables.index ADDED Viewed

Binary file (760 Bytes). View file

omnizart/vocal/vocal_contour/configurations.yaml ADDED Viewed

	@@ -0,0 +1,42 @@

+General:
+  TranscriptionMode:
+    Value: VocalContour
+  CheckpointPath:
+    Value:
+      VocalContour: checkpoints/vocal/contour
+  Dataset:
+    Settings:
+      FeatureSavePath:
+        Value: +
+      SavePath:
+        Value: ./
+  Feature:
+    Settings:
+      HopSize:
+        Value: 0.02
+      SamplingRate:
+        Value: 16000
+      WindowSize:
+        Value: 2049
+  Model:
+    Settings:
+      SavePath:
+        Value: ./checkpoints/vocal_frame
+      SavePrefix:
+        Value: vocal-frame
+  Training:
+    Settings:
+      BatchSize:
+        Value: 12
+      EarlyStop:
+        Value: 3
+      Epoch:
+        Value: 5
+      Steps:
+        Value: 6000
+      Timesteps:
+        Value: 128
+      ValBatchSize:
+        Value: 12
+      ValSteps:
+        Value: 200

omnizart/vocal/vocal_contour/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c71303199c361155c4cfac2ea9e5aeb9945ad63f5f9f38961aac59ab29c0a5b
+size 4029577

omnizart/vocal/vocal_contour/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5c76f749ab552d7735fdcd472ebf29dd31d41609fe8b1160b9c037d6919339
+size 50401019

omnizart/vocal/vocal_contour/variables/variables.index ADDED Viewed

Binary file (19.2 kB). View file

omnizart/vocal/vocal_semi/configurations.yaml ADDED Viewed

	@@ -0,0 +1,150 @@

+General:
+    TranscriptionMode:
+        Description: Mode of transcription by executing the `omnizart_fix vocal transcribe` command.
+        Type: String
+        Value: Semi
+    CheckpointPath:
+        Description: Path to the pre-trained models.
+        Type: Map
+        SubType: [String, String]
+        Value:
+            Super: checkpoints/vocal/vocal_super
+            Semi: checkpoints/vocal/vocal_semi
+    Feature:
+        Description: Default settings of feature extraction for vocal transcription.
+        Settings:
+            HopSize:
+                Description: Hop size in seconds with respect to sampling rate.
+                Type: Float
+                Value: 0.02
+            SamplingRate:
+                Description: Adjust input sampling rate to this value.
+                Type: Integer
+                Value: 16000
+            FrequencyResolution:
+                Type: Float
+                Value: 2.0
+            FrequencyCenter:
+                Description: Lowest frequency to extract.
+                Type: Float
+                Value: 80
+            TimeCenter:
+                Description: Highest frequency to extract (1/time_center).
+                Type: Float
+                Value: 0.001
+            Gamma:
+                Type: List
+                SubType: Float
+                Value: [0.24, 0.6, 1.0]
+            BinsPerOctave:
+                Description: Number of bins for each octave.
+                Type: Integer
+                Value: 48
+    Dataset:
+        Description: Settings of datasets.
+        Settings:
+            SavePath:
+                Description: Path for storing the downloaded datasets.
+                Type: String
+                Value: ./
+            FeatureSavePath:
+                Description: Path for storing the extracted feature. Default to the path under the dataset folder.
+                Type: String
+                Value: +
+    Model:
+        Description: Default settings of training / testing the model.
+        Settings:
+            SavePrefix:
+                Description: Prefix of the trained model's name to be saved.
+                Type: String
+                Value: vocal
+            SavePath:
+                Description: Path to save the trained model.
+                Type: String
+                Value: ./checkpoints/vocal
+            MinKernelSize:
+                Description: Minimum kernel size of convolution layers in each pyramid block.
+                Type: Integer
+                Value: 16
+            Depth:
+                Description: Total number of pyramid blocks will be -> (Depth - 2) / 2 .
+                Type: Integer
+                Value: 110
+            Alpha:
+                Type: Integer
+                Value: 270
+            ShakeDrop:
+                Description: Whether to leverage Shake Drop normalization during back propagation.
+                Type: Bool
+                Value: True
+            SemiLossWeight:
+                Description: Weighting factor of the semi-supervised loss. Supervised loss will not be affected by this parameter.
+                Type: Float
+                Value: 1.0
+            SemiXi:
+                Description: A small constant value for weighting the adversarial perturbation.
+                Type: Float
+                Value: 0.000001
+            SemiEpsilon:
+                Description: Weighting factor of the output adversarial perturbation.
+                Type: Float
+                Value: 8.0
+            SemiIterations:
+                Description: Number of iterations when generating the adversarial perturbation.
+                Type: Integer
+                Value: 2
+    Inference:
+        Description: Default settings when inferring notes.
+        Settings:
+            ContextLength:
+                Description: Length of context that will be used to find the peaks.
+                Type: Integer
+                Value: 2
+            Threshold:
+                Description: Threshold that will be applied to clip the predicted values to either 0 or 1.
+                Type: Float
+                Value: 0.5
+            MinDuration:
+                Description: Minimum required length of each note, in seconds.
+                Type: Float
+                Value: 0.1
+            PitchModel:
+                Description: The model for predicting the pitch contour. Defaults to the vocal-contour module. Can be a path or a mode name.
+                Type: String
+                Value: VocalContour
+    Training:
+        Description: Hyper parameters for training
+        Settings:
+            Epoch:
+                Description: Maximum number of epochs for training.
+                Type: Integer
+                Value: 10
+            Steps:
+                Description: Number of training steps for each epoch.
+                Type: Integer
+                Value: 1000
+            ValSteps:
+                Description: Number of validation steps after each training epoch.
+                Type: Integer
+                Value: 50
+            BatchSize:
+                Description: Batch size of each training step.
+                Type: Integer
+                Value: 64
+            ValBatchSize:
+                Description: Batch size of each validation step.
+                Type: Integer
+                Value: 64
+            EarlyStop:
+                Description: Terminate the training if the validation performance doesn't improve after n epochs.
+                Type: Integer
+                Value: 8
+            InitLearningRate:
+                Description: Initial learning rate.
+                Type: Float
+                Value: 0.0001
+            ContextLength:
+                Description: Context to be considered before and after current timestamp.
+                Type: Integer
+                Value: 9

omnizart/vocal/vocal_semi/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec5296106686e602b7fa1fc0462911234c3203c5f553b59b99a0d34671466043
+size 14281573

omnizart/vocal/vocal_semi/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c60c2e441adbe00f7c8c204a1b923e16bb35efa364ed76320bd70356496a9cc3
+size 114617623

omnizart/vocal/vocal_semi/variables/variables.index ADDED Viewed

Binary file (45.8 kB). View file