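Update inference script: add HF_PATH constant, pass use_cer/use_langid flags to PreProcessBatch and batch_score instead of cer/langid values, and comment out the single-audio scoring path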

Browse files

Files changed (3)

infer_indicmos.py +14 -13
sample_manifest/manifest.txt +2 -2
sample_manifest/manifest_lang.txt +1 -1

infer_indicmos.py CHANGED Viewed

@@ -33,6 +33,7 @@ BASE_PREDICTOR = "joint_indicw2v_base.pt"
 CER_PREDICTOR = "joint_indicw2v_base_cer.pt"
 LANG_ID_PREDICTOR = "joint_indicw2v_base_lang.pt"
 CER_LANG_ID_PREDICTOR = "joint_indicw2v_base_cer_lang.pt"
 LANG_ID_MAPPING = {
     "hi": 0,
@@ -209,7 +210,7 @@ class Collate():
         return audio_padded, cers, lengths, langs, filenames
 class PreProcessBatch(torch.utils.data.Dataset):
-    def __init__(self, manifest_path, cer, langid):
         with open(manifest_path, "r") as f:
             data = f.read().split("\n")
         delim = "\t"
@@ -248,7 +249,7 @@ class PreProcessBatch(torch.utils.data.Dataset):
         audio, sr = torchaudio.load(audio_path)
         return audio.squeeze(), cer, langid, key
-def score(audio_path, cer=None, langid=None, use_cer=False, use_langid=False, download_path="hf_inference_models", device="cpu"):
     """
     Single audio mos prediction
     """
@@ -258,11 +259,11 @@ def score(audio_path, cer=None, langid=None, use_cer=False, use_langid=False, do
         score = mos_model(audio, cer_data=cer, lang_data=langid).squeeze().cpu().item()
     return score
-def batch_score(manifest_path, save_path, batch_size=32, cer=None, langid=None, use_cer=False, use_langid=False, download_path="hf_inference_models", device="cpu"):
     """
     batch audio mos prediction
     """
-    dataset = PreProcessBatch(manifest_path, cer, langid)
     loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, collate_fn=Collate())
     mos_model = load_model(use_cer, use_langid, download_path, device)
     results = {}
@@ -288,17 +289,17 @@ if __name__ == "__main__":
         raise ValueError("Please provide manifest_path for batch inference")
     cer = None
-    if cer is not None:
-        if cer > 1:
-            print("WARNING: Use raw CER value, not percentage")
     langid = None
     # langid = "kn"
-    if args.audio_path is not None:
         ###FIX THIS
-        score = score(audio_path=args.audio_path, cer=cer, langid=langid, use_cer=args.use_cer, use_langid=args.use_langid)
-        print("predicted MOS", score)
-    else:
-        assert args.save_path is not None, "Please provide a file path for the batch scores to be saved - save_path"
-        batch_score(manifest_path=args.manifest_path, save_path=args.save_path, batch_size=args.batch_size, cer=cer, langid=langid, use_cer=args.use_cer, use_langid=args.use_langid, device=args.device)

 CER_PREDICTOR = "joint_indicw2v_base_cer.pt"
 LANG_ID_PREDICTOR = "joint_indicw2v_base_lang.pt"
 CER_LANG_ID_PREDICTOR = "joint_indicw2v_base_cer_lang.pt"
+HF_PATH = "hf_inference_models"
 LANG_ID_MAPPING = {
     "hi": 0,
         return audio_padded, cers, lengths, langs, filenames
 class PreProcessBatch(torch.utils.data.Dataset):
+    def __init__(self, manifest_path, use_cer, use_langid):
         with open(manifest_path, "r") as f:
             data = f.read().split("\n")
         delim = "\t"
         audio, sr = torchaudio.load(audio_path)
         return audio.squeeze(), cer, langid, key
+def score(audio_path, cer=None, langid=None, use_cer=False, use_langid=False, download_path=HF_PATH, device="cpu"):
     """
     Single audio mos prediction
     """
         score = mos_model(audio, cer_data=cer, lang_data=langid).squeeze().cpu().item()
     return score
+def batch_score(manifest_path, save_path, batch_size=32, use_cer=False, use_langid=False, download_path="hf_inference_models", device="cpu"):
     """
     batch audio mos prediction
     """
+    dataset = PreProcessBatch(manifest_path, use_cer, use_langid)
     loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, collate_fn=Collate())
     mos_model = load_model(use_cer, use_langid, download_path, device)
     results = {}
         raise ValueError("Please provide manifest_path for batch inference")
     cer = None
+    # if cer is not None:
+        # if cer > 1:
+            # print("WARNING: Use raw CER value, not percentage")
     langid = None
     # langid = "kn"
+    # if args.audio_path is not None:
         ###FIX THIS
+        # score = score(audio_path=args.audio_path, cer=cer, langid=langid, use_cer=args.use_cer, use_langid=args.use_langid)
+        # print("predicted MOS", score)
+    # else:
+    assert args.save_path is not None, "Please provide a file path for the batch scores to be saved - save_path"
+    batch_score(manifest_path=args.manifest_path, save_path=args.save_path, batch_size=args.batch_size, use_cer=args.use_cer, use_langid=args.use_langid, device=args.device)

sample_manifest/manifest.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-id audio_path langid
 1 ../sample_audio/kn_audio1.wav
 2 ../sample_audio/hi_audio2.wav
-4 ../sample_audio/mr_audio3.wav

+id audio_path
 1 ../sample_audio/kn_audio1.wav
 2 ../sample_audio/hi_audio2.wav
+3 ../sample_audio/mr_audio3.wav

sample_manifest/manifest_lang.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 id audio_path langid
 1 ../sample_audio/kn_audio1.wav kn
 2 ../sample_audio/hi_audio2.wav hi
-4 ../sample_audio/mr_audio3.wav mr

 id audio_path langid
 1 ../sample_audio/kn_audio1.wav kn
 2 ../sample_audio/hi_audio2.wav hi
+3 ../sample_audio/mr_audio3.wav mr