Spaces:

mms-meta
/

mms-zeroshot

Running

vineelpratap committed on Jul 4, 2024

Commit

4cba436

verified ·

1 Parent(s): cbf5820

Update zeroshot.py

Files changed (1) hide show

zeroshot.py CHANGED Viewed

@@ -133,12 +133,16 @@ def process(
     # print("after uroman")
     # print("len lexicon", len(lexicon))
     with tempfile.NamedTemporaryFile() as lexicon_file:
         with open(lexicon_file.name, "w") as f:
             idx = 10
             for word, spelling in lexicon.items():
                 f.write(word + " " + spelling + "\n")
-                if idx % 100 == 0:
                     print(word, spelling, flush=True)
                 idx += 1
@@ -150,11 +154,13 @@ def process(
             )
         if lmscore_usedefault:
             lmscore = LM_SCORE_DEFAULT if lm_path is not None else 0
         beam_search_decoder = ctc_decoder(
             lexicon=lexicon_file.name,
             tokens=token_file,
-            lm=None,
             nbest=1,
             beam_size=500,
             beam_size_token=50,

     # print("after uroman")
     # print("len lexicon", len(lexicon))
     with tempfile.NamedTemporaryFile() as lexicon_file:
+        print("lm_path before", lm_path)
+        if lm_path is not None and not lm_path.strip():
+            lm_path = None
+        print("lm_path after", lm_path)
         with open(lexicon_file.name, "w") as f:
             idx = 10
             for word, spelling in lexicon.items():
                 f.write(word + " " + spelling + "\n")
+                if idx % 250 == 0:
                     print(word, spelling, flush=True)
                 idx += 1
             )
         if lmscore_usedefault:
             lmscore = LM_SCORE_DEFAULT if lm_path is not None else 0
+        print("using word score", wscore)
+        print("using lm score", lmscore)
         beam_search_decoder = ctc_decoder(
             lexicon=lexicon_file.name,
             tokens=token_file,
+            lm=lm_path,
             nbest=1,
             beam_size=500,
             beam_size_token=50,