Spaces:
Sleeping
Sleeping
Update to_cha.py
Browse files
to_cha.py
CHANGED
|
@@ -1,6 +1,19 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, tempfile, uuid
|
| 2 |
+
import batchalign as ba
|
| 3 |
+
|
| 4 |
+
def to_cha_from_wav(wav_path: str, lang: str = "eng") -> str:
    """Run Batchalign on a media file and write the result as a CHAT file.

    Builds an ``"asr,morphosyntax"`` Batchalign pipeline for ``lang``, applies
    it to a document created from ``wav_path``, and writes the processed
    document to a uniquely named ``.cha`` file inside a freshly created
    temporary directory (prefix ``cha_``).

    Args:
        wav_path: Path of the media file handed to Batchalign.
        lang: Language code passed to both the pipeline and the document.

    Returns:
        Path of the written ``.cha`` file. The temporary directory is not
        removed by this function; cleanup is left to the caller.
    """
    pipeline = ba.BatchalignPipeline.new("asr,morphosyntax", lang=lang)
    processed = pipeline(ba.Document.new(media_path=wav_path, lang=lang))
    chat_file = ba.CHATFile(doc=processed)

    # A new directory plus a random hex file name keeps concurrent or
    # repeated calls from overwriting each other's output.
    target_dir = tempfile.mkdtemp(prefix="cha_")
    target_path = os.path.join(target_dir, f"{uuid.uuid4().hex}.cha")

    # write_wor=True is forwarded to the CHAT writer unchanged
    # (presumably enables %wor tiers -- confirm against the Batchalign docs).
    chat_file.write(target_path, write_wor=True)
    return target_path
|
| 16 |
+
|
| 17 |
+
if __name__ == "__main__":
    import sys

    # Command-line entry point: convert the media file named by the first
    # argument and print the path of the resulting .cha file.
    if len(sys.argv) < 2:
        # Exit with a usage message (written to stderr, non-zero status)
        # instead of crashing with an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <media_path>")
    print(to_cha_from_wav(sys.argv[1]))
|