Ellie5757575757 committed on
Commit
4393971
·
verified ·
1 Parent(s): 08677ee

Update to_cha.py

Browse files
Files changed (1) hide show
  1. to_cha.py +19 -6
to_cha.py CHANGED
@@ -1,6 +1,19 @@
1
- import batchalign as ba
2
- nlp = ba.BatchalignPipeline.new("asr,morphosyntax", lang="eng")
3
- doc = ba.Document.new(media_path="/workspace/SH001/videos/ACWT07a.wav", lang="eng")
4
- doc = nlp(doc)
5
- chat = ba.CHATFile(doc=doc)
6
- chat.write("/workspace/SH001/vid_output/output.cha", write_wor=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, tempfile, uuid
2
+ import batchalign as ba
3
+
4
+ def to_cha_from_wav(wav_path: str, lang: str = "eng") -> str:
5
+ """Run Batchalign → CHAT and return path to .cha"""
6
+ nlp = ba.BatchalignPipeline.new("asr,morphosyntax", lang=lang)
7
+ doc = ba.Document.new(media_path=wav_path, lang=lang)
8
+ doc = nlp(doc)
9
+
10
+ chat = ba.CHATFile(doc=doc)
11
+
12
+ out_dir = tempfile.mkdtemp(prefix="cha_")
13
+ out_path = os.path.join(out_dir, f"{uuid.uuid4().hex}.cha")
14
+ chat.write(out_path, write_wor=True) # keep your preferred flags
15
+ return out_path
16
+
17
+ if __name__ == "__main__":
18
+ import sys
19
+ print(to_cha_from_wav(sys.argv[1]))