Spaces:
Running
Added TextGrid Interval Support
Browse files
Here are screenshots of the changes:


Here's a sample output:
```
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0.0
xmax = 26.74608
tiers? <exists>
size = 2
item []:
item [1]:
class = "IntervalTier"
name = "sentence"
xmin = 0.0
xmax = 26.74608
intervals: size = 10
intervals [1]:
xmin = 0.0
xmax = 0.7995000400000001
text = ""
intervals [2]:
xmin = 0.7995000400000001
xmax = 3.4185402
text = "and seemed to work pretty well that way too"
intervals [3]:
xmin = 3.4185402
xmax = 4.7975004
text = ""
intervals [4]:
xmin = 4.7975004
xmax = 12.9231205
text = "also of course we would usually fill the gas tank on sunday if we had been doing any driving through the week"
intervals [5]:
xmin = 12.9231205
xmax = 13.4625
text = ""
intervals [6]:
xmin = 13.4625
xmax = 16.130500340179836
text = "and my first recollections"
intervals [7]:
xmin = 16.130500340179836
xmax = 16.421021
text = ""
intervals [8]:
xmin = 16.421021
xmax = 21.62864
text = "that there were no filling stations at all any anywhere that i can remember"
intervals [9]:
xmin = 21.62864
xmax = 22.84768
text = ""
intervals [10]:
xmin = 22.84768
xmax = 26.74608
text = "so dad when he first got his car"
item [2]:
class = "IntervalTier"
name = "IPATier"
xmin = 0.0
xmax = 26.74608
intervals: size = 10
intervals [1]:
xmin = 0.0
xmax = 0.7995000400000001
text = ""
intervals [2]:
xmin = 0.7995000400000001
xmax = 3.4185402
text = "ʌnsimdwʌkpɹɪɾiwɛlðæʔweɪtu"
intervals [3]:
xmin = 3.4185402
xmax = 4.7975004
text = ""
intervals [4]:
xmin = 4.7975004
xmax = 12.9231205
text = "ɔlsoʊkɔʌwiwʊdjuʒlifɪlʌɡæʃtæŋkɑnsʌndiɪfɪfwihɛdbɪnduɪŋɛnidɹaɪvɪŋθɹuðʌwik"
intervals [5]:
xmin = 12.9231205
xmax = 13.4625
text = ""
intervals [6]:
xmin = 13.4625
xmax = 16.130500340179836
text = "ændmaɪfɹ̩stɹɛkʌlɛkʃʌnz"
intervals [7]:
xmin = 16.130500340179836
xmax = 16.421021
text = ""
intervals [8]:
xmin = 16.421021
xmax = 21.62864
text = "ɑðɛtɛɹwɹ̩ʌnoʊfɪlɪŋsteɪʃʌnzɾɔɛniɛñiweɪðɛɾaɪkɪnɹmɛmbʌ"
intervals [9]:
xmin = 21.62864
xmax = 22.84768
text = ""
intervals [10]:
xmin = 22.84768
xmax = 26.74608
text = "soʊdɛdwɛɾ̃ifɹ̩stɡɑtɪzkʌ"
```
The files I used to test it out:
<audio controls src="https://cdn-uploads.huggingface.co/production/uploads/68304f0b3842b6d373054246/MaEGwMyzK8JNI0BH8eBE_.wav"></audio>
```
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0
xmax = 26.74608
tiers? <exists>
size = 1
item []:
item [1]:
class = "IntervalTier"
name = "sentence"
xmin = 0
xmax = 26.74608
intervals: size = 10
intervals [1]:
xmin = 0
xmax = 0.7995000400000001
text = ""
intervals [2]:
xmin = 0.7995000400000001
xmax = 3.4185402
text = "and seemed to work pretty well that way too"
intervals [3]:
xmin = 3.4185402
xmax = 4.7975004
text = ""
intervals [4]:
xmin = 4.7975004
xmax = 12.9231205
text = "also of course we would usually fill the gas tank on sunday if we had been doing any driving through the week"
intervals [5]:
xmin = 12.9231205
xmax = 13.4625
text = ""
intervals [6]:
xmin = 13.4625
xmax = 16.130500340179836
text = "and my first recollections"
intervals [7]:
xmin = 16.130500340179836
xmax = 16.421021
text = ""
intervals [8]:
xmin = 16.421021
xmax = 21.62864
text = "that there were no filling stations at all any anywhere that i can remember"
intervals [9]:
xmin = 21.62864
xmax = 22.84768
text = ""
intervals [10]:
xmin = 22.84768
xmax = 26.74608
text = "so dad when he first got his car"
```
|
@@ -1,10 +1,12 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import tempfile
|
|
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import librosa
|
| 6 |
import tgt.core
|
| 7 |
import tgt.io3
|
|
|
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
TEXTGRID_DIR = tempfile.mkdtemp()
|
|
@@ -105,6 +107,45 @@ def get_interactive_download_button(textgrid_contents, textgrid_filename):
|
|
| 105 |
)
|
| 106 |
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
def launch_demo():
|
| 109 |
initial_model = {
|
| 110 |
"loaded_model": pipeline(
|
|
@@ -125,9 +166,15 @@ def launch_demo():
|
|
| 125 |
info="Select the model to use for prediction.",
|
| 126 |
)
|
| 127 |
audio_in = gr.Audio(type="filepath", show_download_button=True)
|
|
|
|
| 128 |
model_state = gr.State(value=initial_model)
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
gr.Markdown("""## TextGrid File Options
|
| 133 |
Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
|
|
@@ -152,6 +199,14 @@ def launch_demo():
|
|
| 152 |
variant="primary",
|
| 153 |
)
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
# Update prediction if model or audio changes
|
| 156 |
gr.on(
|
| 157 |
triggers=[audio_in.input, model_name.change],
|
|
@@ -160,6 +215,13 @@ def launch_demo():
|
|
| 160 |
outputs=[prediction, model_state, textgrid_filename],
|
| 161 |
)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
# Download button becomes interactive if user updates audio or textgrid params
|
| 164 |
gr.on(
|
| 165 |
triggers=[textgrid_contents.change, textgrid_filename.change],
|
|
@@ -168,6 +230,30 @@ def launch_demo():
|
|
| 168 |
outputs=[download_btn],
|
| 169 |
)
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
demo.launch(max_file_size="100mb")
|
| 172 |
|
| 173 |
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import tempfile
|
| 3 |
+
import os
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import librosa
|
| 7 |
import tgt.core
|
| 8 |
import tgt.io3
|
| 9 |
+
import soundfile as sf
|
| 10 |
from transformers import pipeline
|
| 11 |
|
| 12 |
TEXTGRID_DIR = tempfile.mkdtemp()
|
|
|
|
| 107 |
)
|
| 108 |
|
| 109 |
|
| 110 |
+
def transcribe_intervals(audio_in, textgrid_path, source_tier, target_tier, model_state):
    """Transcribe every non-empty interval of a TextGrid tier into a new tier.

    Each interval of ``source_tier`` whose text is not blank is cut out of the
    audio, written to a temporary WAV file, and passed to the loaded model.
    The model's text is stored in a new interval tier named ``target_tier``
    covering the same time span, and that tier is appended to the TextGrid.

    Args:
        audio_in: Path of the audio file, as accepted by ``librosa.load``.
        textgrid_path: Uploaded TextGrid file object; its ``.name`` attribute
            is read as the path of the file on disk.
        source_tier: Name of the existing tier whose intervals are transcribed.
        target_tier: Name given to the newly created tier.
        model_state: Mapping whose ``"loaded_model"`` entry is a callable that
            takes an audio file path and returns a mapping with a ``"text"``
            key.

    Returns:
        A 2-tuple ``(textgrid_text, preview_text)``. On success both items are
        the long-format TextGrid export. When the audio or the TextGrid is
        missing, the first item is an empty string and the second is an
        explanatory message.
    """
    if audio_in is None or textgrid_path is None:
        # Always return two values: the caller binds this function to two
        # output components, and a lone string is rejected by Gradio.
        return "", "Missing audio or TextGrid input file."

    tg = tgt.io.read_textgrid(textgrid_path.name)
    tier = tg.get_tier_by_name(source_tier)
    ipa_tier = tgt.core.IntervalTier(name=target_tier)

    for interval in tier.intervals:
        if not interval.text.strip():  # Skip empty text intervals
            continue

        start, end = interval.start_time, interval.end_time
        temp_path = None
        try:
            y, sr = librosa.load(audio_in, sr=None, offset=start, duration=end - start)
            # Only reserve a unique file name here; the handle is closed before
            # the file is written, read by the model, and finally deleted.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_path = temp_audio.name
            sf.write(temp_path, y, sr)
            prediction = model_state["loaded_model"](temp_path)["text"]
        except Exception as e:
            # Best-effort: one failing interval must not abort the whole run,
            # so the error is recorded in place of the transcription.
            prediction = f"[Error]: {str(e)}"
        finally:
            # delete=False means nothing else removes the clip, so clean up
            # on the failure path as well as on success.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

        ipa_tier.add_annotation(tgt.core.Interval(start, end, prediction))

    tg.add_tier(ipa_tier)
    tgt_str = tgt.io3.export_to_long_textgrid(tg)

    return tgt_str, tgt_str
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def extract_tier_names(textgrid_file):
    """Build a dropdown update listing the tier names of a TextGrid file.

    Args:
        textgrid_file: Uploaded file object whose ``.name`` attribute is read
            as the path of the TextGrid on disk, or ``None`` when no file is
            present.

    Returns:
        A ``gr.update`` whose ``choices`` are the tier names found in the file
        and whose ``value`` is the first of them. If there is no file, the
        file cannot be read, or it has no tiers, ``choices`` is empty and/or
        ``value`` is ``None``.
    """
    # Nothing uploaded (or the upload was cleared): reset without parsing.
    if textgrid_file is None:
        return gr.update(choices=[], value=None)

    try:
        tg = tgt.io.read_textgrid(textgrid_file.name)
        tier_names = [tier.name for tier in tg.tiers]
    except Exception:
        # Best-effort: an unreadable or malformed file simply yields an empty
        # dropdown instead of surfacing an error to the user.
        return gr.update(choices=[], value=None)

    return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
def launch_demo():
|
| 150 |
initial_model = {
|
| 151 |
"loaded_model": pipeline(
|
|
|
|
| 166 |
info="Select the model to use for prediction.",
|
| 167 |
)
|
| 168 |
audio_in = gr.Audio(type="filepath", show_download_button=True)
|
| 169 |
+
textgrid_file = gr.File(file_types=[".TextGrid"], label="Upload TextGrid File")
|
| 170 |
model_state = gr.State(value=initial_model)
|
| 171 |
|
| 172 |
+
tier_names = gr.Dropdown(label="Source Tier (existing)", choices=[], interactive=True)
|
| 173 |
+
target_tier = gr.Textbox(label="Target Tier (new)", value="IPATier", placeholder="e.g. IPATier")
|
| 174 |
+
|
| 175 |
+
run_btn = gr.Button("Transcribe Intervals")
|
| 176 |
+
|
| 177 |
+
prediction = gr.Textbox(label="Full Audio IPA transcription")
|
| 178 |
|
| 179 |
gr.Markdown("""## TextGrid File Options
|
| 180 |
Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
|
|
|
|
| 199 |
variant="primary",
|
| 200 |
)
|
| 201 |
|
| 202 |
+
transcription_result = gr.Textbox(visible=False)
|
| 203 |
+
textgrid_preview = gr.Textbox(
|
| 204 |
+
label="Updated Interval Wise TextGrid Preview",
|
| 205 |
+
lines=20,
|
| 206 |
+
interactive=False,
|
| 207 |
+
show_copy_button=True
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
# Update prediction if model or audio changes
|
| 211 |
gr.on(
|
| 212 |
triggers=[audio_in.input, model_name.change],
|
|
|
|
| 215 |
outputs=[prediction, model_state, textgrid_filename],
|
| 216 |
)
|
| 217 |
|
| 218 |
+
gr.on(
|
| 219 |
+
triggers=[audio_in.input, textgrid_tier.input, prediction.change],
|
| 220 |
+
fn=get_textgrid_contents,
|
| 221 |
+
inputs=[audio_in, textgrid_tier, prediction],
|
| 222 |
+
outputs=[textgrid_contents],
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
# Download button becomes interactive if user updates audio or textgrid params
|
| 226 |
gr.on(
|
| 227 |
triggers=[textgrid_contents.change, textgrid_filename.change],
|
|
|
|
| 230 |
outputs=[download_btn],
|
| 231 |
)
|
| 232 |
|
| 233 |
+
textgrid_file.change(
|
| 234 |
+
fn=extract_tier_names,
|
| 235 |
+
inputs=[textgrid_file],
|
| 236 |
+
outputs=[tier_names],
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
run_btn.click(
|
| 240 |
+
fn=transcribe_intervals,
|
| 241 |
+
inputs=[audio_in, textgrid_file, tier_names, target_tier, model_state],
|
| 242 |
+
outputs=[transcription_result, textgrid_preview]
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
transcription_result.change(
|
| 246 |
+
fn=lambda tg_text: tg_text,
|
| 247 |
+
inputs=transcription_result,
|
| 248 |
+
outputs=textgrid_contents
|
| 249 |
+
)
|
| 250 |
+
|
| 251 |
+
transcription_result.change(
|
| 252 |
+
fn=lambda tg_text, filename: write_textgrid(tg_text, filename),
|
| 253 |
+
inputs=[transcription_result, textgrid_filename],
|
| 254 |
+
outputs=download_btn
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
demo.launch(max_file_size="100mb")
|
| 258 |
|
| 259 |
|