Spaces:
Sleeping
Sleeping
suyuan
#2
by ylmmhf - opened
app.py
CHANGED
|
@@ -12,7 +12,7 @@ import re
|
|
| 12 |
def create_interval_data_dict(xmin, xmax, sentence):
|
| 13 |
return {'xmin': float(xmin), 'xmax': float(xmax), 'text': sentence}
|
| 14 |
|
| 15 |
-
def write_textgrid_file(intervals, output_file_path, total_xmax, tier_name):
|
| 16 |
with open(output_file_path, 'w') as f:
|
| 17 |
f.write('File type = "ooTextFile"\n')
|
| 18 |
f.write('Object class = "TextGrid"\n\n')
|
|
@@ -23,16 +23,22 @@ def write_textgrid_file(intervals, output_file_path, total_xmax, tier_name):
|
|
| 23 |
f.write('item []:\n')
|
| 24 |
f.write(' item [1]:\n')
|
| 25 |
f.write(' class = "IntervalTier"\n')
|
| 26 |
-
f.write(f' name = "{tier_name}"\n')
|
| 27 |
f.write(' xmin = 0\n')
|
| 28 |
f.write(f' xmax = {str(float(total_xmax))}\n')
|
| 29 |
-
f.write(f' intervals: size = {len(intervals)}\n')
|
| 30 |
|
| 31 |
for idx, interval in enumerate(intervals):
|
| 32 |
f.write(f' intervals [{idx + 1}]:\n')
|
| 33 |
f.write(f' xmin = {interval["xmin"]}\n')
|
| 34 |
f.write(f' xmax = {interval["xmax"]}\n')
|
| 35 |
f.write(f' text = "{interval["text"]}"\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
def validate_csv_format(header):
|
| 38 |
expected_headers = ['', 'file_name', 'xmin', 'xmax', 'text', 'is_unit_start_pred']
|
|
@@ -63,7 +69,7 @@ def validate_row(row):
|
|
| 63 |
return False, f"Value error: {e}"
|
| 64 |
|
| 65 |
# ==== Gradio Interface Function ====
|
| 66 |
-
def csv_to_textgrid(file, tier_name="generated_tier"):
|
| 67 |
try:
|
| 68 |
# Create temporary directory
|
| 69 |
temp_dir = tempfile.mkdtemp()
|
|
@@ -142,13 +148,9 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
|
|
| 142 |
if words:
|
| 143 |
intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
|
| 144 |
|
| 145 |
-
if intervals:
|
| 146 |
-
last_xmax = intervals[-1]['xmax']
|
| 147 |
-
intervals.append(create_interval_data_dict(last_xmax, last_xmax + 0.001, '')) # New interval
|
| 148 |
-
|
| 149 |
if intervals:
|
| 150 |
textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
|
| 151 |
-
write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'], tier_name)
|
| 152 |
processed_files.append(prev_filename)
|
| 153 |
print(f"Wrote file: {prev_filename}.TextGrid with {len(intervals)} intervals")
|
| 154 |
|
|
@@ -177,20 +179,10 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
|
|
| 177 |
if not current_file_processed and prev_filename:
|
| 178 |
if words:
|
| 179 |
intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
|
| 180 |
-
|
| 181 |
-
# Add the new interval with xmin as last xmax and xmax as last xmax + 0.001
|
| 182 |
-
if intervals:
|
| 183 |
-
last_xmax = intervals[-1]['xmax']
|
| 184 |
-
new_xmin = last_xmax
|
| 185 |
-
new_xmax = last_xmax + 0.001
|
| 186 |
-
|
| 187 |
-
# Only add the new interval if it's not a duplicate
|
| 188 |
-
if new_xmin < new_xmax: # Ensure they are not the same
|
| 189 |
-
intervals.append(create_interval_data_dict(new_xmin, new_xmax, '')) # New interval
|
| 190 |
-
|
| 191 |
if intervals:
|
| 192 |
textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
|
| 193 |
-
write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'], tier_name)
|
| 194 |
processed_files.append(prev_filename)
|
| 195 |
print(f"Wrote last file: {prev_filename}.TextGrid with {len(intervals)} intervals")
|
| 196 |
|
|
@@ -218,40 +210,29 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
|
|
| 218 |
return None, f"Error: {str(e)}"
|
| 219 |
|
| 220 |
# ==== Gradio Interface Setup ====
|
| 221 |
-
csv_format_instruction = """
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
-
|
| 227 |
-
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
**Example CSV:**
|
| 240 |
-
|
| 241 |
-
| | file_name | xmin | xmax | text | is_unit_start_pred |
|
| 242 |
-
|---|------------|--------|--------|-------|--------------------|
|
| 243 |
-
| 0 | example1 | 20.42 | 20.74 | mhmm | TRUE |
|
| 244 |
-
| 1 | example1 | 20.74 | 20.81 | hello | TRUE |
|
| 245 |
-
| 2 | example1 | 20.81 | 20.92 | world | FALSE |
|
| 246 |
|
| 247 |
"""
|
| 248 |
|
| 249 |
iface = gr.Interface(
|
| 250 |
fn=csv_to_textgrid,
|
| 251 |
-
inputs=[
|
| 252 |
-
gr.File(label="📁 Upload CSV File", file_types=[".csv"]),
|
| 253 |
-
gr.Textbox(label="π Enter Tier Name", placeholder="Enter the name of the tier") # New input for tier name
|
| 254 |
-
],
|
| 255 |
outputs=[
|
| 256 |
gr.File(label="📦 Download TextGrid ZIP"),
|
| 257 |
gr.Textbox(label="✅ Status")
|
|
|
|
| 12 |
def create_interval_data_dict(xmin, xmax, sentence):
|
| 13 |
return {'xmin': float(xmin), 'xmax': float(xmax), 'text': sentence}
|
| 14 |
|
| 15 |
+
def write_textgrid_file(intervals, output_file_path, total_xmax):
|
| 16 |
with open(output_file_path, 'w') as f:
|
| 17 |
f.write('File type = "ooTextFile"\n')
|
| 18 |
f.write('Object class = "TextGrid"\n\n')
|
|
|
|
| 23 |
f.write('item []:\n')
|
| 24 |
f.write(' item [1]:\n')
|
| 25 |
f.write(' class = "IntervalTier"\n')
|
| 26 |
+
f.write(' name = "Intonational Unit"\n')
|
| 27 |
f.write(' xmin = 0\n')
|
| 28 |
f.write(f' xmax = {str(float(total_xmax))}\n')
|
| 29 |
+
f.write(f' intervals: size = {len(intervals) + 1}\n')
|
| 30 |
|
| 31 |
for idx, interval in enumerate(intervals):
|
| 32 |
f.write(f' intervals [{idx + 1}]:\n')
|
| 33 |
f.write(f' xmin = {interval["xmin"]}\n')
|
| 34 |
f.write(f' xmax = {interval["xmax"]}\n')
|
| 35 |
f.write(f' text = "{interval["text"]}"\n')
|
| 36 |
+
|
| 37 |
+
if len(intervals) > 0:
|
| 38 |
+
f.write(f' intervals [{len(intervals) + 1}]:\n')
|
| 39 |
+
f.write(f' xmin = {intervals[-1]["xmax"]}\n')
|
| 40 |
+
f.write(f' xmax = {intervals[-1]["xmax"]}\n')
|
| 41 |
+
f.write(f' text = ""\n')
|
| 42 |
|
| 43 |
def validate_csv_format(header):
|
| 44 |
expected_headers = ['', 'file_name', 'xmin', 'xmax', 'text', 'is_unit_start_pred']
|
|
|
|
| 69 |
return False, f"Value error: {e}"
|
| 70 |
|
| 71 |
# ==== Gradio Interface Function ====
|
| 72 |
+
def csv_to_textgrid(file):
|
| 73 |
try:
|
| 74 |
# Create temporary directory
|
| 75 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
| 148 |
if words:
|
| 149 |
intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
if intervals:
|
| 152 |
textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
|
| 153 |
+
write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'])
|
| 154 |
processed_files.append(prev_filename)
|
| 155 |
print(f"Wrote file: {prev_filename}.TextGrid with {len(intervals)} intervals")
|
| 156 |
|
|
|
|
| 179 |
if not current_file_processed and prev_filename:
|
| 180 |
if words:
|
| 181 |
intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
|
| 182 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
if intervals:
|
| 184 |
textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
|
| 185 |
+
write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'])
|
| 186 |
processed_files.append(prev_filename)
|
| 187 |
print(f"Wrote last file: {prev_filename}.TextGrid with {len(intervals)} intervals")
|
| 188 |
|
|
|
|
| 210 |
return None, f"Error: {str(e)}"
|
| 211 |
|
| 212 |
# ==== Gradio Interface Setup ====
|
| 213 |
+
csv_format_instruction = """**Expected CSV format:**
|
| 214 |
+
The first row is the header. Each subsequent row should contain:\n
|
| 215 |
+
`file_name, xmin, xmax, text, is_unit_start_pred`
|
| 216 |
+
|
| 217 |
+
Each row represents a word or segment in an audio file.
|
| 218 |
+
- `file_name`: Identifier for the audio file (used to group intervals).
|
| 219 |
+
- `xmin`: Start time of the segment (in seconds).
|
| 220 |
+
- `xmax`: End time of the segment (in seconds).
|
| 221 |
+
- `text`: The actual spoken word or phrase.
|
| 222 |
+
- `is_unit_start_pred`: Marks the beginning of a new unit (TRUE/FALSE).
|
| 223 |
+
|
| 224 |
+
Example:\n
|
| 225 |
+
| file_name | xmin | xmax | text | is_unit_start_pred |
|
| 226 |
+
|-----------|--------|--------|-------|---------------------|
|
| 227 |
+
| example1 | 20.42 | 20.74 | mhmm | TRUE |
|
| 228 |
+
| example1 | 20.74 | 20.81 | hello | TRUE |
|
| 229 |
+
| example1 | 20.81 | 20.92 | world | FALSE |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
"""
|
| 232 |
|
| 233 |
iface = gr.Interface(
|
| 234 |
fn=csv_to_textgrid,
|
| 235 |
+
inputs=gr.File(label="📁 Upload CSV File", file_types=[".csv"]),
|
|
|
|
|
|
|
|
|
|
| 236 |
outputs=[
|
| 237 |
gr.File(label="📦 Download TextGrid ZIP"),
|
| 238 |
gr.Textbox(label="✅ Status")
|