Files changed (1) hide show
  1. app.py +31 -50
app.py CHANGED
@@ -12,7 +12,7 @@ import re
12
  def create_interval_data_dict(xmin, xmax, sentence):
13
  return {'xmin': float(xmin), 'xmax': float(xmax), 'text': sentence}
14
 
15
- def write_textgrid_file(intervals, output_file_path, total_xmax, tier_name):
16
  with open(output_file_path, 'w') as f:
17
  f.write('File type = "ooTextFile"\n')
18
  f.write('Object class = "TextGrid"\n\n')
@@ -23,16 +23,22 @@ def write_textgrid_file(intervals, output_file_path, total_xmax, tier_name):
23
  f.write('item []:\n')
24
  f.write(' item [1]:\n')
25
  f.write(' class = "IntervalTier"\n')
26
- f.write(f' name = "{tier_name}"\n')
27
  f.write(' xmin = 0\n')
28
  f.write(f' xmax = {str(float(total_xmax))}\n')
29
- f.write(f' intervals: size = {len(intervals)}\n')
30
 
31
  for idx, interval in enumerate(intervals):
32
  f.write(f' intervals [{idx + 1}]:\n')
33
  f.write(f' xmin = {interval["xmin"]}\n')
34
  f.write(f' xmax = {interval["xmax"]}\n')
35
  f.write(f' text = "{interval["text"]}"\n')
 
 
 
 
 
 
36
 
37
  def validate_csv_format(header):
38
  expected_headers = ['', 'file_name', 'xmin', 'xmax', 'text', 'is_unit_start_pred']
@@ -63,7 +69,7 @@ def validate_row(row):
63
  return False, f"Value error: {e}"
64
 
65
  # ==== Gradio Interface Function ====
66
- def csv_to_textgrid(file, tier_name="generated_tier"):
67
  try:
68
  # Create temporary directory
69
  temp_dir = tempfile.mkdtemp()
@@ -142,13 +148,9 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
142
  if words:
143
  intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
144
 
145
- if intervals:
146
- last_xmax = intervals[-1]['xmax']
147
- intervals.append(create_interval_data_dict(last_xmax, last_xmax + 0.001, '')) # New interval
148
-
149
  if intervals:
150
  textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
151
- write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'], tier_name)
152
  processed_files.append(prev_filename)
153
  print(f"Wrote file: {prev_filename}.TextGrid with {len(intervals)} intervals")
154
 
@@ -177,20 +179,10 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
177
  if not current_file_processed and prev_filename:
178
  if words:
179
  intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
180
-
181
- # Add the new interval with xmin as last xmax and xmax as last xmax + 0.001
182
- if intervals:
183
- last_xmax = intervals[-1]['xmax']
184
- new_xmin = last_xmax
185
- new_xmax = last_xmax + 0.001
186
-
187
- # Only add the new interval if it's not a duplicate
188
- if new_xmin < new_xmax: # Ensure they are not the same
189
- intervals.append(create_interval_data_dict(new_xmin, new_xmax, '')) # New interval
190
-
191
  if intervals:
192
  textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
193
- write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'], tier_name)
194
  processed_files.append(prev_filename)
195
  print(f"Wrote last file: {prev_filename}.TextGrid with {len(intervals)} intervals")
196
 
@@ -218,40 +210,29 @@ def csv_to_textgrid(file, tier_name="generated_tier"):
218
  return None, f"Error: {str(e)}"
219
 
220
  # ==== Gradio Interface Setup ====
221
- csv_format_instruction = """
222
- **Expected CSV Format:**
223
-
224
- Please ensure that the CSV file adheres to the following format:\n
225
-
226
- - The first row must contain headers: `, file_name, xmin, xmax, text, is_unit_start_pred`.
227
- - Each subsequent row should contain the following columns for every word or segment in the audio file:
228
- - `file_name`: Identifier for the audio file, used to group intervals.
229
- - `xmin`: Start time of the segment (in seconds).
230
- - `xmax`: End time of the segment (in seconds).
231
- - `text`: The actual spoken word or phrase.
232
- - `is_unit_start_pred`: Marks the beginning of a new unit (TRUE/FALSE).
233
-
234
- **Please note: We currently only accept CSVs with an index.**
235
-
236
- **Tier Name:**
237
- Please enter the tier name according to your preference or as deemed appropriate for the data.
238
-
239
- **Example CSV:**
240
-
241
- | | file_name | xmin | xmax | text | is_unit_start_pred |
242
- |---|------------|--------|--------|-------|--------------------|
243
- | 0 | example1 | 20.42 | 20.74 | mhmm | TRUE |
244
- | 1 | example1 | 20.74 | 20.81 | hello | TRUE |
245
- | 2 | example1 | 20.81 | 20.92 | world | FALSE |
246
 
247
  """
248
 
249
  iface = gr.Interface(
250
  fn=csv_to_textgrid,
251
- inputs=[
252
- gr.File(label="📁 Upload CSV File", file_types=[".csv"]),
253
- gr.Textbox(label="📝 Enter Tier Name", placeholder="Enter the name of the tier") # New input for tier name
254
- ],
255
  outputs=[
256
  gr.File(label="📦 Download TextGrid ZIP"),
257
  gr.Textbox(label="✅ Status")
 
12
  def create_interval_data_dict(xmin, xmax, sentence):
13
  return {'xmin': float(xmin), 'xmax': float(xmax), 'text': sentence}
14
 
15
+ def write_textgrid_file(intervals, output_file_path, total_xmax):
16
  with open(output_file_path, 'w') as f:
17
  f.write('File type = "ooTextFile"\n')
18
  f.write('Object class = "TextGrid"\n\n')
 
23
  f.write('item []:\n')
24
  f.write(' item [1]:\n')
25
  f.write(' class = "IntervalTier"\n')
26
+ f.write(' name = "Intonational Unit"\n')
27
  f.write(' xmin = 0\n')
28
  f.write(f' xmax = {str(float(total_xmax))}\n')
29
+ f.write(f' intervals: size = {len(intervals) + 1}\n')
30
 
31
  for idx, interval in enumerate(intervals):
32
  f.write(f' intervals [{idx + 1}]:\n')
33
  f.write(f' xmin = {interval["xmin"]}\n')
34
  f.write(f' xmax = {interval["xmax"]}\n')
35
  f.write(f' text = "{interval["text"]}"\n')
36
+
37
+ if len(intervals) > 0:
38
+ f.write(f' intervals [{len(intervals) + 1}]:\n')
39
+ f.write(f' xmin = {intervals[-1]["xmax"]}\n')
40
+ f.write(f' xmax = {intervals[-1]["xmax"]}\n')
41
+ f.write(f' text = ""\n')
42
 
43
  def validate_csv_format(header):
44
  expected_headers = ['', 'file_name', 'xmin', 'xmax', 'text', 'is_unit_start_pred']
 
69
  return False, f"Value error: {e}"
70
 
71
  # ==== Gradio Interface Function ====
72
+ def csv_to_textgrid(file):
73
  try:
74
  # Create temporary directory
75
  temp_dir = tempfile.mkdtemp()
 
148
  if words:
149
  intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
150
 
 
 
 
 
151
  if intervals:
152
  textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
153
+ write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'])
154
  processed_files.append(prev_filename)
155
  print(f"Wrote file: {prev_filename}.TextGrid with {len(intervals)} intervals")
156
 
 
179
  if not current_file_processed and prev_filename:
180
  if words:
181
  intervals.append(create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words)))
182
+
 
 
 
 
 
 
 
 
 
 
183
  if intervals:
184
  textgrid_path = os.path.join(output_directory, f"{prev_filename}.TextGrid")
185
+ write_textgrid_file(intervals, textgrid_path, intervals[-1]['xmax'])
186
  processed_files.append(prev_filename)
187
  print(f"Wrote last file: {prev_filename}.TextGrid with {len(intervals)} intervals")
188
 
 
210
  return None, f"Error: {str(e)}"
211
 
212
  # ==== Gradio Interface Setup ====
213
# User-facing Markdown describing the upload format. The column list mirrors
# the expected_headers list in validate_csv_format, whose first entry is the
# empty name of the leading index column, so the text and the example table
# both show that column.
csv_format_instruction = """**Expected CSV format:**
The first row is the header. It must start with an unnamed index column, followed by:\n
`file_name, xmin, xmax, text, is_unit_start_pred`

Each row represents a word or segment in an audio file.
- (unnamed first column): Row index. CSVs without an index column are not accepted.
- `file_name`: Identifier for the audio file (used to group intervals).
- `xmin`: Start time of the segment (in seconds).
- `xmax`: End time of the segment (in seconds).
- `text`: The actual spoken word or phrase.
- `is_unit_start_pred`: Marks the beginning of a new unit (TRUE/FALSE).

Example:\n
| | file_name | xmin | xmax | text | is_unit_start_pred |
|---|-----------|--------|--------|-------|---------------------|
| 0 | example1 | 20.42 | 20.74 | mhmm | TRUE |
| 1 | example1 | 20.74 | 20.81 | hello | TRUE |
| 2 | example1 | 20.81 | 20.92 | world | FALSE |

"""
232
 
233
  iface = gr.Interface(
234
  fn=csv_to_textgrid,
235
+ inputs=gr.File(label="📁 Upload CSV File", file_types=[".csv"]),
 
 
 
236
  outputs=[
237
  gr.File(label="📦 Download TextGrid ZIP"),
238
  gr.Textbox(label="✅ Status")