eleferrand committed on
Commit
014fbd7
·
verified ·
1 Parent(s): ffc5786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -23,7 +23,7 @@ model = AutoModelForCTC.from_pretrained(MODEL_NAME)
23
 
24
  def transcribe(audio_file):
25
 
26
- output = []
27
  try:
28
  audio, rate = librosa.load(audio_file, sr=16000)
29
 
@@ -39,14 +39,15 @@ def transcribe(audio_file):
39
  start=ind
40
  transc = transcription.replace("[UNK]", "")
41
  print(transc)
42
- output.append((start,ind,transc))
43
  else:
44
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
45
  with torch.no_grad():
46
  logits = model(input_values).logits
47
  predicted_ids = torch.argmax(logits, dim=-1)
48
  transcription = processor.batch_decode(predicted_ids)[0]
49
- output.append(((0,len(audio)/rate),transcription))
 
50
  return output
51
 
52
 
@@ -141,7 +142,7 @@ def toggle_language(switch):
141
  with gr.Blocks() as demo:
142
  # lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
143
 
144
- title = gr.Markdown("Yupik ASR Transcription & Correction System")
145
  step1 = gr.Markdown("Step 1: Audio Upload & Transcription")
146
 
147
  # Audio input and playback (Original section)
@@ -153,14 +154,14 @@ with gr.Blocks() as demo:
153
  with gr.Row(): # Added this Row to position the button below the audio input
154
  transcribe_button = gr.Button("Transcribe Audio")
155
 
156
- original_text = gr.Textbox(label="Original Transcription", interactive=False, lines=5)
157
  corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True, lines=5)
158
 
159
  step3 = gr.Markdown("Step 3: User Information")
160
 
161
  with gr.Row():
162
  age_input = gr.Slider(minimum=0, maximum=100, step=1, label="Age", value=25)
163
- native_speaker_input = gr.Checkbox(label="Native Yupik Speaker?", value=True)
164
 
165
  step4 = gr.Markdown("Step 4: Save & Download")
166
 
 
23
 
24
  def transcribe(audio_file):
25
 
26
+ output = ""
27
  try:
28
  audio, rate = librosa.load(audio_file, sr=16000)
29
 
 
39
  start=ind
40
  transc = transcription.replace("[UNK]", "")
41
  print(transc)
42
+ output= output+f"{start} - {ind}: {transc}\n"
43
  else:
44
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
45
  with torch.no_grad():
46
  logits = model(input_values).logits
47
  predicted_ids = torch.argmax(logits, dim=-1)
48
  transcription = processor.batch_decode(predicted_ids)[0]
49
+ transc = transcription.replace("[UNK]", "")
50
+ output=output+f"0 - {len(audio)/rate}: {transc}"
51
  return output
52
 
53
 
 
142
  with gr.Blocks() as demo:
143
  # lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
144
 
145
+ title = gr.Markdown("Creole ASR Transcription & Correction System")
146
  step1 = gr.Markdown("Step 1: Audio Upload & Transcription")
147
 
148
  # Audio input and playback (Original section)
 
154
  with gr.Row(): # Added this Row to position the button below the audio input
155
  transcribe_button = gr.Button("Transcribe Audio")
156
 
157
+ original_text = gr.Textbox(label="Transcription", interactive=False, lines=5)
158
  corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True, lines=5)
159
 
160
  step3 = gr.Markdown("Step 3: User Information")
161
 
162
  with gr.Row():
163
  age_input = gr.Slider(minimum=0, maximum=100, step=1, label="Age", value=25)
164
+ native_speaker_input = gr.Checkbox(label="Native Creole Speaker?", value=True)
165
 
166
  step4 = gr.Markdown("Step 4: Save & Download")
167