Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ model = AutoModelForCTC.from_pretrained(MODEL_NAME)
|
|
| 23 |
|
| 24 |
def transcribe(audio_file):
|
| 25 |
|
| 26 |
-
output =
|
| 27 |
try:
|
| 28 |
audio, rate = librosa.load(audio_file, sr=16000)
|
| 29 |
|
|
@@ -39,14 +39,15 @@ def transcribe(audio_file):
|
|
| 39 |
start=ind
|
| 40 |
transc = transcription.replace("[UNK]", "")
|
| 41 |
print(transc)
|
| 42 |
-
output
|
| 43 |
else:
|
| 44 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
| 45 |
with torch.no_grad():
|
| 46 |
logits = model(input_values).logits
|
| 47 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 48 |
transcription = processor.batch_decode(predicted_ids)[0]
|
| 49 |
-
|
|
|
|
| 50 |
return output
|
| 51 |
|
| 52 |
|
|
@@ -141,7 +142,7 @@ def toggle_language(switch):
|
|
| 141 |
with gr.Blocks() as demo:
|
| 142 |
# lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
|
| 143 |
|
| 144 |
-
title = gr.Markdown("
|
| 145 |
step1 = gr.Markdown("Step 1: Audio Upload & Transcription")
|
| 146 |
|
| 147 |
# Audio input and playback (Original section)
|
|
@@ -153,14 +154,14 @@ with gr.Blocks() as demo:
|
|
| 153 |
with gr.Row(): # Added this Row to position the button below the audio input
|
| 154 |
transcribe_button = gr.Button("Transcribe Audio")
|
| 155 |
|
| 156 |
-
original_text = gr.Textbox(label="
|
| 157 |
corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True, lines=5)
|
| 158 |
|
| 159 |
step3 = gr.Markdown("Step 3: User Information")
|
| 160 |
|
| 161 |
with gr.Row():
|
| 162 |
age_input = gr.Slider(minimum=0, maximum=100, step=1, label="Age", value=25)
|
| 163 |
-
native_speaker_input = gr.Checkbox(label="Native
|
| 164 |
|
| 165 |
step4 = gr.Markdown("Step 4: Save & Download")
|
| 166 |
|
|
|
|
| 23 |
|
| 24 |
def transcribe(audio_file):
|
| 25 |
|
| 26 |
+
output = ""
|
| 27 |
try:
|
| 28 |
audio, rate = librosa.load(audio_file, sr=16000)
|
| 29 |
|
|
|
|
| 39 |
start=ind
|
| 40 |
transc = transcription.replace("[UNK]", "")
|
| 41 |
print(transc)
|
| 42 |
+
output= output+f"{start} - {ind}: {transc}\n"
|
| 43 |
else:
|
| 44 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
| 45 |
with torch.no_grad():
|
| 46 |
logits = model(input_values).logits
|
| 47 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 48 |
transcription = processor.batch_decode(predicted_ids)[0]
|
| 49 |
+
transc = transcription.replace("[UNK]", "")
|
| 50 |
+
output=output+f"0 - {len(audio)/rate}: {transc}"
|
| 51 |
return output
|
| 52 |
|
| 53 |
|
|
|
|
| 142 |
with gr.Blocks() as demo:
|
| 143 |
# lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
|
| 144 |
|
| 145 |
+
title = gr.Markdown("Creole ASR Transcription & Correction System")
|
| 146 |
step1 = gr.Markdown("Step 1: Audio Upload & Transcription")
|
| 147 |
|
| 148 |
# Audio input and playback (Original section)
|
|
|
|
| 154 |
with gr.Row(): # Added this Row to position the button below the audio input
|
| 155 |
transcribe_button = gr.Button("Transcribe Audio")
|
| 156 |
|
| 157 |
+
original_text = gr.Textbox(label="Transcription", interactive=False, lines=5)
|
| 158 |
corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True, lines=5)
|
| 159 |
|
| 160 |
step3 = gr.Markdown("Step 3: User Information")
|
| 161 |
|
| 162 |
with gr.Row():
|
| 163 |
age_input = gr.Slider(minimum=0, maximum=100, step=1, label="Age", value=25)
|
| 164 |
+
native_speaker_input = gr.Checkbox(label="Native Creole Speaker?", value=True)
|
| 165 |
|
| 166 |
step4 = gr.Markdown("Step 4: Save & Download")
|
| 167 |
|