Update app.py
Browse files
app.py
CHANGED
|
@@ -17,12 +17,16 @@ MODELS = {
|
|
| 17 |
"epitran": epitran.Epitran("ara-Arab")
|
| 18 |
},
|
| 19 |
"English": {
|
| 20 |
-
"processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-
|
| 21 |
-
"model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-
|
| 22 |
"epitran": epitran.Epitran("eng-Latn")
|
| 23 |
}
|
| 24 |
}
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def clean_phonemes(ipa):
    """Strip combining diacritics and length markers from an IPA phoneme string.

    Removes characters in U+064B–U+0652 (Arabic tashkeel / harakat range)
    and U+02D0 (the IPA length mark), leaving the base phoneme symbols.
    """
    unwanted = re.compile(r'[\u064B-\u0652\u02D0]')
    return unwanted.sub('', ipa)
|
|
@@ -42,7 +46,7 @@ def analyze_phonemes(language, reference_text, audio_file):
|
|
| 42 |
ref_phonemes.append(list(ipa_clean))
|
| 43 |
|
| 44 |
# Process audio file
|
| 45 |
-
audio, sr = librosa.load(audio_file
|
| 46 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
| 47 |
|
| 48 |
# Get transcription
|
|
@@ -147,7 +151,7 @@ with gr.Blocks() as demo:
|
|
| 147 |
value=get_default_text("Arabic")
|
| 148 |
)
|
| 149 |
|
| 150 |
-
audio_input = gr.
|
| 151 |
submit_btn = gr.Button("Analyze")
|
| 152 |
output = gr.JSON(label="Phoneme Alignment Results")
|
| 153 |
|
|
|
|
| 17 |
"epitran": epitran.Epitran("ara-Arab")
|
| 18 |
},
|
| 19 |
"English": {
|
| 20 |
+
"processor": Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h"),
|
| 21 |
+
"model": Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h"),
|
| 22 |
"epitran": epitran.Epitran("eng-Latn")
|
| 23 |
}
|
| 24 |
}
|
| 25 |
|
| 26 |
+
# Configure every loaded model's CTC loss reduction to "mean"
# (suppresses the warning about newly initialized weights).
for model_bundle in MODELS.values():
    model_bundle["model"].config.ctc_loss_reduction = "mean"
|
| 29 |
+
|
| 30 |
def clean_phonemes(ipa):
    """Return *ipa* with Arabic diacritics and IPA length markers removed.

    The character class covers U+064B–U+0652 (Arabic short-vowel and
    sukun combining marks) plus U+02D0 (IPA triangular-colon length mark).
    """
    # Single C-level pass over the string; no-op when no marks are present.
    return re.sub(r'[\u064B-\u0652\u02D0]', '', ipa)
|
|
|
|
| 46 |
ref_phonemes.append(list(ipa_clean))
|
| 47 |
|
| 48 |
# Process audio file
|
| 49 |
+
audio, sr = librosa.load(audio_file, sr=16000)
|
| 50 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
| 51 |
|
| 52 |
# Get transcription
|
|
|
|
| 151 |
value=get_default_text("Arabic")
|
| 152 |
)
|
| 153 |
|
| 154 |
+
audio_input = gr.Audio(label="Upload Audio File", type="filepath")
|
| 155 |
submit_btn = gr.Button("Analyze")
|
| 156 |
output = gr.JSON(label="Phoneme Alignment Results")
|
| 157 |
|