baharbhz committed on
Commit
e34f97b
·
verified ·
1 Parent(s): 2fa1339

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ import torch
4
+ import librosa
5
+ import numpy as np
6
+ import moviepy.editor as mp
7
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
8
+
9
+ model_name = "m3hrdadfi/wav2vec2-large-xlsr-persian"
10
+ processor = Wav2Vec2Processor.from_pretrained(model_name)
11
+ model = Wav2Vec2ForCTC.from_pretrained(model_name)
12
+
13
def preprocess_audio(audio_path):
    """Load an audio file and return it as a model-ready waveform tensor.

    The file is decoded by librosa as mono audio at 16 kHz, scaled with
    ``librosa.util.normalize`` and passed through librosa's pre-emphasis
    filter (library defaults for both).

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        A ``torch.float32`` tensor with a leading dimension of size 1 added
        in front of the sample axis.
    """
    # The sample rate returned by librosa is not needed: it is forced to 16 kHz.
    samples, _ = librosa.load(audio_path, sr=16000, mono=True)
    emphasized = librosa.effects.preemphasis(librosa.util.normalize(samples))
    waveform = torch.tensor(emphasized, dtype=torch.float32)
    return waveform.unsqueeze(0)
18
+
19
+
20
def speech_to_text(audio_path):
    """Transcribe an audio file with the module-level Wav2Vec2 CTC model.

    Args:
        audio_path: Path of the audio file to transcribe. Gradio passes
            ``None`` when the user clicks the button without uploading or
            recording anything.

    Returns:
        The decoded transcription string.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque loader traceback.
    """
    if audio_path is None:
        # "Please select or record an audio file first."
        raise gr.Error("لطفاً ابتدا یک فایل صوتی انتخاب یا ضبط کنید.")

    waveform = preprocess_audio(audio_path)

    # Drop only the leading batch axis (a bare ``squeeze()`` would collapse a
    # one-sample clip to a 0-d tensor) and hand the feature extractor a NumPy
    # array, which is its documented input type.
    samples = waveform.squeeze(0).numpy()
    input_values = processor(
        samples, return_tensors="pt", sampling_rate=16000
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: pick the most likely token at every frame.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
29
+
30
+
31
def video_to_text(video_path):
    """Extract the audio track of a video and transcribe it.

    The audio is written as 16-bit PCM WAV into a private temporary
    directory, so concurrent requests cannot overwrite each other's file and
    nothing is left behind afterwards.

    Args:
        video_path: Path of the video file. Gradio passes ``None`` when the
            user clicks the button without uploading anything.

    Returns:
        The transcription string produced by ``speech_to_text``.

    Raises:
        gr.Error: If no video was provided or the video has no audio track.
    """
    # Imported locally so this function is self-contained within the file.
    import os
    import tempfile

    if video_path is None:
        # "Please select a video file first."
        raise gr.Error("لطفاً ابتدا یک فایل ویدئویی انتخاب کنید.")

    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "extracted_audio.wav")

        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                # "The selected video has no audio track."
                raise gr.Error("ویدئوی انتخاب شده صدا ندارد.")
            video.audio.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            # Always release the reader and its file handles.
            video.close()

        # Transcribe while the temporary directory (and the WAV) still exists.
        return speech_to_text(audio_path)
36
+
37
# Gradio front end: three tabs that all end in a transcription text box.
with gr.Blocks() as demo:
    gr.Markdown("### تبدیل گفتار فارسی به متن با استفاده از Wav2Vec2")

    # Tab 1: upload a video; its audio track is extracted and transcribed.
    with gr.Tab("آپلود ویدئو"):
        video_input = gr.File(label="انتخاب ویدئو")
        video_output = gr.Textbox(label="متن استخراج شده")
        video_button = gr.Button("تبدیل به متن")
        video_button.click(
            fn=video_to_text,
            inputs=video_input,
            outputs=video_output,
        )

    # Tab 2: upload an audio file and transcribe it directly.
    with gr.Tab("آپلود فایل صوتی"):
        audio_input = gr.File(label="انتخاب فایل صوتی")
        audio_output = gr.Textbox(label="متن استخراج شده")
        audio_button = gr.Button("تبدیل به متن")
        audio_button.click(
            fn=speech_to_text,
            inputs=audio_input,
            outputs=audio_output,
        )

    # Tab 3: record from the microphone; Gradio hands over a file path.
    with gr.Tab("ضبط صدا"):
        mic_input = gr.Audio(sources="microphone", type="filepath")
        mic_output = gr.Textbox(label="متن استخراج شده")
        mic_button = gr.Button("تبدیل به متن")
        mic_button.click(
            fn=speech_to_text,
            inputs=mic_input,
            outputs=mic_output,
        )

# Start the web server for the interface.
demo.launch()