suusuu93 committed on
Commit
ffa8116
·
verified ·
1 Parent(s): 4c39fcf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install transformers datasets torchaudio soundfile
2
+ from transformers import pipeline
3
+ import torchaudio
4
+
5
# 1. Whisper ASR model (Vietnamese)
# Whisper is multilingual and otherwise guesses the language from the audio,
# so decoding is pinned to Vietnamese transcription. chunk_length_s lets
# recordings longer than Whisper's 30-second window be processed in chunks.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=-1,  # -1 selects the CPU
    chunk_length_s=30,
    generate_kwargs={"language": "vietnamese", "task": "transcribe"},
)

# 2. Emotion classification model (Vietnamese)
# top_k=None asks the pipeline for a score for every label, not only the best.
emo_clf = pipeline(
    "text-classification",
    model="bkai-foundation-models/vietnamese-emotion",
    top_k=None,
)
10
+
11
+ # 3. Pipeline: audio -> transcript -> emotion
12
def predict_emotion(audio_path):
    """Transcribe a speech recording and rank the emotions found in its text.

    Args:
        audio_path: Audio input forwarded unchanged to the module-level
            ``asr`` pipeline (the demo passes the path of a WAV file).

    Returns:
        A ``(transcript, emotions)`` tuple. ``transcript`` is the recognised
        text with surrounding whitespace removed. ``emotions`` is the list of
        per-label result dicts from the module-level ``emo_clf`` pipeline,
        sorted by their ``"score"`` in descending order; it is empty when no
        speech was recognised.
    """
    # Convert the audio to text.
    transcript = asr(audio_path)["text"].strip()
    if not transcript:
        # Nothing was recognised, so there is no text to classify.
        return transcript, []

    # Classify the emotion; truncate so that a long transcript cannot exceed
    # the classifier's maximum input length.
    emotions = emo_clf(transcript, truncation=True)
    # For a single string the pipeline may wrap the per-label scores in an
    # outer list (one entry per input); unwrap only when that is the case.
    if emotions and isinstance(emotions[0], list):
        emotions = emotions[0]

    # Sort by confidence, highest first.
    emotions = sorted(emotions, key=lambda item: item["score"], reverse=True)
    return transcript, emotions
20
+
21
+ # Demo
22
if __name__ == "__main__":
    # Vietnamese speech recording used for the demo run.
    sample_path = "sample_vi.wav"
    transcript, ranked_emotions = predict_emotion(sample_path)
    print("Transcript:", transcript)
    print("Emotion prediction:", ranked_emotions)