File size: 1,400 Bytes
4cb6aa6
 
 
 
 
 
237d67c
4cb6aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237d67c
4cb6aa6
237d67c
4cb6aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80922f5
4cb6aa6
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
from transformers import pipeline
import torch

# Initialize ASR and classifier pipelines
# NOTE(review): both are fine-tuned checkpoints for the MINDS-14 (en-US) banking
# intent dataset — a Whisper ASR model and a RoBERTa text classifier.
model_asr = "kairaamilanii/whisper-mind14-enUS"
model_class = "kairaamilanii/RoBERTa-minds14-en"

transcriber = pipeline(
    "automatic-speech-recognition",
    model=model_asr,
    # Whisper processes audio in windows; 30 s chunking lets it handle
    # recordings longer than a single window.
    chunk_length_s=30,
    # Use the first GPU when available, otherwise fall back to CPU.
    device="cuda:0" if torch.cuda.is_available() else "cpu"
)
classifier = pipeline("text-classification", model=model_class)

# Maps the numeric suffix of the classifier's "LABEL_n" output to a
# human-readable MINDS-14 intent name.
intent_classes: dict[int, str] = {
    0: 'abroad',
    1: 'address',
    2: 'app_error',
    3: 'atm_limit',
    4: 'balance',
    5: 'business_loan',
    6: 'card_issues',
    7: 'cash_deposit',
    8: 'direct_debit',
    9: 'freeze',
    10: 'high_value_payment',
    11: 'joint_account',
    12: 'latest_transactions',
    13: 'pay_bill'
}

def process_audio(audio):
    """Transcribe an audio file and classify the intent of the transcript.

    Args:
        audio: Path to an audio file (Gradio passes a filepath string).

    Returns:
        A ``(transcription, intent_name)`` tuple; ``intent_name`` is
        ``"Unknown"`` when the predicted label index is not in
        ``intent_classes``.
    """
    transcription = transcriber(audio)['text']
    prediction = classifier(transcription)
    # Classifier labels look like "LABEL_7"; the numeric suffix indexes
    # into intent_classes — TODO confirm against the model's config.
    label_index = int(prediction[0]['label'].split('_')[1])
    intent_name = intent_classes.get(label_index, "Unknown")
    return transcription, intent_name

# Create Gradio interface
iface = gr.Interface(
    fn=process_audio,
    # type="filepath" hands process_audio a path string rather than raw samples.
    inputs=gr.Audio(type="filepath"),
    # Two text outputs: the transcription and the predicted intent name.
    outputs=["text", "text"],
    title="ASR and Intent Classification",
    description="Upload an audio file to get transcription and intent classification."
)

# Start the Gradio web server (blocks until shut down).
iface.launch()