Spaces:
Runtime error
Runtime error
| """ | |
| AudioClassifier class | |
| Author: HenryAreiza | |
| Date: 08/09/2023 | |
| """ | |
| from scipy.io import wavfile | |
| from scipy.signal import decimate | |
| from transformers import pipeline | |
class AudioClassifier:
    """
    Classify short spoken commands and map them to mouse actions.

    A WAV file is read from disk, reduced to a single channel, brought to
    the sample rate used for classification and passed to a Hugging Face
    audio-classification pipeline. The top predicted label is then
    translated into the mouse action stored at the same position in
    ``commands``.

    Attributes:
        TARGET_SAMPLE_RATE (int): Sample rate, in Hz, the audio is converted
            to before it is handed to the pipeline.
        vocab (list): Vocabulary of valid commands.
        commands (list): Mouse actions, index-aligned with ``vocab``.
        pipe: The Hugging Face Transformers pipeline for audio classification.
    """

    # NOTE(review): 16 kHz is inferred from the previous fixed decimation
    # factor of 3, which only makes sense for 48 kHz recordings. Confirm it
    # against the sampling rate of the model's feature extractor.
    TARGET_SAMPLE_RATE = 16000

    def __init__(self):
        """
        Initializes the AudioClassifier class.

        Builds the command vocabulary, the matching list of mouse actions
        and loads the audio classification pipeline.
        """
        self.vocab = [
            "left", "right", "up", "down", "go", "follow",
            "on", "off", "one", "two", "three", "stop",
        ]
        # commands[i] is the mouse action triggered by vocab[i].
        self.commands = [
            "left click", "right click", "scroll up", "scroll down",
            "double click", "sustained click", "enable cursor movement",
            "disable cursor movement", "slow cursor speed",
            "medium cursor speed", "fast cursor speed",
            "finish the application",
        ]

        # Load the audio classification pipeline
        self.pipe = pipeline(
            "audio-classification",
            model="0xb1/wav2vec2-base-finetuned-speech_commands-v0.02",
        )

    def predict(self, audio_path):
        """
        Classify the audio stored in a WAV file into a command label.

        Args:
            audio_path (str): Path to the WAV file to classify.

        Returns:
            result (str): ``'<label> ---> (<mouse action>)'`` when the top
                predicted label is part of ``vocab``, otherwise
                ``'unknown command'``.

        Raises:
            ValueError: If the file contains no audio samples.
        """
        sample_rate, audio = wavfile.read(audio_path)
        if audio.size == 0:
            raise ValueError("audio file contains no samples: %r" % (audio_path,))

        # wavfile.read returns (samples, channels) for multi-channel files.
        # Filtering along the last axis would then mix channels instead of
        # time steps, so collapse the recording to mono first.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        # Integer PCM is promoted so the filters below work in floating point.
        if audio.dtype.kind != "f":
            audio = audio.astype("float64")

        audio = self._to_target_rate(audio, sample_rate)

        label = self.pipe(audio)[0]["label"]
        if label not in self.vocab:
            return 'unknown command'
        action = self.commands[self.vocab.index(label)]
        return f"{label} ---> ({action})"

    def _to_target_rate(self, audio, sample_rate):
        """Return mono ``audio`` converted from ``sample_rate`` to TARGET_SAMPLE_RATE."""
        target = self.TARGET_SAMPLE_RATE
        if sample_rate == target:
            return audio

        factor, remainder = divmod(sample_rate, target)
        # Small integer ratios keep the original decimate() path, so 48 kHz
        # input is processed exactly as before (factor 3). SciPy advises
        # against a single IIR decimation step with a factor above 13.
        if remainder == 0 and factor <= 13:
            return decimate(audio, factor)

        # Any other ratio (e.g. 44.1 kHz, or upsampling from 8 kHz) needs a
        # rational resampler. Imported locally so the module-level imports
        # stay untouched.
        from math import gcd
        from scipy.signal import resample_poly

        common = gcd(sample_rate, target)
        return resample_poly(audio, target // common, sample_rate // common)