Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import librosa | |
| import time | |
| import pandas as pd | |
| from datetime import datetime | |
| from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor | |
# Description text handed to both Gradio interfaces built later in this file.
DESCRIPTION = (
    "Store a record of previous calls in order to verify if the client already called or not. "
    "Pretrained on `https://huggingface.co/datasets/superb` using "
    "[S3PRL recipe](https://github.com/s3prl/s3prl/tree/master/s3prl/downstream/voxceleb1)."
)

# Expected columns of call_records.csv:
#   "call_id", "date", "client_id", "duration", "new"

# The classifier and its feature extractor are loaded from the same checkpoint.
_CHECKPOINT = "superb/wav2vec2-large-superb-sid"
model = Wav2Vec2ForSequenceClassification.from_pretrained(_CHECKPOINT)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(_CHECKPOINT)
def file_to_array(path, sr=16000):
    """Load an audio file as a mono waveform and measure its length.

    Args:
        path: Path of the audio file to decode.
        sr: Sample rate in Hz that the audio is resampled to while loading.
            Defaults to 16000, the rate this file passes to the feature
            extractor.

    Returns:
        A ``(speech, duration)`` tuple: the waveform returned by
        ``librosa.load`` and its duration in seconds.
    """
    speech, _ = librosa.load(path, sr=sr, mono=True)
    # get_duration assumes 22050 Hz unless told otherwise, so the rate the
    # audio was actually resampled to must be passed explicitly; omitting it
    # under-reports the duration of 16 kHz audio.
    duration = librosa.get_duration(y=speech, sr=sr)
    return speech, duration
# Column layout of call_records.csv, used to start an empty log.
_CALL_COLUMNS = ["call_id", "date", "client_id", "duration", "new"]


def _load_call_records(path):
    """Return the call log stored at *path*, or an empty log if there is none yet."""
    try:
        return pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (or a truncated file): start from an empty, typed-by-name log
        # instead of crashing before any call can ever be recorded.
        return pd.DataFrame(columns=_CALL_COLUMNS)


def handler(audio_path):
    """Identify the speaker of a recording, log the call, and report history.

    The predicted speaker label is used as the client id. A row
    ``[call_id, date, client_id, duration, new]`` is appended to
    ``call_records.csv`` on every successful call.

    Args:
        audio_path: Path of the audio file to classify. A falsy value
            (no audio submitted) short-circuits with a message.

    Returns:
        A human-readable string saying whether this client is new or how
        many earlier calls were already recorded for them.
    """
    if not audio_path:
        # Nothing was uploaded/recorded; avoid failing inside the audio loader.
        return "No audio provided."

    calls = _load_call_records("call_records.csv")
    speech, duration = file_to_array(audio_path)

    # The feature extractor prepares the model inputs (attention mask,
    # waveform normalization) as the checkpoint's configuration requires.
    inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Single recording in, so only the first (and only) prediction matters.
    predicted_id = torch.argmax(logits, dim=-1)[0].item()
    client_id = model.config.id2label[predicted_id]

    call_id = str(int(time.time()))
    date = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    # Count earlier calls *before* appending the current one.
    n_of_calls = int((calls["client_id"] == client_id).sum())
    new = n_of_calls == 0

    # Add the new call record and persist the whole log.
    calls.loc[len(calls)] = [call_id, date, client_id, duration, new]
    calls.to_csv("call_records.csv", index=False)

    if new:
        return f"New client call: Client ID {client_id}"
    return f"Client {client_id} calling again: {n_of_calls} previous calls"
def _microphone_audio():
    """Build the microphone audio input in a way that works across Gradio versions.

    Older Gradio releases take ``source="microphone"``; newer ones replaced
    that keyword with ``sources=[...]`` and reject the old spelling with a
    ``TypeError`` at construction time. The original keyword is tried first
    so behavior is unchanged wherever it is still accepted.
    """
    try:
        return gr.Audio(label="Microphone Input", source="microphone", type="filepath")
    except TypeError:
        return gr.Audio(label="Microphone Input", sources=["microphone"], type="filepath")


# Tab 1: classify an uploaded audio file.
first = gr.Interface(
    fn=handler,
    inputs=gr.Audio(label="Speech Audio", type="filepath"),
    outputs=gr.Text(label="Output", value="..."),
    description=DESCRIPTION,
)

# Tab 2: classify audio recorded from the microphone.
second = gr.Interface(
    fn=handler,
    inputs=_microphone_audio(),
    outputs=gr.Text(label="Output", value="..."),
    description=DESCRIPTION,
)

# Both interfaces share the same handler and are exposed as tabs of one app.
app = gr.TabbedInterface(
    [first, second],
    title="Speaker Call Verification 🎤",
    tab_names=["Audio Upload", "Microphone"],
)

app.launch()