import json
import torch
import gradio as gr
import models as MOD
import process_data as PD
from transformers import pipeline
from huggingface_hub import hf_hub_download
# Registry of available anti-spoofing models, keyed by the UI display name.
# Each entry records:
#   eer_threshold     - decision threshold at the equal-error-rate operating point
#   data_process_func - name of the pre-processing function in process_data (PD)
#   note              - caveat surfaced to the user alongside the score
#   model_class       - class name in the models module (MOD)
#   model_checkpoint  - checkpoint filename on the Hugging Face Hub repo
model_master = {
    "SSL-AASIST (Trained on ASV-Spoof5)": {
        "eer_threshold": 3.3330237865448,
        "data_process_func": "process_ssl_assist_input",
        "note": "This model is trained only on ASVSpoof 2024 training data.",
        "model_class": "Model",
        "model_checkpoint": "ssl_aasist_epoch_7.pth",
    },
    "AASIST": {
        "eer_threshold": 1.8018419742584229,
        "data_process_func": "process_assist_input",
        "note": "This model is trained on ASVSpoof 2024 training data.",
        "model_class": "AASIST_Model",
        "model_checkpoint": "orig_aasist_epoch_1.pth",
    },
}
# Eagerly load the default model (SSL-AASIST) on CPU at startup so the first
# request doesn't pay the download/instantiation cost.
model = MOD.Model(None, "cpu")
# Fetch the checkpoint from the Hub (cached locally by huggingface_hub after
# the first download).
base_model_file = hf_hub_download("arnabdas8901/aasist-trained-asvspoof2024", filename="ssl_aasist_epoch_7.pth")
model.load_state_dict(torch.load(base_model_file, map_location="cpu"))
model.eval()  # inference mode: disables dropout/batch-norm updates
# Tracks which model_master entry is currently loaded; `process` swaps the
# global `model` when the user selects a different one.
loaded_model = "SSL-AASIST (Trained on ASV-Spoof5)"
def process(file, type):
    """Score an audio file with the selected anti-spoofing model.

    Args:
        file: Filesystem path to the uploaded audio file (gr.Audio type="filepath").
        type: Key into ``model_master`` naming the model to use.  NOTE: the
            name shadows the ``type`` builtin but is kept for interface
            compatibility with existing callers/examples.

    Returns:
        A JSON-formatted string containing the decision score, the model's
        EER threshold, and an explanatory note.
    """
    global model
    global loaded_model
    # Pre-process the audio with the routine registered for the chosen model.
    inp = getattr(PD, model_master[type]["data_process_func"])(file)
    # Lazily swap models only when the user's selection changed since the
    # last call; the freshly loaded model replaces the module-level one.
    if loaded_model != type:
        model = getattr(MOD, model_master[type]["model_class"])(None, "cpu")
        model_file = hf_hub_download("arnabdas8901/aasist-trained-asvspoof2024",
                                     filename=model_master[type]["model_checkpoint"])
        model.load_state_dict(torch.load(model_file, map_location="cpu"))
        model.eval()
        loaded_model = type
    # Inference only — no_grad avoids building the autograd graph.
    with torch.no_grad():
        op = model(inp).squeeze()[1].item()
    output_json = {
        "decision_score": str(op),
        "model_threshold": str(model_master[type]["eer_threshold"]),
        "optional_note": "1. Any score below threshold is indicative of fake. \n2. {}".format(model_master[type]["note"]),
    }
    return json.dumps(output_json, indent=4)
# Top-level container; the two Interfaces below are mounted into it as tabs
# at the bottom of the file.
demo = gr.Blocks()
# Tab 1: fake-audio analysis — runs `process` on an uploaded file.
file_proc = gr.Interface(
    fn=process,
    inputs=[
        gr.Audio(sources=["upload"], label="Audio file", type="filepath"),
        # "value" passes the selected label string straight through to
        # `process`, which uses it as the model_master key.
        gr.Radio(["SSL-AASIST (Trained on ASV-Spoof5)", "AASIST"], label="Select Model", type="value"),
    ],
    outputs="text",
    title="Find the Fake: Analyze 'Real' or 'Fake'.",
    description=(
        "Analyze fake or real with a click of a button. Upload a .wav or .flac file."
    ),
    # Example files are expected to sit next to this script in the Space repo.
    examples=[
        ["./bonafide.flac", "SSL-AASIST (Trained on ASV-Spoof5)"],
        ["./fake.flac", "SSL-AASIST (Trained on ASV-Spoof5)"],
        ["./bonafide.flac", "AASIST"],
        ["./fake.flac", "AASIST"],
    ],
    # Pre-compute example outputs at startup so clicking an example is instant.
    cache_examples=True,
    # NOTE(review): allow_flagging is deprecated in newer Gradio releases in
    # favor of flagging_mode — confirm against the pinned gradio version.
    allow_flagging="never",
)
#####################################################################################
# For ASR interface
# Whisper Large V3 ASR pipeline on CPU; chunk_length_s=30 enables chunked
# long-form decoding in 30-second windows.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    chunk_length_s=30,
    device="cpu",
)
def transcribe(inputs):
    """Transcribe an audio file with Whisper.

    Args:
        inputs: Filesystem path to the audio file, or None when nothing
            was submitted.

    Returns:
        A (language, text) tuple: the detected language of the first chunk
        and the full transcription.

    Raises:
        gr.Error: if no audio file was provided.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    result = pipe(
        inputs,
        batch_size=8,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=False,
        return_language=True,
    )
    # Language identification comes back per-chunk; report the first chunk's.
    return result["chunks"][0]["language"], result["text"]
# Tab 2: language ID + transcription via Whisper.
transcribe_proc = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Speech file (<30s)", max_length=30, sources=["microphone", "upload"], show_download_button=True)
    ],
    outputs=[
        gr.Text(label="Predicted Language", info="Language identification is performed automatically."),
        gr.Text(label="Predicted transcription", info="Best hypothesis."),
    ],
    title="Transcribe Anything.",
    description=(
        # Fixed user-facing typo: "Automatactic" -> "Automatic".
        "Automatic language identification and transcription service by Whisper Large V3. Upload a .wav or .flac file."
    ),
    # NOTE(review): allow_flagging is deprecated in newer Gradio releases in
    # favor of flagging_mode — confirm against the pinned gradio version.
    allow_flagging="never",
)
# Mount both interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface([file_proc, transcribe_proc], ["Analyze Audio File", "Transcribe Audio File"])
# Queue requests (at most 10 waiting) so long-running inference doesn't
# drop concurrent users, then start the server with a public share link.
demo.queue(max_size=10)
demo.launch(share=True)