Spaces:

Ngadou
/

Audio_Scam_Detection

Sleeping

Ngadou committed on Jun 14, 2023

Commit

b0090a7

1 Parent(s): 17c3241

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,20 +3,32 @@ import time
 import openai
 import json
 import os
-from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
-asr_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-robust-ft-libri-960h")
 openai.api_key = os.environ.get('OPENAI_KEY')
 def classify_audio(audio):
     # Transcribe the audio to text
-    audio_transcript = asr_pipeline(audio)["text"]
-    audio_transcript = audio_transcript.lower()
     messages = [
         {"role": "system", "content": "Is this chat a scam, spam or is safe? Only answer in JSON format with 'classification': '' as string and 'reasons': '' as the most plausible reasons why. The reason should be explaning to the potential victim why the conversation is probably a scam"},
-        {"role": "user", "content": audio_transcript},
     ]
     # Call the OpenAI API to generate a response

 import openai
 import json
 import os
+from transformers import pipeline
+from transformers import AutoProcessor, AutoModelForCTC
+processor = AutoProcessor.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
+model = AutoModelForCTC.from_pretrained("facebook/wav2vec2-large-robust-ft-libri-960h")
+# asr_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-robust-ft-libri-960h")
 openai.api_key = os.environ.get('OPENAI_KEY')
 def classify_audio(audio):
     # Transcribe the audio to text
+    # audio_transcript = asr_pipeline(audio)["text"]
+    # audio_transcript = audio_transcript.lower()
+    input_values = processor(audio, return_tensors="pt", padding="longest").input_values
+    # retrieve logits
+    logits = model(input_values).logits
+    # take argmax and decode
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
     messages = [
         {"role": "system", "content": "Is this chat a scam, spam or is safe? Only answer in JSON format with 'classification': '' as string and 'reasons': '' as the most plausible reasons why. The reason should be explaning to the potential victim why the conversation is probably a scam"},
+        {"role": "user", "content": transcription},
     ]
     # Call the OpenAI API to generate a response