Spaces:

DavidCombei
/

AI4TRUST_space

Sleeping

App Files Community

DavidCombei committed on Sep 15, 2024

Commit

8288053

verified ·

1 Parent(s): d8876d6

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -8

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import librosa
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 import gradio as gr
-from pytube import YouTube
 class HuggingFaceFeatureExtractor:
     def __init__(self, model_class, name):
@@ -40,9 +40,22 @@ model4 = joblib.load('model4_ensemble.pkl')
 final_model = joblib.load('final_model_ensemble.pkl')
 def download_audio_from_youtube(youtube_url, output_path='.'):
-    yt = YouTube(youtube_url)
-    audio_stream = yt.streams.filter(only_audio=True).first()
-    audio_file = audio_stream.download(output_path=output_path)
     return audio_file
 def segment_audio(audio, sr, segment_duration):
@@ -88,7 +101,7 @@ def process_audio(input_data, segment_duration=3):
     # PhantomNet extractor
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = PhantomNet(feature_size=1920, num_classes=2, conv_projection=False, use_mode='extractor').to(device)
-    state_dict = torch.load("PhantomNet/saved_models/PhantomNet_Finetuned_V2.pt", map_location=device)
     model.load_state_dict(state_dict, strict=False)
     model.eval()
@@ -120,9 +133,9 @@ def process_audio(input_data, segment_duration=3):
     y_pred_inference = classify_with_eer_threshold(final_prob, eer_thresh)
     if y_pred_inference == 1:
-        return f"Fake with a confidence of: {100 - final_prob[0] * 100:.2f}%"
     else:
-        return f"Real with a confidence of: {final_prob[0] * 100:.2f}%"
 def gradio_interface(audio, youtube_link):
     if youtube_link:
@@ -137,7 +150,7 @@ interface = gr.Interface(
     inputs=[gr.Audio(type="filepath", label="Upload Audio"), gr.Textbox(label="YouTube Link (Optional)")],
     outputs="text",
     title="AI4TRUST Development",
-    description="Upload an audio file or provide a YouTube link to check for authenticity.",
 )
 interface.launch(share=True)

 import numpy as np
 from sklearn.linear_model import LogisticRegression
 import gradio as gr
+import yt_dlp as youtube_dl
 class HuggingFaceFeatureExtractor:
     def __init__(self, model_class, name):
 final_model = joblib.load('final_model_ensemble.pkl')
 def download_audio_from_youtube(youtube_url, output_path='.'):
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'outtmpl': f'{output_path}/%(title)s.%(ext)s',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'wav',
+            'preferredquality': '192',
+        }],
+        'postprocessor_args': ['-ar', '16000'],
+        'prefer_ffmpeg': True,
+    }
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        info_dict = ydl.extract_info(youtube_url, download=True)
+        # I had issues with the .webm extension, so force-replace it with .wav (note: only .webm is rewritten; other extensions pass through unchanged)
+        audio_file = ydl.prepare_filename(info_dict).replace('.webm', '.wav')
     return audio_file
 def segment_audio(audio, sr, segment_duration):
     # PhantomNet extractor
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = PhantomNet(feature_size=1920, num_classes=2, conv_projection=False, use_mode='extractor').to(device)
+    state_dict = torch.load("PhantomNet_Finetuned_V2.pt", map_location=device)
     model.load_state_dict(state_dict, strict=False)
     model.eval()
     y_pred_inference = classify_with_eer_threshold(final_prob, eer_thresh)
     if y_pred_inference == 1:
+        return f"Fake with a confidence of: {final_prob[0] * 100:.2f}%"
     else:
+        return f"Real with a confidence of: {100 - final_prob[0] * 100:.2f}%"
 def gradio_interface(audio, youtube_link):
     if youtube_link:
     inputs=[gr.Audio(type="filepath", label="Upload Audio"), gr.Textbox(label="YouTube Link (Optional)")],
     outputs="text",
     title="AI4TRUST Development",
+    description="Upload an audio file or provide a YouTube link to check if it's AI generated",
 )
 interface.launch(share=True)