Spaces:

Jeonghwanny
/

deepvoice

Sleeping

App Files Community

Jeonghwanny committed on Nov 10, 2024

Commit

02926d5

verified ·

1 Parent(s): a5f8c36

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -26

app.py CHANGED Viewed

@@ -33,7 +33,6 @@ import gradio as gr
 from moviepy.editor import VideoFileClip
 # 오디오 변환 mp4 --> wav
 def extract_audio_from_video(video_file_path, audio_file_path):
     # mp4 파일 불러오기
@@ -68,14 +67,13 @@ def seprate_speaker(audio_file, pipeline):
     for speaker, segments in speaker_segments.items():
         # 화자의 모든 발화 구간을 이어붙임
         combined_waveform = torch.cat(segments, dim=1)
-        #current_path = os.getcwd()
-        # output_path = "/tmp/wav"    # 경로
-        output_path = 'wav'
         os.makedirs(output_path, exist_ok=True) # 경로가 없으면 생성
         output_filename = os.path.join(output_path,f"{speaker}.wav")
         torchaudio.save(output_filename, combined_waveform, sample_rate) #오디오 파일 저장
 # 간단한 DeepVoice 스타일 모델 정의
@@ -121,29 +119,38 @@ def real_fake_check(list_dir, path, model):
     f_cnt = 0
     prob = {}
     for i in list_dir:      # real / fake 선택
         input_data = extract_mfcc_path(os.path.join(path, i))
-        input_data = torch.tensor(input_data).unsqueeze(0).to('cuda')  # 배치 차원을 추가하여 (1, input_dim, sequence_length)로 맞춤
         result = model(input_data.float())
         probabilities = F.softmax(result, dim=1)
         prob[i]='%.2f'%probabilities[0][1].item()
-        predicted_class = 0 if probabilities[0][0] >= THRESHOLD else 1  # 확률값이 기준치보다 크다면 real, 아니면 fake
         if predicted_class == 0:
             r_cnt += 1
         else:
             f_cnt += 1
     return {'real: ':f'{r_cnt}/{len(list_dir)}', 'fake: ':f'{f_cnt}/{len(list_dir)}', 'prob: ': prob}
 def main(file_name):
-    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
-    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
     video_file = file_name #deepfake #meganfox.mp4'
-    #current_path = os.getcwd()
-    # audio_file = '/tmp/output_audio.wav'  # 저장할 오디오 파일의 경로, 이름 지정
-    audio_file = 'output_audio.wav'
     extract_audio_from_video(video_file, audio_file)
     seprate_speaker(audio_file,pipeline) # 발화자 분리해서 파일로 만들기
@@ -156,37 +163,39 @@ def main(file_name):
     l2_reg = 0.01
     # 모델
-    model_name = "deepvoice_model_girl.pth"
     model = DeepVoiceModel(input_dim, hidden_dim, num_classes, dropout_rate, l2_reg).to(device)
-    model.load_state_dict(torch.load(model_name))
     model.eval()  # 평가 모드로 설정
     #real,fake 폴더
-    #real_path = '/content/drive/MyDrive/캡스톤 1조/data/deepvoice/real'
     #real_path = '/content/drive/MyDrive/Celeb-DF-v2/Celeb-real'
     #real = os.listdir(real_path)
-    #current_path = os.getcwd()
-    # fake_path = '/tmp/wav'
-    fake_path = 'wav'
     fake = os.listdir(fake_path)
     rf_check = real_fake_check(fake, fake_path,model)       #fake dataset\
     return rf_check
 def deepvoice_check(video_file):
     """Run the deep-voice check on a video file and return its result.

     Thin wrapper: forwards ``video_file`` unchanged to ``main`` and hands
     back whatever ``main`` returns.
     """
     return main(video_file)
 # Gradio 인터페이스 생성
-deepfake = gr.Interface(
-    fn=deepvoice_check,
     inputs=gr.Video(label="Upload mp4 File"),
-    outputs=gr.Textbox(label="DeepFaKeVoice Detection Result"),
-    title="DeepFaKeVoice Check",
-    description="Upload an mp4 file to check."
 )
-if __name__ == "__main__":
-    deepfake.launch(share=True, debug=True)

 from moviepy.editor import VideoFileClip
 # 오디오 변환 mp4 --> wav
 def extract_audio_from_video(video_file_path, audio_file_path):
     # mp4 파일 불러오기
     for speaker, segments in speaker_segments.items():
         # 화자의 모든 발화 구간을 이어붙임
         combined_waveform = torch.cat(segments, dim=1)
+        # output_path = "/content/wav"    # 경로
+        output_path = './output'
         os.makedirs(output_path, exist_ok=True) # 경로가 없으면 생성
         output_filename = os.path.join(output_path,f"{speaker}.wav")
         torchaudio.save(output_filename, combined_waveform, sample_rate) #오디오 파일 저장
+        #print(f"Saved {output_filename} for speaker {speaker}")
 # 간단한 DeepVoice 스타일 모델 정의
     f_cnt = 0
     prob = {}
     for i in list_dir:      # real / fake 선택
+        #print('------',i)
         input_data = extract_mfcc_path(os.path.join(path, i))
+        input_data = torch.tensor(input_data).unsqueeze(0).to(device)  # 배치 차원을 추가하여 (1, input_dim, sequence_length)로 맞춤
         result = model(input_data.float())
+        # predicted_class = torch.argmax(result, dim=1).item()
         probabilities = F.softmax(result, dim=1)
         prob[i]='%.2f'%probabilities[0][1].item()
+        predicted_class = 0 if probabilities[0][0] >= THRESHOLD else 1  # 확률값이 기준치보다 크다면 real, 아니면 fake
+        # print('-- %.2f'%probabilities[0][0].item()) #확률 값 출력
         if predicted_class == 0:
+            # print("REAL")
             r_cnt += 1
         else:
+            # print("FAKE")
             f_cnt += 1
+    #print()
+    #print('real: ',r_cnt,'/',len(list_dir))
+    #print('fake: ',f_cnt,'/',len(list_dir))
     return {'real: ':f'{r_cnt}/{len(list_dir)}', 'fake: ':f'{f_cnt}/{len(list_dir)}', 'prob: ': prob}
 def main(file_name):
+    my_key = os.getenv("my_key")
+    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
+                                        use_auth_token=my_key)
+    # pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     video_file = file_name #deepfake #meganfox.mp4'
+    audio_file = './output_audio.wav'  # 저장할 오디오 파일의 경로, 이름 지정
     extract_audio_from_video(video_file, audio_file)
     seprate_speaker(audio_file,pipeline) # 발화자 분리해서 파일로 만들기
     l2_reg = 0.01
     # 모델
+    model_name = './deepvoice_model_girl.pth'
     model = DeepVoiceModel(input_dim, hidden_dim, num_classes, dropout_rate, l2_reg).to(device)
+    model.load_state_dict(torch.load(model_name, map_location=torch.device(device)))#("/content/drive/MyDrive/캡스톤 1조/model/deepvoice_model_girl.pth"))
     model.eval()  # 평가 모드로 설정
     #real,fake 폴더
+    #real_path = '/content/drive/MyDrive/캡스톤 1조/data/deepvoice/real'
     #real_path = '/content/drive/MyDrive/Celeb-DF-v2/Celeb-real'
     #real = os.listdir(real_path)
+    fake_path = './output'#'/content/drive/MyDrive/캡스톤 1조/data/deepvoice/fake'
     fake = os.listdir(fake_path)
+    #print("\n-------real data---------")
+    #real_fake_check(real, real_path, model) #real dataset
+    #print("\n-------fake data---------")
     rf_check = real_fake_check(fake, fake_path,model)       #fake dataset\
     return rf_check
+#Gradio 메인 함수
 def deepvoice_check(video_file):
     """Delegate the uploaded video to ``main`` and return the outcome.

     No processing happens here; the argument is passed through as-is and
     the value produced by ``main`` is returned to the caller unmodified.
     """
     return main(video_file)
 # Gradio 인터페이스 생성
+iface = gr.Interface(
+    fn=main,
     inputs=gr.Video(label="Upload mp4 File"),
+    outputs=gr.Textbox(label="Deepfake Detection Result"),
+    title="DeepVoice Check",
+    description="Upload an mp4 file to check for DeepVoice indicators."
 )
+# Gradio 인터페이스 실행
+iface.launch(share=True, debug=True)