sssssungk committed on
Commit
2d8df4b
·
verified ·
1 Parent(s): a065eb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -29
app.py CHANGED
@@ -31,7 +31,6 @@ import gradio as gr
31
 
32
 
33
 
34
-
35
  # ์˜ค๋””์˜ค ๋ณ€ํ™˜ mp4 --> wav
36
  def extract_audio_from_video(video_file_path, audio_file_path):
37
  # mp4 ํŒŒ์ผ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
@@ -66,12 +65,13 @@ def seprate_speaker(audio_file, pipeline):
66
  for speaker, segments in speaker_segments.items():
67
  # ํ™”์ž์˜ ๋ชจ๋“  ๋ฐœํ™” ๊ตฌ๊ฐ„์„ ์ด์–ด๋ถ™์ž„
68
  combined_waveform = torch.cat(segments, dim=1)
 
69
  output_path = "/content/wav" # ๊ฒฝ๋กœ
70
  os.makedirs(output_path, exist_ok=True) # ๊ฒฝ๋กœ๊ฐ€ ์—†์œผ๋ฉด ์ƒ์„ฑ
71
  output_filename = os.path.join(output_path,f"{speaker}.wav")
72
 
73
  torchaudio.save(output_filename, combined_waveform, sample_rate) #์˜ค๋””์˜ค ํŒŒ์ผ ์ €์žฅ
74
- #print(f"Saved {output_filename} for speaker {speaker}")
75
 
76
 
77
  # ๊ฐ„๋‹จํ•œ DeepVoice ์Šคํƒ€์ผ ๋ชจ๋ธ ์ •์˜
@@ -117,35 +117,29 @@ def real_fake_check(list_dir, path, model):
117
  f_cnt = 0
118
  prob = {}
119
  for i in list_dir: # real / fake ์„ ํƒ
120
- #print('------',i)
121
  input_data = extract_mfcc_path(os.path.join(path, i))
122
- input_data = torch.tensor(input_data).unsqueeze(0).to('cuda') # ๋ฐฐ์น˜ ์ฐจ์›์„ ์ถ”๊ฐ€ํ•˜์—ฌ (1, input_dim, sequence_length)๋กœ ๋งž์ถค
 
123
  result = model(input_data.float())
124
- # predicted_class = torch.argmax(result, dim=1).item()
125
  probabilities = F.softmax(result, dim=1)
126
  prob[i]='%.2f'%probabilities[0][1].item()
127
-
128
  predicted_class = 0 if probabilities[0][0] >= THRESHOLD else 1 # ํ™•๋ฅ ๊ฐ’์ด ๊ธฐ์ค€์น˜๋ณด๋‹ค ํฌ๋‹ค๋ฉด real, ์•„๋‹ˆ๋ฉด fake
129
- # print('-- %.2f'%probabilities[0][0].item()) #ํ™•๋ฅ  ๊ฐ’ ์ถœ๋ ฅ
130
  if predicted_class == 0:
131
- # print("REAL")
132
  r_cnt += 1
133
  else:
134
- # print("FAKE")
135
  f_cnt += 1
136
- #print()
137
- #print('real: ',r_cnt,'/',len(list_dir))
138
- #print('fake: ',f_cnt,'/',len(list_dir))
139
  return {'real: ':f'{r_cnt}/{len(list_dir)}', 'fake: ':f'{f_cnt}/{len(list_dir)}', 'prob: ': prob}
140
 
141
 
142
  def main(file_name):
143
- pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",)
144
-
145
- #pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
146
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
147
 
148
  video_file = file_name #deepfake #meganfox.mp4'
 
149
  audio_file = '/content/output_audio.wav' # ์ €์žฅํ•  ์˜ค๋””์˜ค ํŒŒ์ผ์˜ ๊ฒฝ๋กœ, ์ด๋ฆ„ ์ง€์ •
150
 
151
  extract_audio_from_video(video_file, audio_file)
@@ -162,7 +156,7 @@ def main(file_name):
162
  # ๋ชจ๋ธ
163
  model_name = hf_hub_download(repo_id="sssssungk/deepfake_voice", filename="deepvoice_model_girl.pth")
164
  model = DeepVoiceModel(input_dim, hidden_dim, num_classes, dropout_rate, l2_reg).to(device)
165
- model.load_state_dict(torch.load(model_name))#("/content/drive/MyDrive/แ„แ…ขแ†ธแ„‰แ…ณแ„แ…ฉแ†ซ 1แ„Œแ…ฉ/model/deepvoice_model_girl.pth"))
166
  model.eval() # ํ‰๊ฐ€ ๋ชจ๋“œ๋กœ ์„ค์ •
167
 
168
 
@@ -171,29 +165,25 @@ def main(file_name):
171
  #real_path = '/content/drive/MyDrive/Celeb-DF-v2/Celeb-real'
172
 
173
  #real = os.listdir(real_path)
174
- fake_path = '/content/wav'#'/content/drive/MyDrive/แ„แ…ขแ†ธแ„‰แ…ณแ„แ…ฉแ†ซ 1แ„Œแ…ฉ/data/deepvoice/fake'
 
175
  fake = os.listdir(fake_path)
176
 
177
- #print("\n-------real data---------")
178
- #real_fake_check(real, real_path, model) #real dataset
179
- #print("\n-------fake data---------")
180
  rf_check = real_fake_check(fake, fake_path,model) #fake dataset\
181
  return rf_check
182
 
183
- #Gradio ๋ฉ”์ธ ํ•จ์ˆ˜
184
  def deepvoice_check(video_file):
185
  results = main(video_file)
186
  return results
187
 
188
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
189
- iface = gr.Interface(
190
- fn=main,
191
  inputs=gr.Video(label="Upload mp4 File"),
192
- outputs=gr.Textbox(label="Deepfake Detection Result"),
193
- title="DeepVoice Check",
194
- description="Upload an mp4 file to check for DeepVoice indicators."
195
  )
196
 
197
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์‹คํ–‰
198
  if __name__ == "__main__":
199
- iface.launch()
 
31
 
32
 
33
 
 
34
  # ์˜ค๋””์˜ค ๋ณ€ํ™˜ mp4 --> wav
35
  def extract_audio_from_video(video_file_path, audio_file_path):
36
  # mp4 ํŒŒ์ผ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
 
65
  for speaker, segments in speaker_segments.items():
66
  # ํ™”์ž์˜ ๋ชจ๋“  ๋ฐœํ™” ๊ตฌ๊ฐ„์„ ์ด์–ด๋ถ™์ž„
67
  combined_waveform = torch.cat(segments, dim=1)
68
+ #current_path = os.getcwd()
69
  output_path = "/content/wav" # ๊ฒฝ๋กœ
70
  os.makedirs(output_path, exist_ok=True) # ๊ฒฝ๋กœ๊ฐ€ ์—†์œผ๋ฉด ์ƒ์„ฑ
71
  output_filename = os.path.join(output_path,f"{speaker}.wav")
72
 
73
  torchaudio.save(output_filename, combined_waveform, sample_rate) #์˜ค๋””์˜ค ํŒŒ์ผ ์ €์žฅ
74
+
75
 
76
 
77
  # ๊ฐ„๋‹จํ•œ DeepVoice ์Šคํƒ€์ผ ๋ชจ๋ธ ์ •์˜
 
117
  f_cnt = 0
118
  prob = {}
119
  for i in list_dir: # real / fake ์„ ํƒ
 
120
  input_data = extract_mfcc_path(os.path.join(path, i))
121
+ #input_data = torch.tensor(input_data).unsqueeze(0).to('cuda') # ๋ฐฐ์น˜ ์ฐจ์›์„ ์ถ”๊ฐ€ํ•˜์—ฌ (1, input_dim, sequence_length)๋กœ ๋งž์ถค
122
+ input_data = torch.tensor(input_data).unsqueeze(0).to('cpu')
123
  result = model(input_data.float())
 
124
  probabilities = F.softmax(result, dim=1)
125
  prob[i]='%.2f'%probabilities[0][1].item()
 
126
  predicted_class = 0 if probabilities[0][0] >= THRESHOLD else 1 # ํ™•๋ฅ ๊ฐ’์ด ๊ธฐ์ค€์น˜๋ณด๋‹ค ํฌ๋‹ค๋ฉด real, ์•„๋‹ˆ๋ฉด fake
127
+
128
  if predicted_class == 0:
 
129
  r_cnt += 1
130
  else:
 
131
  f_cnt += 1
132
+
 
 
133
  return {'real: ':f'{r_cnt}/{len(list_dir)}', 'fake: ':f'{f_cnt}/{len(list_dir)}', 'prob: ': prob}
134
 
135
 
136
  def main(file_name):
137
+ pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
138
+ #device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
139
+ device = torch.device('cpu')
 
140
 
141
  video_file = file_name #deepfake #meganfox.mp4'
142
+ #current_path = os.getcwd()
143
  audio_file = '/content/output_audio.wav' # ์ €์žฅํ•  ์˜ค๋””์˜ค ํŒŒ์ผ์˜ ๊ฒฝ๋กœ, ์ด๋ฆ„ ์ง€์ •
144
 
145
  extract_audio_from_video(video_file, audio_file)
 
156
  # ๋ชจ๋ธ
157
  model_name = hf_hub_download(repo_id="sssssungk/deepfake_voice", filename="deepvoice_model_girl.pth")
158
  model = DeepVoiceModel(input_dim, hidden_dim, num_classes, dropout_rate, l2_reg).to(device)
159
+ model.load_state_dict(torch.load(model_name, map_location=torch.device('cpu')))
160
  model.eval() # ํ‰๊ฐ€ ๋ชจ๋“œ๋กœ ์„ค์ •
161
 
162
 
 
165
  #real_path = '/content/drive/MyDrive/Celeb-DF-v2/Celeb-real'
166
 
167
  #real = os.listdir(real_path)
168
+ #current_path = os.getcwd()
169
+ fake_path = '/content/wav'
170
  fake = os.listdir(fake_path)
171
 
 
 
 
172
  rf_check = real_fake_check(fake, fake_path,model) #fake dataset\
173
  return rf_check
174
 
 
175
  def deepvoice_check(video_file):
176
  results = main(video_file)
177
  return results
178
 
179
 # Create the Gradio interface.
 # fn is deepvoice_check (wrapper around main); input is an uploaded mp4 video,
 # output is a textbox showing the real/fake counts and per-speaker probabilities.
 deepfake = gr.Interface(
     fn=deepvoice_check,
     inputs=gr.Video(label="Upload mp4 File"),
     outputs=gr.Textbox(label="DeepFaKeVoice Detection Result"),
     title="DeepFaKeVoice Check",
     description="Upload an mp4 file to check."
 )

 if __name__ == "__main__":
     # share=True publishes a temporary public URL; debug=True blocks and
     # streams errors to the console (intended for notebook/dev use).
     deepfake.launch(share=True, debug=True)