# DeepfakeDetection / DeepfakeModel.py
# Max005's picture
# Update
# 8af0b8e
import os
import tempfile
from pathlib import Path

import torch
import torch.nn.functional as F
import torchaudio
import uvicorn
from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel
from transformers import AutoProcessor, AutoModelForAudioClassification
app_dir = Path(__file__).parent

# Model setup: load the feature processor and the audio classifier from the
# local checkpoint directory. local_files_only=True on BOTH loads keeps
# startup fully offline and consistent (the original passed it only to the
# model, so the processor could still attempt a hub lookup).
model_path = app_dir / "Deepfake" / "model"
processor = AutoProcessor.from_pretrained(
    model_path,
    local_files_only=True,
)
model = AutoModelForAudioClassification.from_pretrained(
    pretrained_model_name_or_path=model_path,
    local_files_only=True,
)
def prepare_audio(file_path, sampling_rate=16000, duration=10):
    """
    Load an audio file and return it as fixed-length mono chunks.

    Parameters
    ----------
    file_path : str or Path
        Path to an audio file readable by torchaudio.
    sampling_rate : int
        Target sampling rate in Hz; the audio is resampled if the source
        rate differs.
    duration : int
        Length of each chunk in seconds.

    Returns
    -------
    list of numpy.ndarray
        1-D float arrays of exactly ``sampling_rate * duration`` samples;
        the final chunk is zero-padded. At least one (silent) chunk is
        returned even for zero-length input, so callers that average over
        the chunk count never divide by zero.
    """
    waveform, original_sampling_rate = torchaudio.load(file_path)

    # Downmix multi-channel audio to mono by averaging the channels.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Resample to the model's expected rate if needed.
    if original_sampling_rate != sampling_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=original_sampling_rate, new_freq=sampling_rate
        )
        waveform = resampler(waveform)

    chunk_size = sampling_rate * duration

    # Guard: zero-length audio would otherwise yield an empty chunk list,
    # which crashes downstream aggregation. Emit one silent chunk instead.
    if waveform.shape[1] == 0:
        return [torch.zeros(chunk_size).numpy()]

    audio_chunks = []
    for start in range(0, waveform.shape[1], chunk_size):
        chunk = waveform[:, start:start + chunk_size]
        # Zero-pad the last chunk up to the full chunk size.
        if chunk.shape[1] < chunk_size:
            chunk = F.pad(chunk, (0, chunk_size - chunk.shape[1]))
        audio_chunks.append(chunk.squeeze().numpy())
    return audio_chunks
def predict_audio(file_path):
    """
    Classify an audio file by scoring fixed-length chunks and aggregating.

    Each chunk is classified independently; the final label is chosen by
    majority vote across chunks, and the reported confidence is the mean of
    the per-chunk top-class probabilities.

    Parameters
    ----------
    file_path : str or Path
        Path to the audio file to classify.

    Returns
    -------
    dict
        ``{"predicted_label": str, "average_confidence": float}`` with the
        confidence in ``[0, 1]``.

    Raises
    ------
    ValueError
        If the file yields no audio chunks.
    """
    audio_chunks = prepare_audio(file_path)
    # Guard: without this, an empty chunk list would raise ValueError from
    # max() and ZeroDivisionError from the confidence average below.
    if not audio_chunks:
        raise ValueError(f"No audio data found in {file_path!r}")

    predictions = []
    confidences = []
    for chunk in audio_chunks:
        inputs = processor(
            chunk, sampling_rate=16000, return_tensors="pt", padding=True
        )
        # Inference only: no autograd graph needed.
        with torch.no_grad():
            logits = model(**inputs).logits
        probabilities = F.softmax(logits, dim=1)
        # Top class and its probability for this chunk.
        confidence, predicted_class = torch.max(probabilities, dim=1)
        predictions.append(predicted_class.item())
        confidences.append(confidence.item())

    # Majority vote across chunks (ties resolve to an arbitrary member of
    # the tied set).
    aggregated_prediction_id = max(set(predictions), key=predictions.count)
    predicted_label = model.config.id2label[aggregated_prediction_id]

    # NOTE(review): this averages top-class confidence over ALL chunks,
    # including chunks that voted for a different label than the winner.
    average_confidence = sum(confidences) / len(confidences)
    return {
        "predicted_label": predicted_label,
        "average_confidence": average_confidence,
    }
# FastAPI application instance; the route handlers below attach to it.
app = FastAPI()
@app.post("/infer")
async def infer(file: UploadFile = File(...)):
    """
    Accept an uploaded audio file and return the model's prediction.

    The upload is written to a secure temporary file — the client-supplied
    filename is never used as a path component, which avoids path-traversal
    and concurrent-request collision issues of the previous
    ``temp_<filename>`` scheme — then classified, and the temporary file is
    always removed.
    """
    # Keep the original extension so torchaudio can infer the audio format.
    suffix = Path(file.filename or "").suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name
    try:
        # Perform inference on the saved copy.
        predictions = predict_audio(temp_file_path)
    finally:
        # Clean up the temporary file even if inference fails.
        os.remove(temp_file_path)
    return predictions
@app.get("/health")
async def health():
    """Liveness probe: reports service status and available audio backends."""
    backends = torchaudio.list_audio_backends()
    return {"message": "ok", "Sound": str(backends)}