{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting fastapi (from -r requirements.txt (line 1))\n",
" Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n",
"Collecting uvicorn (from -r requirements.txt (line 2))\n",
" Using cached uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n",
"Collecting transformers (from -r requirements.txt (line 3))\n",
" Using cached transformers-4.47.0-py3-none-any.whl.metadata (43 kB)\n",
"Collecting torch (from -r requirements.txt (line 4))\n",
" Using cached torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)\n",
"Collecting torchvision (from -r requirements.txt (line 5))\n",
" Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)\n",
"Collecting torchaudio (from -r requirements.txt (line 6))\n",
" Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl.metadata (6.5 kB)\n",
"Collecting PySoundFile (from -r requirements.txt (line 7))\n",
" Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl.metadata (9.4 kB)\n",
"Collecting ffmpeg-python (from -r requirements.txt (line 8))\n",
" Using cached ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)\n",
"Collecting pydantic (from -r requirements.txt (line 9))\n",
" Using cached pydantic-2.10.3-py3-none-any.whl.metadata (172 kB)\n",
"Collecting numpy (from -r requirements.txt (line 10))\n",
" Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl.metadata (60 kB)\n",
"Collecting python-multipart (from -r requirements.txt (line 11))\n",
" Using cached python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)\n",
"Collecting starlette<0.42.0,>=0.40.0 (from fastapi->-r requirements.txt (line 1))\n",
" Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from fastapi->-r requirements.txt (line 1)) (4.12.2)\n",
"Collecting click>=7.0 (from uvicorn->-r requirements.txt (line 2))\n",
" Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n",
"Collecting h11>=0.8 (from uvicorn->-r requirements.txt (line 2))\n",
" Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
"Collecting filelock (from transformers->-r requirements.txt (line 3))\n",
" Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n",
"Collecting huggingface-hub<1.0,>=0.24.0 (from transformers->-r requirements.txt (line 3))\n",
" Using cached huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from transformers->-r requirements.txt (line 3)) (24.2)\n",
"Collecting pyyaml>=5.1 (from transformers->-r requirements.txt (line 3))\n",
" Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)\n",
"Collecting regex!=2019.12.17 (from transformers->-r requirements.txt (line 3))\n",
" Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)\n",
"Collecting requests (from transformers->-r requirements.txt (line 3))\n",
" Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
"Collecting tokenizers<0.22,>=0.21 (from transformers->-r requirements.txt (line 3))\n",
" Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)\n",
"Collecting safetensors>=0.4.1 (from transformers->-r requirements.txt (line 3))\n",
" Using cached safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)\n",
"Collecting tqdm>=4.27 (from transformers->-r requirements.txt (line 3))\n",
" Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)\n",
"Collecting networkx (from torch->-r requirements.txt (line 4))\n",
" Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n",
"Collecting jinja2 (from torch->-r requirements.txt (line 4))\n",
" Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n",
"Collecting fsspec (from torch->-r requirements.txt (line 4))\n",
" Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n",
"Requirement already satisfied: setuptools in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from torch->-r requirements.txt (line 4)) (75.6.0)\n",
"Collecting sympy==1.13.1 (from torch->-r requirements.txt (line 4))\n",
" Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n",
"Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch->-r requirements.txt (line 4))\n",
" Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n",
"Collecting pillow!=8.3.*,>=5.3.0 (from torchvision->-r requirements.txt (line 5))\n",
" Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)\n",
"Collecting cffi>=0.6 (from PySoundFile->-r requirements.txt (line 7))\n",
" Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl.metadata (1.6 kB)\n",
"Collecting future (from ffmpeg-python->-r requirements.txt (line 8))\n",
" Using cached future-1.0.0-py3-none-any.whl.metadata (4.0 kB)\n",
"Collecting annotated-types>=0.6.0 (from pydantic->-r requirements.txt (line 9))\n",
" Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
"Collecting pydantic-core==2.27.1 (from pydantic->-r requirements.txt (line 9))\n",
" Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl.metadata (6.7 kB)\n",
"Collecting pycparser (from cffi>=0.6->PySoundFile->-r requirements.txt (line 7))\n",
" Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n",
"Requirement already satisfied: colorama in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from click>=7.0->uvicorn->-r requirements.txt (line 2)) (0.4.6)\n",
"Collecting anyio<5,>=3.4.0 (from starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n",
" Using cached anyio-4.7.0-py3-none-any.whl.metadata (4.7 kB)\n",
"Collecting MarkupSafe>=2.0 (from jinja2->torch->-r requirements.txt (line 4))\n",
" Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl.metadata (4.1 kB)\n",
"Collecting charset-normalizer<4,>=2 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl.metadata (34 kB)\n",
"Collecting idna<4,>=2.5 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n",
"Collecting urllib3<3,>=1.21.1 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n",
"Collecting certifi>=2017.4.17 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
"Collecting sniffio>=1.1 (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n",
" Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
"Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n",
"Using cached uvicorn-0.34.0-py3-none-any.whl (62 kB)\n",
"Using cached transformers-4.47.0-py3-none-any.whl (10.1 MB)\n",
"Using cached torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)\n",
"Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n",
"Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl (1.6 MB)\n",
"Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl (2.4 MB)\n",
"Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl (671 kB)\n",
"Using cached ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)\n",
"Using cached pydantic-2.10.3-py3-none-any.whl (456 kB)\n",
"Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl (2.0 MB)\n",
"Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl (12.6 MB)\n",
"Using cached python_multipart-0.0.19-py3-none-any.whl (24 kB)\n",
"Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
"Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl (181 kB)\n",
"Using cached click-8.1.7-py3-none-any.whl (97 kB)\n",
"Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
"Using cached huggingface_hub-0.27.0-py3-none-any.whl (450 kB)\n",
"Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n",
"Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl (2.6 MB)\n",
" ---------------------------------------- 0.0/2.6 MB ? eta -:--:--\n",
" ---------------------------------------- 2.6/2.6 MB 21.2 MB/s eta 0:00:00\n",
"Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl (156 kB)\n",
"Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl (273 kB)\n",
"Using cached safetensors-0.4.5-cp312-none-win_amd64.whl (286 kB)\n",
"Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n",
"Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl (2.4 MB)\n",
"Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)\n",
"Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n",
"Using cached future-1.0.0-py3-none-any.whl (491 kB)\n",
"Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n",
"Using cached networkx-3.4.2-py3-none-any.whl (1.7 MB)\n",
"Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n",
"Using cached anyio-4.7.0-py3-none-any.whl (93 kB)\n",
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
"Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl (102 kB)\n",
"Using cached idna-3.10-py3-none-any.whl (70 kB)\n",
"Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl (15 kB)\n",
"Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n",
"Using cached pycparser-2.22-py3-none-any.whl (117 kB)\n",
"Using cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
"Installing collected packages: mpmath, urllib3, tqdm, sympy, sniffio, safetensors, regex, pyyaml, python-multipart, pydantic-core, pycparser, pillow, numpy, networkx, MarkupSafe, idna, h11, future, fsspec, filelock, click, charset-normalizer, certifi, annotated-types, uvicorn, requests, pydantic, jinja2, ffmpeg-python, cffi, anyio, torch, starlette, PySoundFile, huggingface-hub, torchvision, torchaudio, tokenizers, fastapi, transformers\n",
"Successfully installed MarkupSafe-3.0.2 PySoundFile-0.9.0.post1 annotated-types-0.7.0 anyio-4.7.0 certifi-2024.12.14 cffi-1.17.1 charset-normalizer-3.4.0 click-8.1.7 fastapi-0.115.6 ffmpeg-python-0.2.0 filelock-3.16.1 fsspec-2024.10.0 future-1.0.0 h11-0.14.0 huggingface-hub-0.27.0 idna-3.10 jinja2-3.1.4 mpmath-1.3.0 networkx-3.4.2 numpy-2.2.0 pillow-11.0.0 pycparser-2.22 pydantic-2.10.3 pydantic-core-2.27.1 python-multipart-0.0.19 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.4.5 sniffio-1.3.1 starlette-0.41.3 sympy-1.13.1 tokenizers-0.21.0 torch-2.5.1 torchaudio-2.5.1 torchvision-0.20.1 tqdm-4.67.1 transformers-4.47.0 urllib3-2.2.3 uvicorn-0.34.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic so the install targets this kernel's environment\n",
"%pip install -r requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['soundfile']\n"
]
}
],
"source": [
"import torchaudio\n",
"\n",
"# Verify that an audio backend (e.g. 'soundfile') is available for loading files.\n",
"# print() applies str() itself, so the explicit str(...) wrapper is unnecessary.\n",
"print(torchaudio.list_audio_backends())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the shell escape (!) — the %pip magic does not process the '>' redirection,\n",
"# so a bare 'pip list ... > file' may not actually write requirements.txt.\n",
"!pip list --format=freeze > requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:13: SyntaxWarning: invalid escape sequence '\\m'\n",
"<>:17: SyntaxWarning: invalid escape sequence '\\H'\n",
"<>:13: SyntaxWarning: invalid escape sequence '\\m'\n",
"<>:17: SyntaxWarning: invalid escape sequence '\\H'\n",
"C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:13: SyntaxWarning: invalid escape sequence '\\m'\n",
" model_path = \"Deepfake\\model\"\n",
"C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:17: SyntaxWarning: invalid escape sequence '\\H'\n",
" cache_dir=\"D:\\HuggingFace\",\n"
]
}
],
"source": [
"from transformers import pipeline\n",
"from transformers import AutoProcessor, AutoModelForAudioClassification\n",
"from fastapi import FastAPI\n",
"from pydantic import BaseModel\n",
"import uvicorn\n",
"import torchaudio\n",
"import torch\n",
"\n",
"# Define the input schema for the API\n",
"class InputData(BaseModel):\n",
"    input: str\n",
"\n",
"# Raw strings prevent the invalid escape sequences ('\\\\m', '\\\\H') that the\n",
"# original non-raw Windows paths triggered (see the SyntaxWarnings in stderr).\n",
"model_path = r\"Deepfake\\model\"\n",
"processor = AutoProcessor.from_pretrained(model_path)\n",
"# Instantiate the model from local files only (no network download)\n",
"model = AutoModelForAudioClassification.from_pretrained(pretrained_model_name_or_path=model_path,\n",
"                                                        cache_dir=r\"D:\\HuggingFace\",\n",
"                                                        local_files_only=True,\n",
"                                                        )\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def prepare_audio(file_path, sampling_rate=16000, duration=10):\n",
"    \"\"\"\n",
"    Loads an audio file, normalizes it to mono at the target sampling rate,\n",
"    and splits it into fixed-length, zero-padded chunks.\n",
"\n",
"    Parameters:\n",
"    - file_path: Path to the audio file.\n",
"    - sampling_rate: Target sampling rate for the audio.\n",
"    - duration: Duration in seconds for each chunk.\n",
"\n",
"    Returns:\n",
"    - A list of audio chunks, each as a numpy array.\n",
"    \"\"\"\n",
"    signal, source_rate = torchaudio.load(file_path)\n",
"\n",
"    # Down-mix multi-channel audio to a single (mono) channel\n",
"    if signal.shape[0] > 1:\n",
"        signal = torch.mean(signal, dim=0, keepdim=True)\n",
"\n",
"    # Bring the signal to the requested sampling rate when it differs\n",
"    if source_rate != sampling_rate:\n",
"        resampler = torchaudio.transforms.Resample(orig_freq=source_rate, new_freq=sampling_rate)\n",
"        signal = resampler(signal)\n",
"\n",
"    # Fixed chunk length, expressed in samples\n",
"    samples_per_chunk = sampling_rate * duration\n",
"    chunks = []\n",
"\n",
"    # Walk the waveform in chunk-sized strides\n",
"    for offset in range(0, signal.shape[1], samples_per_chunk):\n",
"        piece = signal[:, offset:offset + samples_per_chunk]\n",
"\n",
"        # Zero-pad the trailing chunk so every chunk has equal length\n",
"        shortfall = samples_per_chunk - piece.shape[1]\n",
"        if shortfall > 0:\n",
"            piece = torch.nn.functional.pad(piece, (0, shortfall))\n",
"\n",
"        chunks.append(piece.squeeze().numpy())\n",
"\n",
"    return chunks\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn.functional as F\n",
"\n",
"def predict_audio(file_path):\n",
"    \"\"\"\n",
"    Predicts the class of an audio file by aggregating predictions from chunks and calculates confidence.\n",
"\n",
"    Args:\n",
"        file_path (str): Path to the audio file.\n",
"\n",
"    Returns:\n",
"        dict: Contains the predicted class label and average confidence score.\n",
"\n",
"    Raises:\n",
"        ValueError: If the file yields no audio chunks (empty audio).\n",
"    \"\"\"\n",
"    # Prepare audio chunks\n",
"    audio_chunks = prepare_audio(file_path)\n",
"\n",
"    # Guard: an empty file would otherwise cause a ZeroDivisionError\n",
"    # when averaging the confidences below.\n",
"    if not audio_chunks:\n",
"        raise ValueError(f\"No audio data found in {file_path!r}\")\n",
"\n",
"    predictions = []\n",
"    confidences = []\n",
"\n",
"    for chunk in audio_chunks:\n",
"        # Prepare input for the model (sampling rate must match prepare_audio's default)\n",
"        inputs = processor(\n",
"            chunk, sampling_rate=16000, return_tensors=\"pt\", padding=True\n",
"        )\n",
"\n",
"        # Perform inference without tracking gradients (saves memory)\n",
"        with torch.no_grad():\n",
"            outputs = model(**inputs)\n",
"            logits = outputs.logits\n",
"\n",
"        # Apply softmax to turn logits into class probabilities\n",
"        probabilities = F.softmax(logits, dim=1)\n",
"\n",
"        # Record the predicted class and its confidence for this chunk\n",
"        confidence, predicted_class = torch.max(probabilities, dim=1)\n",
"        predictions.append(predicted_class.item())\n",
"        confidences.append(confidence.item())\n",
"\n",
"    # Aggregate predictions (majority voting across chunks)\n",
"    aggregated_prediction_id = max(set(predictions), key=predictions.count)\n",
"    predicted_label = model.config.id2label[aggregated_prediction_id]\n",
"\n",
"    # Calculate average confidence across chunks\n",
"    average_confidence = sum(confidences) / len(confidences)\n",
"\n",
"    return {\n",
"        \"predicted_label\": predicted_label,\n",
"        \"average_confidence\": average_confidence\n",
"    }\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Predicted Class: {'predicted_label': 'Real', 'average_confidence': 0.9984144032001495}\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick here for more info. \n",
"\u001b[1;31mView Jupyter log for further details."
]
}
],
"source": [
"# Example: Test a single audio file\n",
"# Replace with your audio file path\n",
"file_path = r\"D:\\repos\\GODAM\\audioFiles\\test.wav\"\n",
"result = predict_audio(file_path)\n",
"print(f\"Predicted Class: {result}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "modelEnv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}