{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting fastapi (from -r requirements.txt (line 1))\n", " Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n", "Collecting uvicorn (from -r requirements.txt (line 2))\n", " Using cached uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n", "Collecting transformers (from -r requirements.txt (line 3))\n", " Using cached transformers-4.47.0-py3-none-any.whl.metadata (43 kB)\n", "Collecting torch (from -r requirements.txt (line 4))\n", " Using cached torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)\n", "Collecting torchvision (from -r requirements.txt (line 5))\n", " Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)\n", "Collecting torchaudio (from -r requirements.txt (line 6))\n", " Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl.metadata (6.5 kB)\n", "Collecting PySoundFile (from -r requirements.txt (line 7))\n", " Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl.metadata (9.4 kB)\n", "Collecting ffmpeg-python (from -r requirements.txt (line 8))\n", " Using cached ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)\n", "Collecting pydantic (from -r requirements.txt (line 9))\n", " Using cached pydantic-2.10.3-py3-none-any.whl.metadata (172 kB)\n", "Collecting numpy (from -r requirements.txt (line 10))\n", " Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl.metadata (60 kB)\n", "Collecting python-multipart (from -r requirements.txt (line 11))\n", " Using cached python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)\n", "Collecting starlette<0.42.0,>=0.40.0 (from fastapi->-r requirements.txt (line 1))\n", " Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from fastapi->-r requirements.txt (line 1)) (4.12.2)\n", "Collecting click>=7.0 (from uvicorn->-r requirements.txt (line 2))\n", " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n", "Collecting h11>=0.8 (from uvicorn->-r requirements.txt (line 2))\n", " Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", "Collecting filelock (from transformers->-r requirements.txt (line 3))\n", " Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n", "Collecting huggingface-hub<1.0,>=0.24.0 (from transformers->-r requirements.txt (line 3))\n", " Using cached huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from transformers->-r requirements.txt (line 3)) (24.2)\n", "Collecting pyyaml>=5.1 (from transformers->-r requirements.txt (line 3))\n", " Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)\n", "Collecting regex!=2019.12.17 (from transformers->-r requirements.txt (line 3))\n", " Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)\n", "Collecting requests (from transformers->-r requirements.txt (line 3))\n", " Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n", "Collecting tokenizers<0.22,>=0.21 (from transformers->-r requirements.txt (line 3))\n", " Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)\n", "Collecting safetensors>=0.4.1 (from transformers->-r requirements.txt (line 3))\n", " Using cached safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)\n", "Collecting tqdm>=4.27 (from transformers->-r requirements.txt (line 3))\n", " Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)\n", "Collecting networkx (from torch->-r requirements.txt (line 4))\n", " Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n", "Collecting jinja2 (from torch->-r requirements.txt (line 4))\n", " Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n", "Collecting fsspec (from torch->-r requirements.txt (line 4))\n", " Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: setuptools in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from torch->-r requirements.txt (line 4)) (75.6.0)\n", "Collecting sympy==1.13.1 (from torch->-r requirements.txt (line 4))\n", " Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n", "Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch->-r requirements.txt (line 4))\n", " Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n", "Collecting pillow!=8.3.*,>=5.3.0 (from torchvision->-r requirements.txt (line 5))\n", " Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)\n", "Collecting cffi>=0.6 (from PySoundFile->-r requirements.txt (line 7))\n", " Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl.metadata (1.6 kB)\n", "Collecting future (from ffmpeg-python->-r requirements.txt (line 8))\n", " Using cached future-1.0.0-py3-none-any.whl.metadata (4.0 kB)\n", "Collecting annotated-types>=0.6.0 (from pydantic->-r requirements.txt (line 9))\n", " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", "Collecting pydantic-core==2.27.1 (from pydantic->-r requirements.txt (line 9))\n", " Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl.metadata (6.7 kB)\n", "Collecting pycparser (from cffi>=0.6->PySoundFile->-r requirements.txt (line 7))\n", " Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n", "Requirement already satisfied: colorama in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from click>=7.0->uvicorn->-r requirements.txt (line 2)) (0.4.6)\n", "Collecting anyio<5,>=3.4.0 (from starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n", " Using cached anyio-4.7.0-py3-none-any.whl.metadata (4.7 kB)\n", "Collecting MarkupSafe>=2.0 (from jinja2->torch->-r requirements.txt (line 4))\n", " Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl.metadata (4.1 kB)\n", "Collecting charset-normalizer<4,>=2 (from requests->transformers->-r requirements.txt (line 3))\n", " Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl.metadata (34 kB)\n", "Collecting idna<4,>=2.5 (from requests->transformers->-r requirements.txt (line 3))\n", " Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n", "Collecting urllib3<3,>=1.21.1 (from requests->transformers->-r requirements.txt (line 3))\n", " Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n", "Collecting certifi>=2017.4.17 (from requests->transformers->-r requirements.txt (line 3))\n", " Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n", "Collecting sniffio>=1.1 (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n", " Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n", "Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n", "Using cached uvicorn-0.34.0-py3-none-any.whl (62 kB)\n", "Using cached transformers-4.47.0-py3-none-any.whl (10.1 MB)\n", "Using cached torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)\n", "Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n", "Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl (1.6 MB)\n", "Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl (2.4 MB)\n", "Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl (671 kB)\n", "Using cached ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)\n", "Using cached pydantic-2.10.3-py3-none-any.whl (456 kB)\n", "Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl (2.0 MB)\n", "Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl (12.6 MB)\n", "Using cached python_multipart-0.0.19-py3-none-any.whl (24 kB)\n", "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", "Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl (181 kB)\n", "Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", "Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", "Using cached huggingface_hub-0.27.0-py3-none-any.whl (450 kB)\n", "Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n", "Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl (2.6 MB)\n", " ---------------------------------------- 0.0/2.6 MB ? eta -:--:--\n", " ---------------------------------------- 2.6/2.6 MB 21.2 MB/s eta 0:00:00\n", "Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl (156 kB)\n", "Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl (273 kB)\n", "Using cached safetensors-0.4.5-cp312-none-win_amd64.whl (286 kB)\n", "Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n", "Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl (2.4 MB)\n", "Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)\n", "Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n", "Using cached future-1.0.0-py3-none-any.whl (491 kB)\n", "Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n", "Using cached networkx-3.4.2-py3-none-any.whl (1.7 MB)\n", "Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n", "Using cached anyio-4.7.0-py3-none-any.whl (93 kB)\n", "Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n", "Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl (102 kB)\n", "Using cached idna-3.10-py3-none-any.whl (70 kB)\n", "Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl (15 kB)\n", "Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", "Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n", "Using cached pycparser-2.22-py3-none-any.whl (117 kB)\n", "Using cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n", "Installing collected packages: mpmath, urllib3, tqdm, sympy, sniffio, safetensors, regex, pyyaml, python-multipart, pydantic-core, pycparser, pillow, numpy, networkx, MarkupSafe, idna, h11, future, fsspec, filelock, click, charset-normalizer, certifi, annotated-types, uvicorn, requests, pydantic, jinja2, ffmpeg-python, cffi, anyio, torch, starlette, PySoundFile, huggingface-hub, torchvision, torchaudio, tokenizers, fastapi, transformers\n", "Successfully installed MarkupSafe-3.0.2 PySoundFile-0.9.0.post1 annotated-types-0.7.0 anyio-4.7.0 certifi-2024.12.14 cffi-1.17.1 charset-normalizer-3.4.0 click-8.1.7 fastapi-0.115.6 ffmpeg-python-0.2.0 filelock-3.16.1 fsspec-2024.10.0 future-1.0.0 h11-0.14.0 huggingface-hub-0.27.0 idna-3.10 jinja2-3.1.4 mpmath-1.3.0 networkx-3.4.2 numpy-2.2.0 pillow-11.0.0 pycparser-2.22 pydantic-2.10.3 pydantic-core-2.27.1 python-multipart-0.0.19 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.4.5 sniffio-1.3.1 starlette-0.41.3 sympy-1.13.1 tokenizers-0.21.0 torch-2.5.1 torchaudio-2.5.1 torchvision-0.20.1 tqdm-4.67.1 transformers-4.47.0 urllib3-2.2.3 uvicorn-0.34.0\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['soundfile']\n" ] } ], "source": [ "import torchaudio\n", "print(str(torchaudio.list_audio_backends()))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip list --format=freeze > requirements.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "<>:13: SyntaxWarning: invalid escape sequence '\\m'\n", "<>:17: SyntaxWarning: invalid escape sequence '\\H'\n", "<>:13: SyntaxWarning: invalid escape sequence '\\m'\n", "<>:17: SyntaxWarning: invalid escape sequence '\\H'\n", "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:13: SyntaxWarning: invalid escape sequence '\\m'\n", " model_path = \"Deepfake\\model\"\n", "C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:17: SyntaxWarning: invalid escape sequence '\\H'\n", " cache_dir=\"D:\\HuggingFace\",\n" ] } ], "source": [ "from transformers import pipeline\n", "from transformers import AutoProcessor, AutoModelForAudioClassification\n", "from fastapi import FastAPI\n", "from pydantic import BaseModel\n", "import uvicorn\n", "import torchaudio\n", "import torch\n", "\n", "# Define the input schema\n", "class InputData(BaseModel):\n", " input: str\n", "\n", "model_path = \"Deepfake\\model\"\n", "processor = AutoProcessor.from_pretrained(model_path)\n", "# Instantiate the model\n", "model = AutoModelForAudioClassification.from_pretrained(pretrained_model_name_or_path=model_path,\n", " cache_dir=\"D:\\HuggingFace\",\n", " local_files_only=True,\n", " )\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Functions" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def prepare_audio(file_path, sampling_rate=16000, duration=10):\n", " \"\"\"\n", " Prepares audio by loading, resampling, and returning it in manageable chunks.\n", " \n", " Parameters:\n", " - file_path: Path to the audio file.\n", " - sampling_rate: Target sampling rate for the audio.\n", " - duration: Duration in seconds for each chunk.\n", " \n", " Returns:\n", " - A list of audio chunks, each as a numpy array.\n", " \"\"\"\n", " # Load and resample the audio file\n", " waveform, original_sampling_rate = torchaudio.load(file_path)\n", " \n", " # Convert stereo to mono if necessary\n", " if waveform.shape[0] > 1: # More than 1 channel\n", " waveform = torch.mean(waveform, dim=0, keepdim=True)\n", " \n", " # Resample if needed\n", " if original_sampling_rate != sampling_rate:\n", " resampler = torchaudio.transforms.Resample(orig_freq=original_sampling_rate, new_freq=sampling_rate)\n", " waveform = resampler(waveform)\n", " \n", " # Calculate chunk size in samples\n", " chunk_size = sampling_rate * duration\n", " audio_chunks = []\n", "\n", " # Split the audio into chunks\n", " for start in range(0, waveform.shape[1], chunk_size):\n", " chunk = waveform[:, start:start + chunk_size]\n", " \n", " # Pad the last chunk if it's shorter than the chunk size\n", " if chunk.shape[1] < chunk_size:\n", " padding = chunk_size - chunk.shape[1]\n", " chunk = torch.nn.functional.pad(chunk, (0, padding))\n", " \n", " audio_chunks.append(chunk.squeeze().numpy())\n", " \n", " return audio_chunks\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import torch.nn.functional as F\n", "\n", "def predict_audio(file_path):\n", " \"\"\"\n", " Predicts the class of an audio file by aggregating predictions from chunks and calculates confidence.\n", " \n", " Args:\n", " file_path (str): Path to the audio file.\n", "\n", " Returns:\n", " dict: Contains the predicted class label and average confidence score.\n", " \"\"\"\n", " # Prepare audio chunks\n", " audio_chunks = prepare_audio(file_path)\n", " predictions = []\n", " confidences = []\n", "\n", " for i, chunk in enumerate(audio_chunks):\n", " # Prepare input for the model\n", " inputs = processor(\n", " chunk, sampling_rate=16000, return_tensors=\"pt\", padding=True\n", " )\n", " \n", " # Perform inference\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " logits = outputs.logits\n", " \n", " # Apply softmax to calculate probabilities\n", " probabilities = F.softmax(logits, dim=1)\n", " \n", " # Get the predicted class and its confidence\n", " confidence, predicted_class = torch.max(probabilities, dim=1)\n", " predictions.append(predicted_class.item())\n", " confidences.append(confidence.item())\n", " \n", " # Aggregate predictions (majority voting)\n", " aggregated_prediction_id = max(set(predictions), key=predictions.count)\n", " predicted_label = model.config.id2label[aggregated_prediction_id]\n", " \n", " # Calculate average confidence across chunks\n", " average_confidence = sum(confidences) / len(confidences)\n", "\n", " return {\n", " \"predicted_label\": predicted_label,\n", " \"average_confidence\": average_confidence\n", " }\n", "\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Chunk shape: (160000,)\n", "Predicted Class: {'predicted_label': 'Real', 'average_confidence': 0.9984144032001495}\n" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "# Example: Test a single audio file\n", "file_path = r\"D:\\repos\\GODAM\\audioFiles\\test.wav\" # Replace with your audio file path\n", "predicted_class = predict_audio(file_path)\n", "print(f\"Predicted Class: {predicted_class}\")" ] } ], "metadata": { "kernelspec": { "display_name": "modelEnv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 2 }