{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting fastapi (from -r requirements.txt (line 1))\n",
" Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n",
"Collecting uvicorn (from -r requirements.txt (line 2))\n",
" Using cached uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n",
"Collecting transformers (from -r requirements.txt (line 3))\n",
" Using cached transformers-4.47.0-py3-none-any.whl.metadata (43 kB)\n",
"Collecting torch (from -r requirements.txt (line 4))\n",
" Using cached torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)\n",
"Collecting torchvision (from -r requirements.txt (line 5))\n",
" Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)\n",
"Collecting torchaudio (from -r requirements.txt (line 6))\n",
" Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl.metadata (6.5 kB)\n",
"Collecting PySoundFile (from -r requirements.txt (line 7))\n",
" Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl.metadata (9.4 kB)\n",
"Collecting ffmpeg-python (from -r requirements.txt (line 8))\n",
" Using cached ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)\n",
"Collecting pydantic (from -r requirements.txt (line 9))\n",
" Using cached pydantic-2.10.3-py3-none-any.whl.metadata (172 kB)\n",
"Collecting numpy (from -r requirements.txt (line 10))\n",
" Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl.metadata (60 kB)\n",
"Collecting python-multipart (from -r requirements.txt (line 11))\n",
" Using cached python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)\n",
"Collecting starlette<0.42.0,>=0.40.0 (from fastapi->-r requirements.txt (line 1))\n",
" Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from fastapi->-r requirements.txt (line 1)) (4.12.2)\n",
"Collecting click>=7.0 (from uvicorn->-r requirements.txt (line 2))\n",
" Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n",
"Collecting h11>=0.8 (from uvicorn->-r requirements.txt (line 2))\n",
" Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
"Collecting filelock (from transformers->-r requirements.txt (line 3))\n",
" Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n",
"Collecting huggingface-hub<1.0,>=0.24.0 (from transformers->-r requirements.txt (line 3))\n",
" Using cached huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from transformers->-r requirements.txt (line 3)) (24.2)\n",
"Collecting pyyaml>=5.1 (from transformers->-r requirements.txt (line 3))\n",
" Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)\n",
"Collecting regex!=2019.12.17 (from transformers->-r requirements.txt (line 3))\n",
" Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)\n",
"Collecting requests (from transformers->-r requirements.txt (line 3))\n",
" Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
"Collecting tokenizers<0.22,>=0.21 (from transformers->-r requirements.txt (line 3))\n",
" Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)\n",
"Collecting safetensors>=0.4.1 (from transformers->-r requirements.txt (line 3))\n",
" Using cached safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)\n",
"Collecting tqdm>=4.27 (from transformers->-r requirements.txt (line 3))\n",
" Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)\n",
"Collecting networkx (from torch->-r requirements.txt (line 4))\n",
" Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n",
"Collecting jinja2 (from torch->-r requirements.txt (line 4))\n",
" Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n",
"Collecting fsspec (from torch->-r requirements.txt (line 4))\n",
" Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n",
"Requirement already satisfied: setuptools in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from torch->-r requirements.txt (line 4)) (75.6.0)\n",
"Collecting sympy==1.13.1 (from torch->-r requirements.txt (line 4))\n",
" Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n",
"Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch->-r requirements.txt (line 4))\n",
" Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n",
"Collecting pillow!=8.3.*,>=5.3.0 (from torchvision->-r requirements.txt (line 5))\n",
" Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)\n",
"Collecting cffi>=0.6 (from PySoundFile->-r requirements.txt (line 7))\n",
" Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl.metadata (1.6 kB)\n",
"Collecting future (from ffmpeg-python->-r requirements.txt (line 8))\n",
" Using cached future-1.0.0-py3-none-any.whl.metadata (4.0 kB)\n",
"Collecting annotated-types>=0.6.0 (from pydantic->-r requirements.txt (line 9))\n",
" Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
"Collecting pydantic-core==2.27.1 (from pydantic->-r requirements.txt (line 9))\n",
" Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl.metadata (6.7 kB)\n",
"Collecting pycparser (from cffi>=0.6->PySoundFile->-r requirements.txt (line 7))\n",
" Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n",
"Requirement already satisfied: colorama in c:\\users\\asus\\anaconda3\\envs\\modelenv\\lib\\site-packages (from click>=7.0->uvicorn->-r requirements.txt (line 2)) (0.4.6)\n",
"Collecting anyio<5,>=3.4.0 (from starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n",
" Using cached anyio-4.7.0-py3-none-any.whl.metadata (4.7 kB)\n",
"Collecting MarkupSafe>=2.0 (from jinja2->torch->-r requirements.txt (line 4))\n",
" Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl.metadata (4.1 kB)\n",
"Collecting charset-normalizer<4,>=2 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl.metadata (34 kB)\n",
"Collecting idna<4,>=2.5 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n",
"Collecting urllib3<3,>=1.21.1 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n",
"Collecting certifi>=2017.4.17 (from requests->transformers->-r requirements.txt (line 3))\n",
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
"Collecting sniffio>=1.1 (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi->-r requirements.txt (line 1))\n",
" Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
"Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n",
"Using cached uvicorn-0.34.0-py3-none-any.whl (62 kB)\n",
"Using cached transformers-4.47.0-py3-none-any.whl (10.1 MB)\n",
"Using cached torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)\n",
"Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n",
"Using cached torchvision-0.20.1-cp312-cp312-win_amd64.whl (1.6 MB)\n",
"Using cached torchaudio-2.5.1-cp312-cp312-win_amd64.whl (2.4 MB)\n",
"Using cached PySoundFile-0.9.0.post1-py2.py3.cp26.cp27.cp32.cp33.cp34.cp35.cp36.pp27.pp32.pp33-none-win_amd64.whl (671 kB)\n",
"Using cached ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)\n",
"Using cached pydantic-2.10.3-py3-none-any.whl (456 kB)\n",
"Using cached pydantic_core-2.27.1-cp312-none-win_amd64.whl (2.0 MB)\n",
"Using cached numpy-2.2.0-cp312-cp312-win_amd64.whl (12.6 MB)\n",
"Using cached python_multipart-0.0.19-py3-none-any.whl (24 kB)\n",
"Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
"Using cached cffi-1.17.1-cp312-cp312-win_amd64.whl (181 kB)\n",
"Using cached click-8.1.7-py3-none-any.whl (97 kB)\n",
"Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
"Using cached huggingface_hub-0.27.0-py3-none-any.whl (450 kB)\n",
"Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n",
"Downloading pillow-11.0.0-cp312-cp312-win_amd64.whl (2.6 MB)\n",
" ---------------------------------------- 0.0/2.6 MB ? eta -:--:--\n",
" ---------------------------------------- 2.6/2.6 MB 21.2 MB/s eta 0:00:00\n",
"Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl (156 kB)\n",
"Using cached regex-2024.11.6-cp312-cp312-win_amd64.whl (273 kB)\n",
"Using cached safetensors-0.4.5-cp312-none-win_amd64.whl (286 kB)\n",
"Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n",
"Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl (2.4 MB)\n",
"Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)\n",
"Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n",
"Using cached future-1.0.0-py3-none-any.whl (491 kB)\n",
"Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n",
"Using cached networkx-3.4.2-py3-none-any.whl (1.7 MB)\n",
"Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n",
"Using cached anyio-4.7.0-py3-none-any.whl (93 kB)\n",
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
"Using cached charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl (102 kB)\n",
"Using cached idna-3.10-py3-none-any.whl (70 kB)\n",
"Using cached MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl (15 kB)\n",
"Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n",
"Using cached pycparser-2.22-py3-none-any.whl (117 kB)\n",
"Using cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
"Installing collected packages: mpmath, urllib3, tqdm, sympy, sniffio, safetensors, regex, pyyaml, python-multipart, pydantic-core, pycparser, pillow, numpy, networkx, MarkupSafe, idna, h11, future, fsspec, filelock, click, charset-normalizer, certifi, annotated-types, uvicorn, requests, pydantic, jinja2, ffmpeg-python, cffi, anyio, torch, starlette, PySoundFile, huggingface-hub, torchvision, torchaudio, tokenizers, fastapi, transformers\n",
"Successfully installed MarkupSafe-3.0.2 PySoundFile-0.9.0.post1 annotated-types-0.7.0 anyio-4.7.0 certifi-2024.12.14 cffi-1.17.1 charset-normalizer-3.4.0 click-8.1.7 fastapi-0.115.6 ffmpeg-python-0.2.0 filelock-3.16.1 fsspec-2024.10.0 future-1.0.0 h11-0.14.0 huggingface-hub-0.27.0 idna-3.10 jinja2-3.1.4 mpmath-1.3.0 networkx-3.4.2 numpy-2.2.0 pillow-11.0.0 pycparser-2.22 pydantic-2.10.3 pydantic-core-2.27.1 python-multipart-0.0.19 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.4.5 sniffio-1.3.1 starlette-0.41.3 sympy-1.13.1 tokenizers-0.21.0 torch-2.5.1 torchaudio-2.5.1 torchvision-0.20.1 tqdm-4.67.1 transformers-4.47.0 urllib3-2.2.3 uvicorn-0.34.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic so the install targets this kernel's environment\n",
"%pip install -r requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['soundfile']\n"
]
}
],
"source": [
"import torchaudio\n",
"\n",
"# Verify that an audio backend (e.g. 'soundfile') is available for loading files.\n",
"# print() applies str() itself, so the explicit str(...) wrapper is unnecessary.\n",
"print(torchaudio.list_audio_backends())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the shell escape (!) — the %pip magic does not process the '>' redirection,\n",
"# so a bare 'pip list ... > file' may not actually write requirements.txt.\n",
"!pip list --format=freeze > requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:13: SyntaxWarning: invalid escape sequence '\\m'\n",
"<>:17: SyntaxWarning: invalid escape sequence '\\H'\n",
"<>:13: SyntaxWarning: invalid escape sequence '\\m'\n",
"<>:17: SyntaxWarning: invalid escape sequence '\\H'\n",
"C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:13: SyntaxWarning: invalid escape sequence '\\m'\n",
" model_path = \"Deepfake\\model\"\n",
"C:\\Users\\Asus\\AppData\\Local\\Temp\\ipykernel_18220\\208613059.py:17: SyntaxWarning: invalid escape sequence '\\H'\n",
" cache_dir=\"D:\\HuggingFace\",\n"
]
}
],
"source": [
"from transformers import pipeline\n",
"from transformers import AutoProcessor, AutoModelForAudioClassification\n",
"from fastapi import FastAPI\n",
"from pydantic import BaseModel\n",
"import uvicorn\n",
"import torchaudio\n",
"import torch\n",
"\n",
"# Define the input schema for the API\n",
"class InputData(BaseModel):\n",
"    input: str\n",
"\n",
"# Raw strings prevent the invalid escape sequences ('\\\\m', '\\\\H') that the\n",
"# original non-raw Windows paths triggered (see the SyntaxWarnings in stderr).\n",
"model_path = r\"Deepfake\\model\"\n",
"processor = AutoProcessor.from_pretrained(model_path)\n",
"# Instantiate the model from local files only (no network download)\n",
"model = AutoModelForAudioClassification.from_pretrained(pretrained_model_name_or_path=model_path,\n",
"                                                        cache_dir=r\"D:\\HuggingFace\",\n",
"                                                        local_files_only=True,\n",
"                                                        )\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def prepare_audio(file_path, sampling_rate=16000, duration=10):\n",
"    \"\"\"\n",
"    Loads an audio file, normalizes it to mono at the target sampling rate,\n",
"    and splits it into fixed-length, zero-padded chunks.\n",
"\n",
"    Parameters:\n",
"    - file_path: Path to the audio file.\n",
"    - sampling_rate: Target sampling rate for the audio.\n",
"    - duration: Duration in seconds for each chunk.\n",
"\n",
"    Returns:\n",
"    - A list of audio chunks, each as a numpy array.\n",
"    \"\"\"\n",
"    signal, source_rate = torchaudio.load(file_path)\n",
"\n",
"    # Down-mix multi-channel audio to a single (mono) channel\n",
"    if signal.shape[0] > 1:\n",
"        signal = torch.mean(signal, dim=0, keepdim=True)\n",
"\n",
"    # Bring the signal to the requested sampling rate when it differs\n",
"    if source_rate != sampling_rate:\n",
"        resampler = torchaudio.transforms.Resample(orig_freq=source_rate, new_freq=sampling_rate)\n",
"        signal = resampler(signal)\n",
"\n",
"    # Fixed chunk length, expressed in samples\n",
"    samples_per_chunk = sampling_rate * duration\n",
"    chunks = []\n",
"\n",
"    # Walk the waveform in chunk-sized strides\n",
"    for offset in range(0, signal.shape[1], samples_per_chunk):\n",
"        piece = signal[:, offset:offset + samples_per_chunk]\n",
"\n",
"        # Zero-pad the trailing chunk so every chunk has equal length\n",
"        shortfall = samples_per_chunk - piece.shape[1]\n",
"        if shortfall > 0:\n",
"            piece = torch.nn.functional.pad(piece, (0, shortfall))\n",
"\n",
"        chunks.append(piece.squeeze().numpy())\n",
"\n",
"    return chunks\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn.functional as F\n",
"\n",
"def predict_audio(file_path):\n",
"    \"\"\"\n",
"    Predicts the class of an audio file by aggregating predictions from chunks and calculates confidence.\n",
"\n",
"    Args:\n",
"        file_path (str): Path to the audio file.\n",
"\n",
"    Returns:\n",
"        dict: Contains the predicted class label and average confidence score.\n",
"\n",
"    Raises:\n",
"        ValueError: If the file yields no audio chunks (empty audio).\n",
"    \"\"\"\n",
"    # Prepare audio chunks\n",
"    audio_chunks = prepare_audio(file_path)\n",
"\n",
"    # Guard: an empty file would otherwise cause a ZeroDivisionError\n",
"    # when averaging the confidences below.\n",
"    if not audio_chunks:\n",
"        raise ValueError(f\"No audio data found in {file_path!r}\")\n",
"\n",
"    predictions = []\n",
"    confidences = []\n",
"\n",
"    for chunk in audio_chunks:\n",
"        # Prepare input for the model (sampling rate must match prepare_audio's default)\n",
"        inputs = processor(\n",
"            chunk, sampling_rate=16000, return_tensors=\"pt\", padding=True\n",
"        )\n",
"\n",
"        # Perform inference without tracking gradients (saves memory)\n",
"        with torch.no_grad():\n",
"            outputs = model(**inputs)\n",
"            logits = outputs.logits\n",
"\n",
"        # Apply softmax to turn logits into class probabilities\n",
"        probabilities = F.softmax(logits, dim=1)\n",
"\n",
"        # Record the predicted class and its confidence for this chunk\n",
"        confidence, predicted_class = torch.max(probabilities, dim=1)\n",
"        predictions.append(predicted_class.item())\n",
"        confidences.append(confidence.item())\n",
"\n",
"    # Aggregate predictions (majority voting across chunks)\n",
"    aggregated_prediction_id = max(set(predictions), key=predictions.count)\n",
"    predicted_label = model.config.id2label[aggregated_prediction_id]\n",
"\n",
"    # Calculate average confidence across chunks\n",
"    average_confidence = sum(confidences) / len(confidences)\n",
"\n",
"    return {\n",
"        \"predicted_label\": predicted_label,\n",
"        \"average_confidence\": average_confidence\n",
"    }\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Chunk shape: (160000,)\n",
"Predicted Class: {'predicted_label': 'Real', 'average_confidence': 0.9984144032001495}\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick here for more info. \n",
"\u001b[1;31mView Jupyter log for further details."
]
}
],
"source": [
"# Example: Test a single audio file\n",
"# Replace with your audio file path\n",
"file_path = r\"D:\\repos\\GODAM\\audioFiles\\test.wav\"\n",
"result = predict_audio(file_path)\n",
"print(f\"Predicted Class: {result}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "modelEnv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}