{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Import data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting torchaudio\n", " Downloading torchaudio-2.5.1-cp312-cp312-win_amd64.whl.metadata (6.5 kB)\n", "Requirement already satisfied: torch==2.5.1 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torchaudio) (2.5.1)\n", "Requirement already satisfied: filelock in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (3.13.1)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (4.11.0)\n", "Requirement already satisfied: networkx in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (3.3)\n", "Requirement already satisfied: jinja2 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (3.1.4)\n", "Requirement already satisfied: fsspec in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (2024.6.1)\n", "Requirement already satisfied: setuptools in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (75.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from torch==2.5.1->torchaudio) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from sympy==1.13.1->torch==2.5.1->torchaudio) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\asus\\anaconda3\\lib\\site-packages (from jinja2->torch==2.5.1->torchaudio) (2.1.3)\n", "Downloading torchaudio-2.5.1-cp312-cp312-win_amd64.whl (2.4 MB)\n", " ---------------------------------------- 0.0/2.4 MB ? eta -:--:--\n", " ---------------------------------------- 2.4/2.4 MB 11.6 MB/s eta 0:00:00\n", "Installing collected packages: torchaudio\n", "Successfully installed torchaudio-2.5.1\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install torchaudio" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'datasets'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[3], line 5\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorchaudio\u001b[39;00m\n\u001b[1;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetDict, load_dataset\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprepare_dataset\u001b[39m(directory):\n\u001b[0;32m 8\u001b[0m data \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpath\u001b[39m\u001b[38;5;124m\"\u001b[39m: [], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabel\u001b[39m\u001b[38;5;124m\"\u001b[39m: []}\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'datasets'" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "## The data is not pushed to repo, only model and training logs etc are uploaded\n", "\n", "import os\n", "import torchaudio\n", "from datasets import DatasetDict, load_dataset\n", "\n", "def prepare_dataset(directory):\n", " data = {\"path\": [], \"label\": []}\n", " labels = {\"fake\": 0, \"real\": 1} # Map fake to 0 and real to 1\n", "\n", " for label, label_id in labels.items():\n", " folder_path = os.path.join(directory, label)\n", " for file in os.listdir(folder_path):\n", " if file.endswith(\".wav\"):\n", " data[\"path\"].append(os.path.join(folder_path, file))\n", " data[\"label\"].append(label_id)\n", " return data\n", "\n", "# Prepare train, validation, and test datasets\n", "train_data = prepare_dataset(r\"dataset\\for-norm\\for-norm\\testing\")\n", "val_data = prepare_dataset(r\"dataset\\for-norm\\for-norm\\testing\")\n", "test_data = prepare_dataset(r\"dataset\\for-norm\\for-norm\\testing\")\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from datasets import Dataset\n", "\n", "train_dataset = Dataset.from_dict(train_data)\n", "val_dataset = Dataset.from_dict(val_data)\n", "test_dataset = Dataset.from_dict(test_data)\n", "\n", "dataset = DatasetDict({\"train\": train_dataset, \"validation\": val_dataset, \"test\": test_dataset})\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import Model" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\60165\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\transformers\\configuration_utils.py:302: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.\n", " warnings.warn(\n", "Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "from transformers import AutoProcessor\n", "from transformers import AutoModelForAudioClassification\n", "\n", "# Initialize processor\n", "model_name = \"facebook/wav2vec2-base\" # Replace with your model if different\n", "model = AutoModelForAudioClassification.from_pretrained(model_name, num_labels=2) # Adjust `num_labels` based on your dataset\n", "processor = AutoProcessor.from_pretrained(model_name)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocess Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "af2c5f31f0db43ee9975023b28e2c57c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/4634 [00:00\n", "torch.int64\n" ] } ], "source": [ "import torch\n", "\n", "\n", "def preprocess_function(batch):\n", " audio = torchaudio.load(batch[\"path\"])[0].squeeze().numpy()\n", " inputs = processor(\n", " audio,\n", " sampling_rate=16000,\n", " padding=True,\n", " truncation=True,\n", " max_length=32000, \n", " return_tensors=\"pt\"\n", " )\n", " batch[\"input_values\"] = inputs.input_values[0]\n", " # Ensure labels are converted to LongTensor\n", " batch[\"label\"] = torch.tensor(batch[\"label\"], dtype=torch.long) # Convert label to LongTensor\n", " return batch\n", "\n", "processed_dataset = dataset.map(preprocess_function, remove_columns=[\"path\"], batched=False)\n", "# Set format to torch tensors for compatibility with PyTorch\n", "processed_dataset.set_format(type=\"torch\", columns=[\"input_values\", \"label\"])\n", "\n", "# Double-check the label type again\n", "print(processed_dataset[\"train\"][0][\"label\"], type(processed_dataset[\"train\"][0][\"label\"]))\n", "print(processed_dataset[\"train\"][0][\"label\"].dtype) # Should print torch.int64\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Map Training Labels" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Labels: {0: 'Fake', 1: 'Real'}\n", "Labels: {0: 'Fake', 1: 'Real'}\n" ] } ], "source": [ "# Ensure labels are in numerical format (e.g., 0, 1)\n", "id2label = {0: \"Fake\", 1: \"Real\"} # Define the mapping based on your dataset\n", "label2id = {v: k for k, v in id2label.items()} # Reverse mapping\n", "\n", "\n", "print(\"Labels:\", id2label)\n", "\n", "# Update the model's configuration with labels\n", "model.config.id2label = id2label\n", "model.config.label2id = label2id\n", "\n", "print(\"Labels:\", model.config.id2label) # Verify\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import DataCollatorWithPadding\n", "\n", "# Use the processor's tokenizer for padding\n", "data_collator = DataCollatorWithPadding(tokenizer=processor, padding=True)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Initialize Training Arguments" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TrainingArguments initialized successfully!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\60165\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\transformers\\training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n" ] } ], "source": [ "from transformers import TrainingArguments\n", "\n", "training_args = TrainingArguments(\n", " output_dir=\"./results\",\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " learning_rate=5e-5,\n", " per_device_train_batch_size=8,\n", " per_device_eval_batch_size=8,\n", " num_train_epochs=3,\n", " weight_decay=0.01,\n", " logging_dir=\"./logs\",\n", " logging_steps=10,\n", " save_total_limit=2,\n", " fp16=True, \n", " push_to_hub=False,\n", ")\n", "print(\"TrainingArguments initialized successfully!\")\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from transformers import Trainer\n", "\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=processed_dataset[\"train\"],\n", " eval_dataset=processed_dataset[\"validation\"],\n", " tokenizer=processor, # Required for the data collator\n", " data_collator=data_collator,\n", ")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Start Training" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "53044eac53174227a4eb19becbeb0bbf", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1740 [00:00 1: # More than 1 channel\n", " waveform = torch.mean(waveform, dim=0, keepdim=True)\n", " \n", " # Resample if needed\n", " if original_sampling_rate != sampling_rate:\n", " resampler = torchaudio.transforms.Resample(orig_freq=original_sampling_rate, new_freq=sampling_rate)\n", " waveform = resampler(waveform)\n", " \n", " # Calculate chunk size in samples\n", " chunk_size = sampling_rate * duration\n", " audio_chunks = []\n", "\n", " # Split the audio into chunks\n", " for start in range(0, waveform.shape[1], chunk_size):\n", " chunk = waveform[:, start:start + chunk_size]\n", " \n", " # Pad the last chunk if it's shorter than the chunk size\n", " if chunk.shape[1] < chunk_size:\n", " padding = chunk_size - chunk.shape[1]\n", " chunk = torch.nn.functional.pad(chunk, (0, padding))\n", " \n", " audio_chunks.append(chunk.squeeze().numpy())\n", " \n", " return audio_chunks\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[ 4.6742, -5.1778]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 2: tensor([[ 4.7219, -5.2332]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 3: tensor([[ 4.7545, -5.2641]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 4: tensor([[ 4.6714, -5.1740]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 5: tensor([[ 4.7660, -5.2743]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 6: tensor([[ 4.7724, -5.2836]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 7: tensor([[ 4.7268, -5.2362]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 8: tensor([[ 4.6898, -5.1898]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 9: tensor([[ 4.6646, -5.1708]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 10: tensor([[ 4.5948, -5.0867]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 11: tensor([[ 4.7512, -5.2579]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 12: tensor([[-4.5599, 5.0363]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 13: tensor([[-0.4980, 0.5546]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 14: tensor([[ 4.7295, -5.2358]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 15: tensor([[ 4.7426, -5.2534]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 16: tensor([[ 1.9405, -2.1493]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 17: tensor([[ 4.7168, -5.2235]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 18: tensor([[ 4.6801, -5.1907]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 19: tensor([[ 4.7454, -5.2568]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 20: tensor([[ 4.7642, -5.2723]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 21: tensor([[ 4.7868, -5.2969]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 22: tensor([[ 4.7600, -5.2690]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 23: tensor([[ 4.7337, -5.2411]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 24: tensor([[ 4.7835, -5.2943]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 25: tensor([[ 4.7572, -5.2647]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 26: tensor([[ 4.7485, -5.2581]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 27: tensor([[ 4.6874, -5.2023]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 28: tensor([[ 4.6877, -5.1922]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 29: tensor([[ 4.7474, -5.2561]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 30: tensor([[-4.3064, 4.7629]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 31: tensor([[-3.8067, 4.2312]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 32: tensor([[ 4.7217, -5.2325]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 33: tensor([[ 4.7798, -5.2913]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 34: tensor([[ 4.7214, -5.2355]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 35: tensor([[ 4.7116, -5.2192]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 36: tensor([[ 4.6687, -5.1812]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 37: tensor([[-0.8128, 0.9402]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 38: tensor([[ 4.7259, -5.2333]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 39: tensor([[ 4.5698, -5.0731]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 40: tensor([[ 4.7467, -5.2544]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 41: tensor([[ 4.7781, -5.2884]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 42: tensor([[ 4.7243, -5.2365]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 43: tensor([[ 3.9325, -4.3570]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 44: tensor([[-3.8786, 4.3105]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 45: tensor([[ 3.3633, -3.6958]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 46: tensor([[ 4.7127, -5.2213]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 47: tensor([[ 0.0519, -0.0359]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 48: tensor([[ 4.7457, -5.2535]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 49: tensor([[ 3.4856, -3.8528]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 50: tensor([[ 4.6485, -5.1538]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 51: tensor([[ 4.6274, -5.1355]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 52: tensor([[ 4.6852, -5.1872]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 53: tensor([[ 4.7341, -5.2452]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 54: tensor([[-4.5378, 5.0152]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 55: tensor([[ 4.6822, -5.1887]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 56: tensor([[ 4.7186, -5.2252]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 57: tensor([[ 4.7688, -5.2787]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 58: tensor([[ 4.7285, -5.2342]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 59: tensor([[ 4.7447, -5.2550]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 60: tensor([[ 4.5292, -5.0253]])\n", "Predicted Class: Fake\n" ] } ], "source": [ "def predict_audio(file_path):\n", " \"\"\"\n", " Predicts the class of an audio file by aggregating predictions from chunks.\n", " \n", " Args:\n", " file_path (str): Path to the audio file.\n", "\n", " Returns:\n", " str: Predicted class label.\n", " \"\"\"\n", " # Prepare audio chunks\n", " audio_chunks = prepare_audio(file_path)\n", " predictions = []\n", "\n", " for i, chunk in enumerate(audio_chunks):\n", " # Prepare input for the model\n", " print(f\"Chunk shape: {chunk.shape}\")\n", " inputs = processor(\n", " chunk, sampling_rate=16000, return_tensors=\"pt\", padding=True\n", " )\n", " \n", " # Perform inference\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " logits = outputs.logits\n", " print(f\"Logits for chunk {i + 1}: {logits}\") # Print the logits\n", " predicted_class = torch.argmax(logits, dim=1).item()\n", " predictions.append(predicted_class)\n", " \n", " # Aggregate predictions (e.g., majority voting)\n", " aggregated_prediction = max(set(predictions), key=predictions.count)\n", " \n", " # Convert class ID to label\n", " return model.config.id2label[aggregated_prediction]\n", "\n", "# Example: Test a single audio file\n", "file_path = r\"D:\\Year 3 Sem 2\\Godamlah\\Deepfake\\deepfake model ver3\\data\\KAGGLE\\AUDIO\\FAKE\\biden-to-linus.wav\" # Replace with your audio file path\n", "predicted_class = predict_audio(file_path)\n", "print(f\"Predicted Class: {predicted_class}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Batch Testing" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-3.3933, 3.7590]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-3.3933, 3.7590]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-1.5531, 1.7190]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-1.5917, 1.7620]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[ 4.7569, -5.2631]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[ 4.7569, -5.2630]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-4.5033, 4.9768]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[-4.5029, 4.9765]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[ 4.7639, -5.2653]])\n", "Chunk shape: (160000,)\n", "Logits for chunk 1: tensor([[ 4.7639, -5.2653]])\n", "{'file': 'human voice 1 to mr beast.mp3', 'predicted_class': 'Real'}\n", "{'file': 'human voice 1 to mr beast.wav', 'predicted_class': 'Real'}\n", "{'file': 'human voice 1.mp3', 'predicted_class': 'Real'}\n", "{'file': 'human voice 1.wav', 'predicted_class': 'Real'}\n", "{'file': 'human voice 2 to Jett.mp3', 'predicted_class': 'Fake'}\n", "{'file': 'human voice 2 to Jett.wav', 'predicted_class': 'Fake'}\n", "{'file': 'human voice 2.mp3', 'predicted_class': 'Real'}\n", "{'file': 'human voice 2.wav', 'predicted_class': 'Real'}\n", "{'file': 'text to audio jett.mp3', 'predicted_class': 'Fake'}\n", "{'file': 'text to audio jett.wav', 'predicted_class': 'Fake'}\n" ] } ], "source": [ "import os\n", "\n", "def batch_predict(test_folder, limit=10):\n", " \"\"\"\n", " Batch processes audio files for predictions.\n", "\n", " Args:\n", " test_folder (str): Path to the folder containing audio files.\n", " limit (int): Maximum number of files to process. Set to None for all files.\n", "\n", " Returns:\n", " list: A list of dictionaries containing file names and predicted classes.\n", " \"\"\"\n", " results = []\n", " files = os.listdir(test_folder)\n", "\n", " # Limit the number of files processed if a limit is provided\n", " if limit is not None:\n", " files = files[:limit]\n", "\n", " # Process each file in the folder\n", " for file_name in files:\n", " file_path = os.path.join(test_folder, file_name)\n", " try:\n", " predicted_class = predict_audio(file_path) # Use the predict_audio function\n", " results.append({\"file\": file_name, \"predicted_class\": predicted_class})\n", " except Exception as e:\n", " print(f\"Error processing {file_name}: {e}\")\n", " \n", " return results\n", "\n", "# Specify the folder path and limit\n", "test_folder = r\"D:\\Year 3 Sem 2\\Godamlah\\Deepfake\\deepfake model ver3\\data\\real life test audio\" # Replace with your test folder path\n", "results = batch_predict(test_folder, limit=10)\n", "\n", "# Print results\n", "for result in results:\n", " print(result)\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "479899631f95453e9f82355f7511cff3", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/580 [00:00