{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "# Audio transcription\n",
    "\n",
    "Load an MP3 file, convert it to a mono 16 kHz waveform and transcribe it with a locally stored Wav2Vec2 CTC model."
   ],
   "id": "8b8c1a352260e82a"
  },
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-06-10T09:38:10.760409Z",
     "start_time": "2025-06-10T09:38:10.617508Z"
    }
   },
   "source": [
    "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
    "import torch\n",
    "import torchaudio.transforms as T\n",
    "import pydub\n",
    "import numpy as np"
   ],
   "outputs": [],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T09:43:53.684713Z",
     "start_time": "2025-06-10T09:43:53.681866Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# CONSTANTS\n",
    "# Local directory holding the pretrained Wav2Vec2 model and processor files.\n",
    "audio_model_dir = './models_for_proj/wav2vec2-base-960h'\n",
    "\n",
    "# MP3 file to transcribe (a file path, despite the `_dir` suffix).\n",
    "# Alternative sample: 'files/1f975693-876d-457b-a649-393859e79bf3.mp3'\n",
    "audio_dir = 'files/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3'\n",
    "\n",
    "# Sampling rate (Hz) the audio is resampled to and that is reported to the processor.\n",
    "target_sr = 16000"
   ],
   "id": "3ee50d096b2c9d44",
   "outputs": [],
   "execution_count": 19
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T09:43:54.053411Z",
     "start_time": "2025-06-10T09:43:54.006676Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Load the CTC model and its matching processor from the local model directory.\n",
    "model = Wav2Vec2ForCTC.from_pretrained(audio_model_dir)\n",
    "processor = Wav2Vec2Processor.from_pretrained(audio_model_dir)"
   ],
   "id": "b51a485af7b9cf14",
   "outputs": [],
   "execution_count": 20
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T09:43:54.603559Z",
     "start_time": "2025-06-10T09:43:54.414677Z"
    }
   },
   "cell_type": "code",
   "source": [
    "def read_mp3(f, normalized=False):\n",
    "    \"\"\"Read an MP3 file into a NumPy array.\n",
    "\n",
    "    Args:\n",
    "        f: Path or file object passed to ``pydub.AudioSegment.from_mp3``.\n",
    "        normalized: If True, return float32 samples divided by the full-scale\n",
    "            value of the file's sample width (32768 for 16-bit audio) instead\n",
    "            of the raw integer samples.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(frame_rate, samples)``. ``samples`` has shape ``(n_frames,)``\n",
    "        for mono audio and ``(n_frames, n_channels)`` otherwise.\n",
    "    \"\"\"\n",
    "    a = pydub.AudioSegment.from_mp3(f)\n",
    "    y = np.array(a.get_array_of_samples())\n",
    "    if a.channels > 1:\n",
    "        # pydub returns the channels interleaved, so each row becomes one frame.\n",
    "        y = y.reshape((-1, a.channels))\n",
    "    if normalized:\n",
    "        # Derive the scale from the sample width instead of assuming 16-bit audio.\n",
    "        full_scale = float(2 ** (8 * a.sample_width - 1))\n",
    "        return a.frame_rate, np.float32(y) / full_scale\n",
    "    return a.frame_rate, y\n",
    "\n",
    "# Usage\n",
    "audio_input_sr, audio_input_np = read_mp3(audio_dir)"
   ],
   "id": "ac7e2b43ace4d232",
   "outputs": [],
   "execution_count": 21
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T09:43:56.920665Z",
     "start_time": "2025-06-10T09:43:56.244101Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# --- Preprocess: mono float32 waveform at target_sr ------------------------- #\n",
    "audio_input_t = torch.tensor(audio_input_np, dtype=torch.float32)\n",
    "# read_mp3 returns (n_frames, n_channels) for multi-channel audio, while\n",
    "# T.Resample resamples along the last axis and the model takes one waveform,\n",
    "# so average the channels into a single mono signal before resampling.\n",
    "audio_mono_t = audio_input_t.mean(dim=1) if audio_input_t.ndim == 2 else audio_input_t\n",
    "resampler = T.Resample(audio_input_sr, target_sr, dtype=audio_mono_t.dtype)\n",
    "resampled_audio_input_t: torch.Tensor = resampler(audio_mono_t)\n",
    "resampled_audio_input_np = resampled_audio_input_t.numpy()\n",
    "\n",
    "# --- Transcribe ------------------------------------------------------------- #\n",
    "inputs = processor(resampled_audio_input_np, sampling_rate=target_sr, return_tensors=\"pt\", padding=True)\n",
    "# Inference only, so skip gradient tracking.\n",
    "with torch.no_grad():\n",
    "    logits = model(**inputs).logits\n",
    "# Take the highest-scoring token id per frame and let the processor turn the ids into text.\n",
    "predicted_ids = torch.argmax(logits, dim=-1)\n",
    "transcription = processor.decode(predicted_ids[0])\n",
    "transcription"
   ],
   "id": "2a4738e9d038985",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'IN A SAUCEPAN COMBINE RIPE STRAWBERRIES GRANULATED SUGAR FRESHLY SQUEEZED LEMON JUICE AND CORNSTARCH COOK THE MIXTURE OF A MEDIUM HEAT STIRRING CONSTANTLY UNTIL IT THICKENS TO A SMOOTH CONSISTENCY REMOVE FROM HEAT AND STIR IN A DASH OF PURE VANILLA EXTRACT ALLOW THE STRAWBERRY PIE FEELING TO COOL BEFORE USING IT AS A DELICIOUS AND FRUITY FILLING FOR YOUR PIE CRUST'"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 22
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "f159c2955f140600"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}