{ "cells": [ { "cell_type": "code", "id": "6369bdabdf59b658", "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:28:20.833977Z", "start_time": "2025-06-10T20:28:14.932967Z" } }, "source": [ "from langchain.document_loaders import YoutubeLoader\n", "from langchain_yt_dlp.youtube_loader import YoutubeLoaderDL\n", "from globals import *\n", "import torch\n", "import torchaudio.transforms as T\n", "import pydub\n", "import numpy as np\n", "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor" ], "outputs": [], "execution_count": 33 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:05:04.414620Z", "start_time": "2025-06-10T20:05:04.412354Z" } }, "cell_type": "code", "source": "url = \"https://www.youtube.com/watch?v=1htKBjuUWec\"\n", "id": "666e521f8ecf3f47", "outputs": [], "execution_count": 14 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:26:57.491908Z", "start_time": "2025-06-10T20:26:57.489481Z" } }, "cell_type": "code", "source": [ "# Load transcript as LangChain Documents\n", "# loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)\n", "# loader = YoutubeLoaderDL.from_youtube_url(\n", "# url, add_video_info=True\n", "# )\n", "# docs = loader.load()\n", "#\n", "# # Print the transcript content\n", "# for doc in docs:\n", "# print(doc.page_content)\n", "\n", "# Optionally, save to a file\n", "# with open(\"transcript.txt\", \"w\", encoding=\"utf-8\") as f:\n", "# for doc in docs:\n", "# f.write(doc.page_content)" ], "id": "initial_id", "outputs": [], "execution_count": 30 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:27:03.519450Z", "start_time": "2025-06-10T20:27:03.517474Z" } }, "cell_type": "code", "source": [ "# from pytube import YouTube\n", "#\n", "# yt = YouTube(url)\n", "#\n", "# # Download the audio stream (usually mp4)\n", "# stream = yt.streams.filter(only_audio=True).first()\n", "# stream.download(firstilename=f\"{yt.title}.mp3\")" ], "id": "ec4885c3a15d9a2b", "outputs": [], 
"execution_count": 31 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:04:18.601366Z", "start_time": "2025-06-10T20:04:18.597488Z" } }, "cell_type": "code", "source": [ "import ssl\n", "import certifi\n", "# Correct: assign a lambda (function) that returns a properly configured SSL context\n", "ssl._create_default_https_context = lambda: ssl.create_default_context(cafile=certifi.where())" ], "id": "167af702547c15e4", "outputs": [], "execution_count": 12 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:07:53.667018Z", "start_time": "2025-06-10T20:07:52.627871Z" } }, "cell_type": "code", "source": [ "\n", "from pytube import YouTube\n", "\n", "def download_video(url, output_path='.'):\n", " try:\n", " yt = YouTube(url)\n", " print('here')\n", " stream = yt.streams.get_highest_resolution()\n", " print(f\"Downloading: {yt.title}\")\n", " stream.download(output_path=output_path)\n", " print(\"Download completed.\")\n", " except Exception as e:\n", " print(f\"Error: {e}\")\n", "\n", "# Example usage\n", "download_video(url)" ], "id": "289b9a4321ea487b", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "here\n", "Error: HTTP Error 400: Bad Request\n" ] } ], "execution_count": 23 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:08:47.590897Z", "start_time": "2025-06-10T20:08:44.115350Z" } }, "cell_type": "code", "source": [ "import yt_dlp\n", "\n", "ydl_opts = {\n", " 'format': 'best', # or 'bestvideo+bestaudio'\n", " 'outtmpl': '%(title)s.%(ext)s', # save as video title\n", "}\n", "\n", "with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n", " ydl.download([url])" ], "id": "4eb045792318e67a", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n", "[youtube] 1htKBjuUWec: Downloading webpage\n", "[youtube] 1htKBjuUWec: Downloading tv client config\n", "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n", "[youtube] 1htKBjuUWec: 
Downloading ios player API JSON\n", "[youtube] 1htKBjuUWec: Downloading m3u8 information\n", "[info] 1htKBjuUWec: Downloading 1 format(s): 18\n", "[download] Destination: Teal'c coffee first time.mp4\n", "[download] 100% of 1.19MiB in 00:00:01 at 1.09MiB/s \n" ] } ], "execution_count": 24 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:22:32.706482Z", "start_time": "2025-06-10T20:22:29.843517Z" } }, "cell_type": "code", "source": [
"import yt_dlp\n",
"\n",
"file_name = 'my_audio_file'\n",
"ydl_opts = {\n",
"    'format': 'bestaudio/best',\n",
"    'outtmpl': f'files/{file_name}.%(ext)s',  # <-- set your custom filename here\n",
"    'postprocessors': [{\n",
"        'key': 'FFmpegExtractAudio',\n",
"        'preferredcodec': 'mp3',\n",
"        'preferredquality': '192',\n",
"    }],\n",
"}\n",
"\n",
"with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
"    ydl.download([url])"
], "id": "68b51ca78254d8f", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n", "[youtube] 1htKBjuUWec: Downloading webpage\n", "[youtube] 1htKBjuUWec: Downloading tv client config\n", "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n", "[youtube] 1htKBjuUWec: Downloading ios player API JSON\n", "[youtube] 1htKBjuUWec: Downloading m3u8 information\n", "[info] 1htKBjuUWec: Downloading 1 format(s): 251\n", "[download] Destination: files/my_audio_file.webm\n", "[download] 100% of 444.93KiB in 00:00:00 at 3.41MiB/s \n", "[ExtractAudio] Destination: files/my_audio_file.mp3\n", "Deleting original file files/my_audio_file.webm (pass -k to keep)\n" ] } ], "execution_count": 26 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:35:23.558866Z", "start_time": "2025-06-10T20:35:23.515883Z" } }, "cell_type": "code", "source": [
"audio_model_dir = './models_for_proj/wav2vec2-base-960h'\n",
"model = Wav2Vec2ForCTC.from_pretrained(audio_model_dir)\n",
"processor = Wav2Vec2Processor.from_pretrained(audio_model_dir)\n",
"\n",
"def read_mp3(f, normalized=False):\n",
"    \"\"\"Decode an MP3 file into a 1-D numpy array of samples.\n",
"\n",
"    Args:\n",
"        f: Path or file object accepted by pydub.AudioSegment.from_mp3.\n",
"        normalized: When True, return float32 samples divided by the full-scale value of the\n",
"            decoded sample width (2**15 for 16-bit audio); otherwise return the raw samples.\n",
"\n",
"    Returns:\n",
"        Tuple (frame_rate, samples). For multi-channel audio only channel index 1 is kept.\n",
"    \"\"\"\n",
"    a = pydub.AudioSegment.from_mp3(f)\n",
"    y = np.array(a.get_array_of_samples())\n",
"    if a.channels > 1:\n",
"        # Samples arrive interleaved; split them by the actual channel count, not a fixed 2\n",
"        y = y.reshape((-1, a.channels))\n",
"        # Keep a single channel (index 1) rather than downmixing, so stereo results are unchanged\n",
"        y = y[:, 1]\n",
"    if normalized:\n",
"        # Derive full scale from the sample width instead of assuming 16-bit audio\n",
"        full_scale = 2 ** (8 * a.sample_width - 1)\n",
"        return a.frame_rate, np.float32(y) / full_scale\n",
"    return a.frame_rate, y\n",
"\n",
"def describe_audio_tool(file_name: str) -> str:\n",
"    \"\"\"\n",
"    This tool receives the file name of an MP3 stored under 'files/' and returns the transcription of its speech.\n",
"    The audio is decoded with read_mp3, resampled to 16 kHz and passed through the wav2vec2 CTC model loaded above;\n",
"    the result is the greedy (argmax) decoding. Nothing is uploaded and no free-form description is generated.\n",
"    Inputs: file_name as str (file name relative to 'files/')\n",
"    Outputs: audio transcription as str\n",
"    Raises: ValueError if no audio samples could be decoded from the file\n",
"    \"\"\"\n",
"    # One constant feeds both the resampler and the processor so the two can never disagree\n",
"    target_sr = 16000\n",
"    # --------------------------------------------------------------------------- #\n",
"    file_dir = f'files/{file_name}'\n",
"    print(f\"{file_dir=}\")\n",
"    audio_input_sr, audio_input_np = read_mp3(file_dir)\n",
"    if audio_input_np.size == 0:\n",
"        raise ValueError(f\"No audio samples decoded from {file_dir}\")\n",
"    audio_input_t = torch.tensor(audio_input_np, dtype=torch.float32)\n",
"    resampler = T.Resample(audio_input_sr, target_sr, dtype=audio_input_t.dtype)\n",
"    resampled_audio_input_t: torch.Tensor = resampler(audio_input_t)\n",
"    resampled_audio_input_np = resampled_audio_input_t.numpy()\n",
"    # --------------------------------------------------------------------------- #\n",
"    inputs = processor(resampled_audio_input_np, sampling_rate=target_sr, return_tensors=\"pt\", padding=True)\n",
"    # Inference (no gradients needed)\n",
"    with torch.no_grad():\n",
"        logits = model(**inputs).logits\n",
"    # Decode: most likely token per frame, then let the processor decode the ids to text\n",
"    predicted_ids = torch.argmax(logits, dim=-1)\n",
"    transcription = processor.decode(predicted_ids[0])\n",
"    return transcription"
], "id": "64f438af2b38765f", "outputs": [], "execution_count": 43 }, { "metadata": { "ExecuteTime": { "end_time": "2025-06-10T20:35:27.235493Z", "start_time": "2025-06-10T20:35:26.202459Z" } }, "cell_type": "code", "source": "describe_audio_tool(file_name=f'{file_name}.mp3')", "id": "b4a6ae10e1cbbcae", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"file_dir='files/my_audio_file.mp3'\n" ] }, { "data": { "text/plain": [ "\"ALIS COFFEE'S GRAY WO IS JUST THINKING YET HE TAT SOMESCHICKERY A CHIC TEK H IS NOT HOT EXTREMELY\"" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 44 }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": "", "id": "ce9aaf764346b7e4" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }