samir72 committed on
Commit
77d5f0d
·
1 Parent(s): c54230f

Adding Azure Container App Under New Folder Name

Browse files
Youtubetranscription_summarizer.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
3
  from typing import Optional, Callable, Any
4
  import yt_dlp
5
  from faster_whisper import WhisperModel
 
6
 
7
 
8
  def main(url:str):
@@ -18,8 +19,6 @@ def main(url:str):
18
  #Summarize the transcript using Phi
19
  return transcript
20
 
21
- import socket
22
-
23
  def nslookup(domain):
24
  try:
25
  # Perform DNS lookup for the domain
 
3
  from typing import Optional, Callable, Any
4
  import yt_dlp
5
  from faster_whisper import WhisperModel
6
+ import socket
7
 
8
 
9
  def main(url:str):
 
19
  #Summarize the transcript using Phi
20
  return transcript
21
 
 
 
22
  def nslookup(domain):
23
  try:
24
  # Perform DNS lookup for the domain
app.py CHANGED
@@ -9,7 +9,7 @@ from openai import AzureOpenAI # official OpenAI SDK, works with Azure endpoint
9
  import json
10
  import subprocess
11
  import Youtubetranscription_summarizer
12
- from app.app.Youtubeextraction import extract # Youtube download helper functions
13
  #from pydantic import BaseModel, AnyUrl # Pydantic models for request validation in YouTube extraction
14
  #from fastapi import FastAPI, HTTPException # FastAPI for building the API
15
  #app = FastAPI() ## Initialize FastAPI app for testing in local
 
9
  import json
10
  import subprocess
11
  import Youtubetranscription_summarizer
12
+ from extract.app.Youtubeextraction import extract # Youtube download helper functions
13
  #from pydantic import BaseModel, AnyUrl # Pydantic models for request validation in YouTube extraction
14
  #from fastapi import FastAPI, HTTPException # FastAPI for building the API
15
  #app = FastAPI() ## Initialize FastAPI app for testing in local
extract/.DS_Store ADDED
Binary file (6.15 kB). View file
 
extract/Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------- Base ----------
2
+ #FROM python:3.11-slim
3
+ FROM cab337fa40e5acr.azurecr.io/python:3.11-slim
4
+
5
+ # ---------- System deps ----------
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ ffmpeg ca-certificates curl \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # ---------- Workdir ----------
11
+ WORKDIR /workspace
12
+
13
+ # ---------- Python deps ----------
14
+ # requirements.txt is at AUDIOSUMMARIZER/extract/app/requirements.txt (build context is AUDIOSUMMARIZER/extract)
15
+
16
+ COPY app/requirements.txt .
17
+ RUN pip install --no-cache-dir --upgrade pip \
18
+ && pip install --no-cache-dir -r requirements.txt
19
+
20
+ # ---------- App code ----------
21
+ # Copy EVERYTHING under AUDIOSUMMARIZER/extract (includes subfolders: app/ and utils/)
22
+ COPY . /workspace/extract
23
+
24
+ # Make /workspace importable so "extract.app.Youtubeextraction" & "extract.utils..." work
25
+ ENV PYTHONPATH=/workspace
26
+
27
+ # Runtime env (override at deploy)
28
+ ENV HOST=0.0.0.0
29
+ ENV PORT=8080
30
+ ENV AZURE_STORAGE_ACCOUNT=__SET_AT_DEPLOY__
31
+ ENV AZURE_BLOB_CONTAINER=__SET_AT_DEPLOY__
32
+
33
+ EXPOSE 8080
34
+
35
+ # Your ASGI app is defined in extract/app/Youtubeextraction.py as `app = FastAPI()`
36
+ CMD ["uvicorn", "extract.app.Youtubeextraction:app", "--host", "0.0.0.0", "--port", "8080"]
extract/__init__.py ADDED
File without changes
extract/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (153 Bytes). View file
 
extract/app/.DS_Store ADDED
Binary file (6.15 kB). View file
 
extract/app/Youtubeextraction.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, tempfile, subprocess, re, json, shutil, time
2
+ from fastapi import FastAPI, HTTPException
3
+ from pathlib import Path
4
+ from typing import Optional, Callable, Any
5
+ import yt_dlp
6
+ # from utils.storage import upload_and_sign # To remove circular import issue
7
+ from extract.utils.storage import upload_and_sign # To remove circular import issue
8
+ from extract.utils.retrieve_filepath import retrieve_file_path # To get the file path of cookies.txt
9
+
10
+ app = FastAPI()
11
+
12
def ensure_ffmpeg():
    """
    Verify that ffmpeg is available in PATH and can actually be executed.

    On success, prints the resolved ffmpeg path and, when ffmpeg produced
    any output, the first line of its version banner.

    Raises
    ------
    RuntimeError
        If ffmpeg is not on PATH, cannot be launched, or exits with a
        non-zero status when asked for its version.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is None:
        raise RuntimeError(
            "FFmpeg not found in PATH.\n\n"
            "👉 For Hugging Face Spaces:\n"
            " • If using Gradio/Streamlit template → add a `packages.txt` file at repo root with a line: ffmpeg\n"
            " • If using Docker template → add `apt-get install -y ffmpeg` in your Dockerfile\n\n"
            "Without ffmpeg, yt-dlp cannot extract/convert audio."
        )

    try:
        # Run the exact binary resolved above so the path that is reported
        # is the one that was actually tested.
        result = subprocess.run(
            [ffmpeg_path, "-version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
    except Exception as e:
        # Keep the original cause attached for easier debugging.
        raise RuntimeError(
            f"ffmpeg was found at {ffmpeg_path} but could not run: {e}"
        ) from e

    # A binary that starts but fails is as unusable as a missing one.
    if result.returncode != 0:
        raise RuntimeError(
            f"ffmpeg was found at {ffmpeg_path} but could not run: "
            f"exit status {result.returncode}"
        )

    print("✅ ffmpeg found at:", ffmpeg_path)
    banner_lines = result.stdout.splitlines()
    if banner_lines:
        print(banner_lines[0])  # show first line of version info
40
+
41
class YTDLPError(RuntimeError):
    """Error type of the extraction helpers (e.g. a required executable is missing)."""
43
+
44
def _require(bin_name: str):
    """Raise YTDLPError unless an executable named *bin_name* is found on PATH."""
    resolved = shutil.which(bin_name)
    if resolved is not None:
        return
    raise YTDLPError(f"Required executable '{bin_name}' not found in PATH.")
47
+
48
+
49
@app.get("/health")
def health():
    """Liveness endpoint: always answers with ``{"ok": True}``."""
    status = {"ok": True}
    return status
52
+
53
@app.post("/extract")
def extract(
    youtube_url: str,
    out_dir: Optional[str] = None,
    target_sr: int = 16000,
    target_channels: int = 1,
    quiet: bool = True,
    keep_intermediate: bool = False,
    progress_hook: Optional[Callable[[dict[str, Any]], None]] = None,
) -> str:
    """
    Download YouTube audio via yt_dlp's Python API, extract it to WAV,
    resample it with ffmpeg, upload the result and return a signed URL.

    Args
    ----
    youtube_url : str
    out_dir : Optional[str] Directory for outputs (temp dir if None).
    target_sr : int Sample rate for final WAV (default 16000).
    target_channels : int Channels for final WAV (default 1 = mono).
    quiet : bool Suppress yt-dlp / ffmpeg logs if True.
    keep_intermediate : bool Keep local files (including the
        pre-downsampled WAV) if True.
    progress_hook : callable Optional yt-dlp progress hook.

    Returns
    -------
    str
        On success, the value returned by ``upload_and_sign`` for the
        resampled WAV. On a handled failure (cookie file missing, yt-dlp
        error, no WAV produced, ffmpeg error) a human-readable error
        message is returned instead of raising.

    Raises
    ------
    ValueError if ``youtube_url`` is empty or not a string.
    YTDLPError if ffmpeg is not available on PATH.
    """
    if not youtube_url or not isinstance(youtube_url, str):
        raise ValueError("youtube_url must be a non-empty string.")

    # The resampling step below invokes ffmpeg directly, so fail early.
    _require("ffmpeg")

    # When no out_dir is given the directory is a throwaway created here.
    # The caller only ever receives a URL or message, never a local path,
    # so that directory must be removed here or it leaks on every request.
    owns_work_dir = not out_dir
    work_dir = Path(out_dir or tempfile.mkdtemp(prefix="ytwav_")).resolve()
    work_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Use cookies.txt if available; without it the request is refused.
        cookies_path = retrieve_file_path("cookies.txt")
        if not cookies_path:
            print("Cookie file NOT found in container!")
            return "User authentication cookie file NOT found in container! Please try again later."

        # First stage: let yt-dlp extract WAV (whatever SR/channels)
        out_template = str(work_dir / "%(title).100B [%(id)s].%(ext)s")
        hooks = [progress_hook] if progress_hook else []
        ydl_opts = {
            "cookiefile": cookies_path,
            "format": "bestaudio/best",
            "outtmpl": out_template,
            "noplaylist": True,
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "wav",
                    "preferredquality": "0",
                }
            ],
            "quiet": quiet,
            "verbose": not quiet,
            "no_warnings": quiet,
            "progress_hooks": hooks,
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.extract_info(youtube_url, download=True)
        except Exception as e:
            # Errors are reported as strings rather than raised.
            return f"yt-dlp API failed: {e}"

        # Locate the produced WAV (pre-downsampled); newest file wins.
        pre_wavs = list(work_dir.glob("*.wav"))
        if not pre_wavs:
            return "yt-dlp completed but no WAV was found."
        pre_wav = max(pre_wavs, key=lambda p: p.stat().st_mtime)

        # Second stage: force the requested sample rate / channels via ffmpeg
        final_wav = pre_wav.with_name(
            pre_wav.stem + f".{target_sr}Hz.{target_channels}ch.wav"
        )
        try:
            subprocess.run(
                [
                    "ffmpeg", "-y",
                    "-i", str(pre_wav),
                    "-ac", str(target_channels),
                    "-ar", str(target_sr),
                    str(final_wav),
                ],
                check=True,
                stdout=subprocess.PIPE if quiet else None,
                stderr=subprocess.PIPE if quiet else None,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            return f"ffmpeg failed to resample: {e.stderr or e.stdout}"

        # Third stage: upload + sign (short-lived)
        signed = upload_and_sign(final_wav, ttl_minutes=45)

        # Caller-supplied directory: drop only the intermediate WAV.
        # A temp dir created here is removed wholesale in the finally below.
        if not keep_intermediate and not owns_work_dir:
            try:
                if pre_wav != final_wav:
                    pre_wav.unlink(missing_ok=True)
            except OSError as e:
                # Best-effort cleanup: report, but never fail the request.
                print(f"Could not remove intermediate file '{pre_wav}': {e}")

        return signed
    finally:
        if owns_work_dir and not keep_intermediate:
            shutil.rmtree(work_dir, ignore_errors=True)
extract/app/__init__.py ADDED
File without changes
extract/app/__pycache__/Youtubeextraction.cpython-313.pyc ADDED
Binary file (7.03 kB). View file
 
extract/app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (157 Bytes). View file
 
extract/app/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ dotenv==0.9.9
2
+ requests==2.32.5
3
+ azure-identity==1.25.0
4
+ yt_dlp==2025.9.23
5
+ fastapi
6
+ uvicorn[standard]==0.30.6
7
+ azure-storage-blob==12.20.0
extract/utils/__init__.py ADDED
File without changes
extract/utils/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (149 Bytes). View file
 
extract/utils/__pycache__/retrieve_filepath.cpython-313.pyc ADDED
Binary file (890 Bytes). View file
 
extract/utils/__pycache__/storage.cpython-313.pyc ADDED
Binary file (2.72 kB). View file
 
extract/utils/cookies.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Netscape HTTP Cookie File
2
+ # http://curl.haxx.se/rfc/cookie_spec.html
3
+ # This is a generated file! Do not edit.
4
+
5
+ .google.com TRUE /verify TRUE 1774643801 SNID ABablneQpL5nd50lg171XcyVs24gvpO4-3XC33KqvJzhOyKzSjsRCTu2WNs98YrIa4rthJXC9umV_gfOPujCE8rpvEpn3J6IGBQ
6
+ .youtube.com TRUE / TRUE 1793417029 PREF f4=4000000&tz=America.Los_Angeles
7
+ accounts.google.com FALSE / TRUE 1761424715 OTZ 8275479_84_88_104280_84_446940
8
+ accounts.google.com FALSE / TRUE 1793392730 __Host-GAPS 1:yqAYuvXMLM6p418ku6ftwLAiw66lW5JDAUnpfgajBCLlPRefQ12FH4OpX-afmjZbWzdGaHl9_g2zQNGp0TzWzmv9LvMnGA:-kKVsK7giPvGw84Q
9
+ .google.com TRUE / TRUE 1774384731 AEC AaJma5tejBcPWOwI5osyzZtLbsylZCmSH2lz0zhEN1Q4WqoOtfvlBhXmMA
10
+ ogs.google.com FALSE / TRUE 1761424734 OTZ 8275479_84_88_104280_84_446940
11
+ myaccount.google.com FALSE / TRUE 1793398313 OSID g.a0001wjtWOu_WLZdXb5Jtdi-tk_TQCulwBmAfNO6NopkIFhHEeO0oDV7zmz7iijIbHt8FFlaFwACgYKAU8SARYSFQHGX2Mic1nR4MKxTRPME0LtDGY7ZhoVAUF8yKpfjouTWz97PA6bCvAoOqoA0076
12
+ myaccount.google.com FALSE / TRUE 1793398313 __Secure-OSID g.a0001wjtWOu_WLZdXb5Jtdi-tk_TQCulwBmAfNO6NopkIFhHEeO0iVJNUJbpqueptsN8uW4S1QACgYKAR8SARYSFQHGX2Mi9FkBlKv3anwP8McJ3zPnnhoVAUF8yKp4GG9a4Pu8vHbyDK9x6Kq80076
13
+ www.google.com FALSE / TRUE 1761430314 OTZ 8275572_84_88_104280_84_446940
14
+ myaccount.google.com FALSE / TRUE 1761430314 OTZ 8275572_84_88_104280_84_446940
15
+ .youtube.com TRUE / TRUE 1758857100 GPS 1
16
+ .google.com TRUE / TRUE 1774643802 NID 525=lymNXAPmK1skz9GhpzWLwdQ4iJoD_pERbGKpfT4LxvfYHTFIl9pBIqK3euxYSbPLgHqkjkX7lmdbG-qobNxdcy3yrW8AmU_Vb6bNJeMTUYMMguEI51UHhtt8ykJedgnEModYiZs066xs9C3wcJEI_p77uKGD-5VZ4MNhZ-hwXOtJKDJmhotLjYezs57AnN2sFSWyqKGnaeJddliiW2KlW0eIJ7YVj9d6v1jGjGnMWFZM2-Xt_n2Z7J3_M3U-ylF3D97l8UKFzBrriUY5wkaZdLhkeTmWrz7MhEPWczFhXATk7N46sIdCBM5SxkPConsz7J5AR-UojRMysZo1S66PF7D5KoVcocPocZdxFHzJNoIEkOu7M-mwqdq3ployT6_6k_01YxGLxJPbLbpBjPBKVvh8fllXPratnm0t3tweLwCPy6sToVfv5xHhmHDNHgvToX8eXztvs1_CmWDzuKv9jjYPOTn_8BVejiacNs3GxbouJT81Kd7nwo2aOPDub61EoPjjz8Xnyl5n9RuS103e8PlzQzVroNdApo_oJ9kuEtNCqz_P8VTugVvCyix322EcvN1bVkoQuLqvmRdvf4-56tBaFU24pPW7cL2Nh5-qJM4Ce7btfNsFg5dj6KfhzXphgttNvyLtoyUxKz1ImuWRe8E_I4kMhNNsq-0G3O2-0pzr2xUK7cc
17
+ .google.com TRUE / FALSE 1793417027 SID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzDYRSn4XTSQZLLOb_8cP0wAACgYKAa8SARYSFQHGX2MishTu2aIUO6jMkaY69qI5whoVAUF8yKrlVW_5LLcbhY-z_VWKPKK-0076
18
+ .google.com TRUE / TRUE 1793417027 __Secure-1PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzYEk4ss5Ga5FZxA026Q3PowACgYKATQSARYSFQHGX2MiH5U4v4vLtLlQfSNMFvyerhoVAUF8yKoIiEMKUn6q2zQUoTesawGY0076
19
+ .google.com TRUE / TRUE 1793417027 __Secure-3PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzPmwdT2LALENSFlwOvA6kdwACgYKAc0SARYSFQHGX2MiLys0aIcNHCDTZHKBpoDsfxoVAUF8yKqVGUVzQfVK3kw47eGoIzXE0076
20
+ .google.com TRUE / FALSE 1793417027 HSID Aa30XYEO0EbwBslvG
21
+ .google.com TRUE / TRUE 1793417027 SSID Ax01UBs9y7oEuhwW9
22
+ .google.com TRUE / FALSE 1793417027 APISID MqqEFVyUEaf_9Z6J/A8x1V3btbuj80Nf_S
23
+ .google.com TRUE / TRUE 1793417027 SAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
24
+ .google.com TRUE / TRUE 1793417027 __Secure-1PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
25
+ .google.com TRUE / TRUE 1793417027 __Secure-3PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
26
+ accounts.google.com FALSE / TRUE 1793417027 LSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40uL024qAHfEnmmAJQXw1TCQACgYKAXYSARYSFQHGX2MiPT4JjSx4k5O4c3Fe0nJ1yRoVAUF8yKrm6qJ7ebQOMCPIHCecZ7Sm0076
27
+ accounts.google.com FALSE / TRUE 1793417027 __Host-1PLSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40co5eg0gt5YUk79mUm-eB3gACgYKAc0SARYSFQHGX2Mix_fLnTRc4lvYYQiZJeI2GBoVAUF8yKpOc_BvpJh6XPy5h9Njy3-p0076
28
+ accounts.google.com FALSE / TRUE 1793417027 __Host-3PLSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40XpQEuTNC0_9mqDKFpJy9_wACgYKAZ0SARYSFQHGX2Mi1L5Qz2QaGQn5VJ-zRWaa_BoVAUF8yKqAKBIht30fkTzaBiDEcAMW0076
29
+ accounts.google.com FALSE / TRUE 1793417027 ACCOUNT_CHOOSER AFx_qI6mc8m87lZBbCrGpqKQwnCNaKnXceLmYwJRLLzJlSFupCSDJdMTV86dVAnSZ0t_TXcGfBow7dLJCzDCW42iFH164e-j9QR1d1m_ugn8s59IbdZWNfMaBh79fRp6FJ0eRTxlzJBVHLLeYbj5rdEoGCJKNUTAZw
30
+ .youtube.com TRUE / TRUE 1790393027 __Secure-1PSIDTS sidts-CjQBmkD5SxPGxTN9xFQqR0jIKppCWt45MEJ_XcwQnQDw_nfj7DONF47xy8VFb4XVw_bkTiroEAA
31
+ .youtube.com TRUE / TRUE 1790393027 __Secure-3PSIDTS sidts-CjQBmkD5SxPGxTN9xFQqR0jIKppCWt45MEJ_XcwQnQDw_nfj7DONF47xy8VFb4XVw_bkTiroEAA
32
+ .youtube.com TRUE / FALSE 1793417027 HSID AiEF6eJ88swa_qwf5
33
+ .youtube.com TRUE / TRUE 1793417027 SSID ARPWgjWmCWIdlgQd3
34
+ .youtube.com TRUE / FALSE 1793417027 APISID MqqEFVyUEaf_9Z6J/A8x1V3btbuj80Nf_S
35
+ .youtube.com TRUE / TRUE 1793417027 SAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
36
+ .youtube.com TRUE / TRUE 1793417027 __Secure-1PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
37
+ .youtube.com TRUE / TRUE 1793417027 __Secure-3PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
38
+ .youtube.com TRUE / FALSE 1793417027 SID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzDYRSn4XTSQZLLOb_8cP0wAACgYKAa8SARYSFQHGX2MishTu2aIUO6jMkaY69qI5whoVAUF8yKrlVW_5LLcbhY-z_VWKPKK-0076
39
+ .youtube.com TRUE / TRUE 1793417027 __Secure-1PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzYEk4ss5Ga5FZxA026Q3PowACgYKATQSARYSFQHGX2MiH5U4v4vLtLlQfSNMFvyerhoVAUF8yKoIiEMKUn6q2zQUoTesawGY0076
40
+ .youtube.com TRUE / TRUE 1793417027 __Secure-3PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzPmwdT2LALENSFlwOvA6kdwACgYKAc0SARYSFQHGX2MiLys0aIcNHCDTZHKBpoDsfxoVAUF8yKqVGUVzQfVK3kw47eGoIzXE0076
41
+ .youtube.com TRUE / TRUE 1793417027 LOGIN_INFO AFmmF2swRQIgJ8TM9kR3K0ag_TbOndarfGMHck96yLnOHCeZUsvIge0CIQDOJHH5XfYu6pi_gX59DWxlIGe6LmhKG3Jo_U59eJIPUg:QUQ3MjNmdzFZVHpXQzNoSkgtSE1qbUVpWEMxdGhCMHFyOWNVU2xDYzNpRHMyZ1NKaEtYVUhnR0xKUnNlM2VBaXZGYVNOcnFwZEtvSFVLcmpWUHpBMmZBRWowV1cwMy1BWGNvYXdHMXdsWW5jYURxSkN4TXFVWXZfVDBaRWp2UHpnS2JBWFdJbUVKUmQ3eXhXMjVXZkZRTGdVYUdzYnJKdGxn
42
+ .google.com TRUE / FALSE 1790393027 SIDCC AKEyXzXCX8LKOlj6aDhXUTLs9sKLvpp7Bk1HPBTBzUgUb__cG4l4AOdru_QXF4CRsiGSWsDf
43
+ .google.com TRUE / TRUE 1790393027 __Secure-1PSIDCC AKEyXzUWej-MwEIo72NBDrRlvDNWaURzOvux0NpE52VJktaiwpt8xs8J32FerokUIvoTKJsdug
44
+ .google.com TRUE / TRUE 1790393027 __Secure-3PSIDCC AKEyXzVvkciMQaOGAxypKeCoOSnk5V2AaT18HGMeAVW8dUdcZa0rlq8LjNlXc9wElhzBYzYP8g
45
+ .youtube.com TRUE / FALSE 1790393031 SIDCC AKEyXzUX5vaO9zsgIyoG05GA_Hwz1yDVIHezT92XiXPBbCz-0CpHCIUIB_utitiTsrmnSNfY
46
+ .youtube.com TRUE / TRUE 1790393031 __Secure-1PSIDCC AKEyXzWfwVLeSEf2hjzTYKFhHAZVGoQMult6W6wsogIkCH0cPbIIalZIhmzGmGjVVcBVHocF
47
+ .youtube.com TRUE / TRUE 1790393031 __Secure-3PSIDCC AKEyXzXyCHp-2Hu_GGZdM-oG0Mktt8FwgaE-0NiwdoT572oQ8NYPzM6BIaEsszZveDKJJmd_
48
+ .google.com TRUE /verify TRUE 1774643801 SNID ABablneQpL5nd50lg171XcyVs24gvpO4-3XC33KqvJzhOyKzSjsRCTu2WNs98YrIa4rthJXC9umV_gfOPujCE8rpvEpn3J6IGBQ
49
+ .youtube.com TRUE / TRUE 0 YSC QlLyIMoyoro
50
+ .youtube.com TRUE / TRUE 1774409031 VISITOR_INFO1_LIVE 0Io5PoZoNhQ
51
+ .youtube.com TRUE / TRUE 1774409031 VISITOR_PRIVACY_METADATA CgJVUxIEGgAgXw%3D%3D
52
+ .youtube.com TRUE / TRUE 1793417029 PREF f4=4000000&tz=America.Los_Angeles
53
+ .youtube.com TRUE / TRUE 1774384705 __Secure-ROLLOUT_TOKEN CJvi5d304cLkxQEQ952ItOL0jwMYxOPstOL0jwM%3D
54
+ accounts.google.com FALSE / TRUE 1761424715 OTZ 8275479_84_88_104280_84_446940
55
+ accounts.google.com FALSE / TRUE 1793392730 __Host-GAPS 1:yqAYuvXMLM6p418ku6ftwLAiw66lW5JDAUnpfgajBCLlPRefQ12FH4OpX-afmjZbWzdGaHl9_g2zQNGp0TzWzmv9LvMnGA:-kKVsK7giPvGw84Q
56
+ .google.com TRUE / TRUE 1774384731 AEC AaJma5tejBcPWOwI5osyzZtLbsylZCmSH2lz0zhEN1Q4WqoOtfvlBhXmMA
57
+ ogs.google.com FALSE / TRUE 1761424734 OTZ 8275479_84_88_104280_84_446940
58
+ myaccount.google.com FALSE / TRUE 1793398313 OSID g.a0001wjtWOu_WLZdXb5Jtdi-tk_TQCulwBmAfNO6NopkIFhHEeO0oDV7zmz7iijIbHt8FFlaFwACgYKAU8SARYSFQHGX2Mic1nR4MKxTRPME0LtDGY7ZhoVAUF8yKpfjouTWz97PA6bCvAoOqoA0076
59
+ myaccount.google.com FALSE / TRUE 1793398313 __Secure-OSID g.a0001wjtWOu_WLZdXb5Jtdi-tk_TQCulwBmAfNO6NopkIFhHEeO0iVJNUJbpqueptsN8uW4S1QACgYKAR8SARYSFQHGX2Mi9FkBlKv3anwP8McJ3zPnnhoVAUF8yKp4GG9a4Pu8vHbyDK9x6Kq80076
60
+ www.google.com FALSE / TRUE 1761430314 OTZ 8275572_84_88_104280_84_446940
61
+ myaccount.google.com FALSE / TRUE 1761430314 OTZ 8275572_84_88_104280_84_446940
62
+ .doubleclick.net TRUE / TRUE 1774390365 APC AfxxVi4PoPaa_4bcRFWe3s7UAqKd77-pb40Xtnkg9fz_z_K9Jaqxwg
63
+ .doubleclick.net TRUE / TRUE 1774390365 receive-cookie-deprecation 1
64
+ .youtube.com TRUE / TRUE 1758857100 GPS 1
65
+ .google.com TRUE / TRUE 1774643802 NID 525=lymNXAPmK1skz9GhpzWLwdQ4iJoD_pERbGKpfT4LxvfYHTFIl9pBIqK3euxYSbPLgHqkjkX7lmdbG-qobNxdcy3yrW8AmU_Vb6bNJeMTUYMMguEI51UHhtt8ykJedgnEModYiZs066xs9C3wcJEI_p77uKGD-5VZ4MNhZ-hwXOtJKDJmhotLjYezs57AnN2sFSWyqKGnaeJddliiW2KlW0eIJ7YVj9d6v1jGjGnMWFZM2-Xt_n2Z7J3_M3U-ylF3D97l8UKFzBrriUY5wkaZdLhkeTmWrz7MhEPWczFhXATk7N46sIdCBM5SxkPConsz7J5AR-UojRMysZo1S66PF7D5KoVcocPocZdxFHzJNoIEkOu7M-mwqdq3ployT6_6k_01YxGLxJPbLbpBjPBKVvh8fllXPratnm0t3tweLwCPy6sToVfv5xHhmHDNHgvToX8eXztvs1_CmWDzuKv9jjYPOTn_8BVejiacNs3GxbouJT81Kd7nwo2aOPDub61EoPjjz8Xnyl5n9RuS103e8PlzQzVroNdApo_oJ9kuEtNCqz_P8VTugVvCyix322EcvN1bVkoQuLqvmRdvf4-56tBaFU24pPW7cL2Nh5-qJM4Ce7btfNsFg5dj6KfhzXphgttNvyLtoyUxKz1ImuWRe8E_I4kMhNNsq-0G3O2-0pzr2xUK7cc
66
+ .google.com TRUE / FALSE 1793417027 SID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzDYRSn4XTSQZLLOb_8cP0wAACgYKAa8SARYSFQHGX2MishTu2aIUO6jMkaY69qI5whoVAUF8yKrlVW_5LLcbhY-z_VWKPKK-0076
67
+ .google.com TRUE / TRUE 1793417027 __Secure-1PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzYEk4ss5Ga5FZxA026Q3PowACgYKATQSARYSFQHGX2MiH5U4v4vLtLlQfSNMFvyerhoVAUF8yKoIiEMKUn6q2zQUoTesawGY0076
68
+ .google.com TRUE / TRUE 1793417027 __Secure-3PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzPmwdT2LALENSFlwOvA6kdwACgYKAc0SARYSFQHGX2MiLys0aIcNHCDTZHKBpoDsfxoVAUF8yKqVGUVzQfVK3kw47eGoIzXE0076
69
+ .google.com TRUE / FALSE 1793417027 HSID Aa30XYEO0EbwBslvG
70
+ .google.com TRUE / TRUE 1793417027 SSID Ax01UBs9y7oEuhwW9
71
+ .google.com TRUE / FALSE 1793417027 APISID MqqEFVyUEaf_9Z6J/A8x1V3btbuj80Nf_S
72
+ .google.com TRUE / TRUE 1793417027 SAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
73
+ .google.com TRUE / TRUE 1793417027 __Secure-1PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
74
+ .google.com TRUE / TRUE 1793417027 __Secure-3PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
75
+ accounts.google.com FALSE / TRUE 1793417027 LSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40uL024qAHfEnmmAJQXw1TCQACgYKAXYSARYSFQHGX2MiPT4JjSx4k5O4c3Fe0nJ1yRoVAUF8yKrm6qJ7ebQOMCPIHCecZ7Sm0076
76
+ accounts.google.com FALSE / TRUE 1793417027 __Host-1PLSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40co5eg0gt5YUk79mUm-eB3gACgYKAc0SARYSFQHGX2Mix_fLnTRc4lvYYQiZJeI2GBoVAUF8yKpOc_BvpJh6XPy5h9Njy3-p0076
77
+ accounts.google.com FALSE / TRUE 1793417027 __Host-3PLSID s.youtube:g.a0001wjtWMIdCQ-G309xZtzMVmnz6HUCwFuLTNrsZr0ZP9xRUC40XpQEuTNC0_9mqDKFpJy9_wACgYKAZ0SARYSFQHGX2Mi1L5Qz2QaGQn5VJ-zRWaa_BoVAUF8yKqAKBIht30fkTzaBiDEcAMW0076
78
+ accounts.google.com FALSE / TRUE 1793417027 ACCOUNT_CHOOSER AFx_qI6mc8m87lZBbCrGpqKQwnCNaKnXceLmYwJRLLzJlSFupCSDJdMTV86dVAnSZ0t_TXcGfBow7dLJCzDCW42iFH164e-j9QR1d1m_ugn8s59IbdZWNfMaBh79fRp6FJ0eRTxlzJBVHLLeYbj5rdEoGCJKNUTAZw
79
+ .youtube.com TRUE / TRUE 1790393027 __Secure-1PSIDTS sidts-CjQBmkD5SxPGxTN9xFQqR0jIKppCWt45MEJ_XcwQnQDw_nfj7DONF47xy8VFb4XVw_bkTiroEAA
80
+ .youtube.com TRUE / TRUE 1790393027 __Secure-3PSIDTS sidts-CjQBmkD5SxPGxTN9xFQqR0jIKppCWt45MEJ_XcwQnQDw_nfj7DONF47xy8VFb4XVw_bkTiroEAA
81
+ .youtube.com TRUE / FALSE 1793417027 HSID AiEF6eJ88swa_qwf5
82
+ .youtube.com TRUE / TRUE 1793417027 SSID ARPWgjWmCWIdlgQd3
83
+ .youtube.com TRUE / FALSE 1793417027 APISID MqqEFVyUEaf_9Z6J/A8x1V3btbuj80Nf_S
84
+ .youtube.com TRUE / TRUE 1793417027 SAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
85
+ .youtube.com TRUE / TRUE 1793417027 __Secure-1PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
86
+ .youtube.com TRUE / TRUE 1793417027 __Secure-3PAPISID ssGUmbdd194RXtKC/A19abMHPKJF5UjZsB
87
+ .youtube.com TRUE / FALSE 1793417027 SID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzDYRSn4XTSQZLLOb_8cP0wAACgYKAa8SARYSFQHGX2MishTu2aIUO6jMkaY69qI5whoVAUF8yKrlVW_5LLcbhY-z_VWKPKK-0076
88
+ .youtube.com TRUE / TRUE 1793417027 __Secure-1PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzYEk4ss5Ga5FZxA026Q3PowACgYKATQSARYSFQHGX2MiH5U4v4vLtLlQfSNMFvyerhoVAUF8yKoIiEMKUn6q2zQUoTesawGY0076
89
+ .youtube.com TRUE / TRUE 1793417027 __Secure-3PSID g.a0001wjtWBkJ4wJeFkKN60XaQWJkpcU7XbGG74H7lkeAwdsDS1lzPmwdT2LALENSFlwOvA6kdwACgYKAc0SARYSFQHGX2MiLys0aIcNHCDTZHKBpoDsfxoVAUF8yKqVGUVzQfVK3kw47eGoIzXE0076
90
+ .youtube.com TRUE / TRUE 1793417027 LOGIN_INFO AFmmF2swRQIgJ8TM9kR3K0ag_TbOndarfGMHck96yLnOHCeZUsvIge0CIQDOJHH5XfYu6pi_gX59DWxlIGe6LmhKG3Jo_U59eJIPUg:QUQ3MjNmdzFZVHpXQzNoSkgtSE1qbUVpWEMxdGhCMHFyOWNVU2xDYzNpRHMyZ1NKaEtYVUhnR0xKUnNlM2VBaXZGYVNOcnFwZEtvSFVLcmpWUHpBMmZBRWowV1cwMy1BWGNvYXdHMXdsWW5jYURxSkN4TXFVWXZfVDBaRWp2UHpnS2JBWFdJbUVKUmQ3eXhXMjVXZkZRTGdVYUdzYnJKdGxn
91
+ .google.com TRUE / FALSE 1790393027 SIDCC AKEyXzXCX8LKOlj6aDhXUTLs9sKLvpp7Bk1HPBTBzUgUb__cG4l4AOdru_QXF4CRsiGSWsDf
92
+ .google.com TRUE / TRUE 1790393027 __Secure-1PSIDCC AKEyXzUWej-MwEIo72NBDrRlvDNWaURzOvux0NpE52VJktaiwpt8xs8J32FerokUIvoTKJsdug
93
+ .google.com TRUE / TRUE 1790393027 __Secure-3PSIDCC AKEyXzVvkciMQaOGAxypKeCoOSnk5V2AaT18HGMeAVW8dUdcZa0rlq8LjNlXc9wElhzBYzYP8g
94
+ .youtube.com TRUE / FALSE 1790393031 SIDCC AKEyXzUX5vaO9zsgIyoG05GA_Hwz1yDVIHezT92XiXPBbCz-0CpHCIUIB_utitiTsrmnSNfY
95
+ .youtube.com TRUE / TRUE 1790393031 __Secure-1PSIDCC AKEyXzWfwVLeSEf2hjzTYKFhHAZVGoQMult6W6wsogIkCH0cPbIIalZIhmzGmGjVVcBVHocF
96
+ .youtube.com TRUE / TRUE 1790393031 __Secure-3PSIDCC AKEyXzXyCHp-2Hu_GGZdM-oG0Mktt8FwgaE-0NiwdoT572oQ8NYPzM6BIaEsszZveDKJJmd_
extract/utils/probeytdlp.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import yt_dlp, traceback, sys, os
3
+ from http.cookiejar import MozillaCookieJar
4
+
5
class YDLLogger:
    """Minimal logger object for yt-dlp that prints each message with a level tag."""

    def debug(self, msg):
        """Print *msg* tagged as a debug message."""
        print("[DEBUG]", msg)

    def warning(self, msg):
        """Print *msg* tagged as a warning."""
        print("[WARN]", msg)

    def error(self, msg):
        """Print *msg* tagged as an error."""
        print("[ERROR]", msg)
9
+
10
def probe(url, cookies=None):
    """
    Ask yt-dlp for the metadata of *url* without downloading anything and
    print a short summary of what came back.

    Parameters
    ----------
    url : URL handed to ``YoutubeDL.extract_info``.
    cookies : optional path of a cookie file, passed as ``cookiefile``.

    Returns
    -------
    The info object returned by ``extract_info``, or None when any
    exception occurred (the traceback and message are printed).
    """
    options = {
        "format": "bestaudio/best",
        "cachedir": False,
        "logger": YDLLogger(),
        "no_warnings": False,
        "quiet": False,
        # don't try postprocessing during probe
        "postprocessors": [],
        # helpful to mimic a browser if site is picky:
        "http_headers": {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36"},
    }
    if cookies:
        options["cookiefile"] = cookies

    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            print("Probing (no download)...")
            info = downloader.extract_info(url, download=False)
            print("Top-level keys in info:", list(info.keys()))
            available = info.get("formats")
            if not available:
                print("No formats found. Inspecting other info fields:")
                for field in ("webpage_url", "extractor", "requested_formats", "is_live", "entries"):
                    print(f" {field}: {info.get(field)}")
            else:
                print("Found formats (count):", len(available))
                # Only the first ten formats are listed to keep output short.
                for fmt in available[:10]:
                    print(f" - id={fmt.get('format_id')}, ext={fmt.get('ext')}, abr={fmt.get('abr')}, vbr={fmt.get('vbr')}, note={fmt.get('format_note')}")
            return info
    except Exception as exc:
        print("EXCEPTION during probe:")
        traceback.print_exc()
        # also dump any HTML/diagnostic text if available in exception text
        print("Exception message:", str(exc))
    return None
45
+
46
if __name__ == "__main__":
    # Optional first command-line argument: path to a cookie file.
    cookies = sys.argv[1] if len(sys.argv) > 1 else None
    if cookies is not None and not os.path.isfile(cookies):
        print(f"Cookie file '{cookies}' not found.")
        sys.exit(1)
    # The probed video is fixed; only the cookie file is configurable.
    url = "https://www.youtube.com/watch?v=wDchsz8nmbo"
    probe(url, cookies)
extract/utils/retrieve_filepath.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
def retrieve_file_path(file_name):
    """
    Look for *file_name* in the directory that contains this module.

    Returns the absolute path when it is a regular file. Otherwise returns
    None; a message is printed only when nothing exists at that path.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(module_dir, file_name)
    if os.path.isfile(candidate):
        return candidate
    if not os.path.exists(candidate):
        print(f"'{candidate}' does not exist.")
    return None
extract/utils/storage.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os, uuid
3
+ from datetime import datetime, timedelta, timezone
4
+ from azure.identity import ManagedIdentityCredential, DefaultAzureCredential
5
+ from azure.storage.blob import (
6
+ BlobServiceClient, generate_blob_sas, BlobSasPermissions
7
+ )
8
+
9
+ load_dotenv()
10
+ ACCOUNT_NAME = os.getenv("AZURE_STORAGE_ACCOUNT")
11
+ CONTAINER = os.getenv("AZURE_BLOB_CONTAINER")
12
+
13
+ # Use Managed Identity in Azure; locally DefaultAzureCredential also works
14
def _credential():
    """Return the Azure credential used for all blob-storage calls."""
    # Per the original author: tries MI in Azure; falls back to developer
    # creds locally. The interactive browser credential is left enabled.
    credential = DefaultAzureCredential(exclude_interactive_browser_credential=False)
    return credential
17
+
18
def _svc_client():
    """Build a BlobServiceClient for the storage account named by ACCOUNT_NAME."""
    endpoint = f"https://{ACCOUNT_NAME}.blob.core.windows.net"
    return BlobServiceClient(account_url=endpoint, credential=_credential())
21
+
22
def upload_and_sign(local_path: str, ttl_minutes: int = 45) -> str:
    """
    Upload a local file to the configured blob container and return a
    read-only, time-limited URL for it.

    Parameters
    ----------
    local_path : path of the file to upload. Only its base name is used in
        the blob name, which is prefixed with a fresh UUID.
    ttl_minutes : lifetime of the returned SAS URL, in minutes.

    Returns
    -------
    The blob URL with a user-delegation SAS query string appended.
    """
    svc = _svc_client()
    name = f"{uuid.uuid4()}/{os.path.basename(local_path)}"
    blob = svc.get_blob_client(container=CONTAINER, blob=name)
    with open(local_path, "rb") as f:
        blob.upload_blob(f, overwrite=True, content_type="audio/wav")

    # Take one timestamp so every derived time is mutually consistent.
    now = datetime.now(timezone.utc)
    sas_expiry = now + timedelta(minutes=ttl_minutes)
    # The SAS is signed with the delegation key and stops working once that
    # key expires, so the key must live at least as long as the SAS. Keep
    # the original 2-hour minimum and extend it for longer TTLs.
    # NOTE(review): Azure caps delegation key lifetime (7 days, to be
    # confirmed); larger TTLs will be rejected by the service.
    key_expiry = max(now + timedelta(hours=2), sas_expiry)

    # Get User Delegation Key (no account key needed)
    udk = svc.get_user_delegation_key(
        key_start_time=now - timedelta(minutes=5),  # start in the past (presumably to tolerate clock skew)
        key_expiry_time=key_expiry,
    )
    sas = generate_blob_sas(
        account_name=ACCOUNT_NAME,
        container_name=CONTAINER,
        blob_name=name,
        user_delegation_key=udk,
        permission=BlobSasPermissions(read=True),
        expiry=sas_expiry,
    )
    return f"{blob.url}?{sas}"