Spaces:
Sleeping
Sleeping
samir72 committed on
Commit ·
f3e52a7
1
Parent(s): 2fdf75c
Secure and automated cookies
Browse files
extract/app/Youtubeextraction.py
CHANGED
|
@@ -6,6 +6,7 @@ import yt_dlp
|
|
| 6 |
# from utils.storage import upload_and_sign # To remove circular import issue
|
| 7 |
from extract.utils.storage import upload_and_sign # To remove circular import issue
|
| 8 |
from extract.utils.retrieve_filepath import retrieve_file_path # To get the file path of cookies.txt
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI()
|
| 11 |
|
|
@@ -91,8 +92,11 @@ def extract(
|
|
| 91 |
out_template = str(work_dir / "%(title).100B [%(id)s].%(ext)s")
|
| 92 |
hooks = [progress_hook] if progress_hook else []
|
| 93 |
### Use cookies.txt if available
|
| 94 |
-
cookies_path = retrieve_file_path("cookies.txt")
|
| 95 |
#cookies_path = "./app/utils/cookies.txt"
|
|
|
|
|
|
|
|
|
|
| 96 |
if not cookies_path:
|
| 97 |
cookies_path = None
|
| 98 |
print("Cookie file NOT found in container!")
|
|
|
|
| 6 |
# from utils.storage import upload_and_sign # To remove circular import issue
|
| 7 |
from extract.utils.storage import upload_and_sign # To remove circular import issue
|
| 8 |
from extract.utils.retrieve_filepath import retrieve_file_path # To get the file path of cookies.txt
|
| 9 |
+
from extract.utils.cookies_refresher import start_cookies_refresher # To refresh cookies.txt periodically
|
| 10 |
|
| 11 |
app = FastAPI()
|
| 12 |
|
|
|
|
| 92 |
out_template = str(work_dir / "%(title).100B [%(id)s].%(ext)s")
|
| 93 |
hooks = [progress_hook] if progress_hook else []
|
| 94 |
### Use cookies.txt if available
|
| 95 |
+
#cookies_path = retrieve_file_path("cookies.txt")
|
| 96 |
#cookies_path = "./app/utils/cookies.txt"
|
| 97 |
+
# Call the cookies refresher to start refreshing cookies in background
|
| 98 |
+
start_cookies_refresher()
|
| 99 |
+
cookies_path = os.getenv("COOKIES_PATH")
|
| 100 |
if not cookies_path:
|
| 101 |
cookies_path = None
|
| 102 |
print("Cookie file NOT found in container!")
|
extract/app/__pycache__/Youtubeextraction.cpython-313.pyc
CHANGED
|
Binary files a/extract/app/__pycache__/Youtubeextraction.cpython-313.pyc and b/extract/app/__pycache__/Youtubeextraction.cpython-313.pyc differ
|
|
|
extract/utils/__pycache__/cookies_refresher.cpython-313.pyc
ADDED
|
Binary file (4.18 kB). View file
|
|
|
extract/utils/cookies_refresher.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, time, hashlib, tempfile, threading
|
| 2 |
+
from azure.identity import DefaultAzureCredential
|
| 3 |
+
from azure.storage.blob import BlobClient
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
# Load variables from a local .env file (if present) before reading settings.
load_dotenv()

# Azure Blob Storage coordinates of the cookies file; each is None when the
# corresponding environment variable is unset.
ACCOUNT = os.getenv("AZURE_STORAGE_ACCOUNT")  # storage account name
CONTAINER = os.getenv("COOKIES_CONTAINER")
BLOB = os.getenv("COOKIES_BLOB")
# Local path the downloaded cookies are written to.
OUT_PATH = os.getenv("COOKIES_PATH")
# Seconds between background refreshes. Falls back to one hour when the
# variable is unset or empty, so importing this module does not raise
# TypeError from int(None). A non-numeric value still raises ValueError.
REFRESH = int(os.getenv("COOKIES_REFRESH_SEC") or 3600)
|
| 12 |
+
|
| 13 |
+
def _sha256(b: bytes) -> str:
    """Return the SHA-256 digest of *b* as a lowercase hexadecimal string."""
    digest = hashlib.sha256()
    digest.update(b)
    return digest.hexdigest()
|
| 14 |
+
def _read(path: str) -> bytes:
    """Return the raw contents of *path*, or ``b""`` if it cannot be read.

    A missing, unreadable or invalid path (including ``None``) is treated as
    "no existing content" so callers can compare against it unconditionally.
    Only those failures are absorbed; interrupts and unrelated errors
    propagate instead of being hidden by a bare ``except``.
    """
    try:
        with open(path, "rb") as f:
            return f.read()
    except (OSError, TypeError, ValueError):
        # OSError: missing/unreadable file; TypeError: path is None;
        # ValueError: path contains an embedded NUL byte.
        return b""
|
| 18 |
+
|
| 19 |
+
def _atomic_write(path: str, data: bytes):
    """Write *data* to *path* via a temporary file and an atomic rename.

    The temporary file is created in the destination directory (created if
    missing) so that ``os.replace`` stays on one filesystem. Permissions are
    restricted to the owner before the rename, so the destination is never
    visible with broader permissions. If anything fails before the rename
    completes, the temporary file is removed and the error is re-raised.
    """
    d = os.path.dirname(path) or "."
    os.makedirs(d, exist_ok=True)
    fd, tmp = tempfile.mkstemp(prefix=".cookies.", dir=d)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        try:
            os.chmod(tmp, 0o600)
        except OSError:
            # Best effort: some platforms/filesystems do not support chmod.
            pass
        os.replace(tmp, path)
    except BaseException:
        # Do not leave stray ".cookies.*" files behind on failure.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
|
| 27 |
+
|
| 28 |
+
def refresh_once():
    """Download the cookies blob and rewrite the local file if it changed.

    Does nothing (beyond printing a message) when required settings are
    missing or the downloaded blob is empty/whitespace. The local file at
    ``OUT_PATH`` is only rewritten when its SHA-256 differs from the blob's.
    Errors from the Azure SDK or the filesystem propagate to the caller.
    """
    if not ACCOUNT:
        print("[cookies] ACCOUNT not set")
        return
    if not (CONTAINER and BLOB and OUT_PATH):
        # Fail early with a clear message instead of an opaque SDK/TypeError.
        print("[cookies] COOKIES_CONTAINER, COOKIES_BLOB or COOKIES_PATH not set")
        return

    # Both objects hold network resources; close them after every refresh so
    # repeated calls do not accumulate open sessions.
    with DefaultAzureCredential() as cred, BlobClient(  # uses ACA managed identity
        account_url=f"https://{ACCOUNT}.blob.core.windows.net",
        container_name=CONTAINER,
        blob_name=BLOB,
        credential=cred,
    ) as bc:
        new = bc.download_blob(max_concurrency=1).readall()

    if not new.strip():
        print("[cookies] WARN: blob is empty; skipping")
        return
    if _sha256(new) != _sha256(_read(OUT_PATH)):
        _atomic_write(OUT_PATH, new)
        print(f"[cookies] updated -> {OUT_PATH} (bytes={len(new)})")
|
| 45 |
+
|
| 46 |
+
# State ensuring the background refresher thread is started at most once per
# process, even if start_cookies_refresher() is called repeatedly.
_refresher_lock = threading.Lock()
_refresher_started = False


def start_cookies_refresher():
    """Refresh the cookies now and ensure the periodic refresher is running.

    Every call performs one synchronous refresh, so the cookies file is as
    fresh as possible when this returns. The background daemon thread, which
    refreshes every ``REFRESH`` seconds, is started only on the first call;
    later calls (e.g. once per request) no longer spawn additional threads.
    Refresh failures are printed and never raised.
    """
    global _refresher_started

    # initial fetch before serving traffic
    try:
        refresh_once()
    except Exception as e:
        print(f"[cookies] initial refresh error: {e}")

    # periodic refresh
    def loop():
        while True:
            time.sleep(REFRESH)
            try:
                refresh_once()
            except Exception as e:
                print(f"[cookies] refresh error: {e}")

    with _refresher_lock:
        if _refresher_started:
            return
        _refresher_started = True
    threading.Thread(target=loop, daemon=True).start()
|