Spaces:
Building
Building
Arnold Manzano committed on
Commit ·
8ac8653
1
Parent(s): da796b0
Attempt to run downloader
Browse files
- app.py +2 -0
- tt_router.py +83 -0
app.py
CHANGED
|
@@ -153,6 +153,8 @@ async def index():
|
|
| 153 |
</form>
|
| 154 |
<p><strong>System Status:</strong> {status}</p>
|
| 155 |
<hr>
|
|
|
|
|
|
|
| 156 |
<h2>Processed Files</h2>
|
| 157 |
<p><button onclick="location.reload()">Refresh List</button></p>
|
| 158 |
<div class="grid">
|
|
|
|
| 153 |
</form>
|
| 154 |
<p><strong>System Status:</strong> {status}</p>
|
| 155 |
<hr>
|
| 156 |
+
<p><a href="/tt">TT page</a></p>
|
| 157 |
+
<hr>
|
| 158 |
<h2>Processed Files</h2>
|
| 159 |
<p><button onclick="location.reload()">Refresh List</button></p>
|
| 160 |
<div class="grid">
|
tt_router.py
CHANGED
|
@@ -1,8 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import APIRouter
|
| 2 |
from fastapi.responses import HTMLResponse
|
|
|
|
|
|
|
| 3 |
|
| 4 |
router = APIRouter()
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
@router.get("/tt", response_class=HTMLResponse)
|
| 7 |
async def read_tt():
|
|
|
|
| 8 |
return "<h1>This is the TT Page from a separate file!</h1>"
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import base64
|
| 3 |
+
import time
|
| 4 |
+
import uuid
|
| 5 |
+
from pathlib import Path
|
| 6 |
from fastapi import APIRouter
|
| 7 |
from fastapi.responses import HTMLResponse
|
| 8 |
+
from playwright.sync_api import sync_playwright
|
| 9 |
+
from playwright_stealth import Stealth
|
| 10 |
|
| 11 |
# Router for this module's routes; the /tt endpoint is registered on it.
router = APIRouter()
|
| 12 |
|
| 13 |
+
def download_deduplicated(tiktok_url):
    """Open a TikTok page headlessly and save the first captured video stream.

    The page is loaded in headless Chromium with stealth patches applied.
    Every outgoing request whose URL contains both
    ``v16-webapp-prime.tiktok.com`` and ``video/tos`` is recorded once. After
    a six-second listening window, the first recorded URL is fetched from
    inside the page context and written to ``outputs/input_<id>.mp4`` when the
    payload is larger than 0.1 MiB.

    Args:
        tiktok_url: URL of the TikTok video page to open.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    OUTPUT_DIR = Path("outputs")
    # Make sure the destination exists; writing below would otherwise fail
    # with FileNotFoundError on a fresh checkout.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # The URL is handed to the function as an argument rather than being
    # interpolated into the JavaScript source, so quotes or backslashes in a
    # captured URL cannot break (or inject into) the script.
    js_fetch = """
    async (url) => {
        try {
            const r = await fetch(url);
            const b = await r.blob();
            if (b.size === 0) return "EMPTY";
            return new Promise(res => {
                const reader = new FileReader();
                reader.onloadend = () => res(reader.result.split(',')[1]);
                reader.readAsDataURL(b);
            });
        } catch { return null; }
    }
    """

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            Stealth().apply_stealth_sync(page)

            # Dict used as an insertion-ordered set so each stream URL is
            # recorded once and the first one seen stays first.
            captured_links = {}

            def handle_request(request):
                url = request.url
                if "v16-webapp-prime.tiktok.com" in url and "video/tos" in url:
                    captured_links.setdefault(url, False)

            page.on("request", handle_request)
            page.goto(tiktok_url)

            print("Listening for unique streams... (6s)")
            # wait_for_timeout keeps Playwright's dispatcher running so that
            # request events arriving during the wait reach handle_request;
            # time.sleep would block it.
            page.wait_for_timeout(6000)

            # Snapshot the keys now so the list cannot grow while we loop.
            unique_urls = list(captured_links)
            print(f"Found {len(unique_urls)} unique candidate links.")

            # Only the first candidate is attempted.
            for i, url in enumerate(unique_urls[:1]):
                print(f"[{i+1}/{len(unique_urls)}] Checking: {url}...")

                b64_result = page.evaluate(js_fetch, url)

                if not b64_result or b64_result == "EMPTY":
                    print(" -> Skipped: Empty or Failed response.")
                    continue

                data = base64.b64decode(b64_result)
                size_mb = len(data) / 1024 / 1024

                # Only save if it's a substantial file (over 100KB).
                if size_mb <= 0.1:
                    print(f" -> Skipped: File too small ({size_mb:.4f} MB)")
                    continue

                job_id = str(uuid.uuid4())[:8]
                filename = f"input_{job_id}.mp4"
                (OUTPUT_DIR / filename).write_bytes(data)
                print(f" -> SUCCESS: Saved {filename} ({size_mb:.2f} MB)")
        finally:
            # Always release the browser, even if navigation or the in-page
            # fetch raised.
            browser.close()

    print(f"\nDone! Check the '{OUTPUT_DIR}' folder.")
|
| 84 |
+
|
| 85 |
+
# Hard-coded TikTok video page that the /tt route hands to download_deduplicated().
target = "https://www.tiktok.com/@_luna.rayne_/video/7582251394883718422"
|
| 86 |
+
# target = "https://www.tiktok.com/@_luna.rayne_/video/7597442279355223318"
|
| 87 |
+
|
| 88 |
@router.get("/tt", response_class=HTMLResponse)
async def read_tt():
    """Run the downloader for ``target`` and return the TT page HTML.

    ``download_deduplicated`` drives Playwright's synchronous API, which
    blocks for several seconds and cannot be used from inside a running
    asyncio event loop. It is therefore executed in a worker thread and
    awaited, so the event loop stays free while the download runs.

    Returns:
        The static HTML heading for the TT page.
    """
    import asyncio

    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, download_deduplicated, target)
    return "<h1>This is the TT Page from a separate file!</h1>"
|