Arnold Manzano committed on
Commit
8ac8653
·
1 Parent(s): da796b0

Attempt to run downloader

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. tt_router.py +83 -0
app.py CHANGED
@@ -153,6 +153,8 @@ async def index():
153
  </form>
154
  <p><strong>System Status:</strong> {status}</p>
155
  <hr>
 
 
156
  <h2>Processed Files</h2>
157
  <p><button onclick="location.reload()">Refresh List</button></p>
158
  <div class="grid">
 
153
  </form>
154
  <p><strong>System Status:</strong> {status}</p>
155
  <hr>
156
+ <p><a href="/tt">TT page</a></p>
157
+ <hr>
158
  <h2>Processed Files</h2>
159
  <p><button onclick="location.reload()">Refresh List</button></p>
160
  <div class="grid">
tt_router.py CHANGED
@@ -1,8 +1,91 @@
 
 
 
 
 
1
  from fastapi import APIRouter
2
  from fastapi.responses import HTMLResponse
 
 
3
 
4
  router = APIRouter()
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  @router.get("/tt", response_class=HTMLResponse)
7
  async def read_tt():
 
8
  return "<h1>This is the TT Page from a separate file!</h1>"
 
1
+ import os
2
+ import base64
3
+ import time
4
+ import uuid
5
+ from pathlib import Path
6
  from fastapi import APIRouter
7
  from fastapi.responses import HTMLResponse
8
+ from playwright.sync_api import sync_playwright
9
+ from playwright_stealth import Stealth
10
 
11
  router = APIRouter()
12
 
13
def download_deduplicated(tiktok_url, max_candidates=1):
    """Open a TikTok page, capture its video stream URLs and save the video.

    A headless Chromium page loads ``tiktok_url`` while a request listener
    records every distinct URL that looks like a video stream. Each candidate
    is then re-fetched from inside the page and, if the payload is larger
    than ~100 KB, written to ``outputs/input_<id>.mp4``.

    Args:
        tiktok_url: URL of the TikTok video page to open.
        max_candidates: How many captured URLs to try, in capture order.
            Defaults to 1 (only the first candidate); ``None`` tries all.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    OUTPUT_DIR = Path("outputs")
    # open(..., "wb") does not create missing directories, so make sure the
    # destination exists before any download is attempted.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # The URL is passed to the page as an argument instead of being spliced
    # into the script text, so quotes or backslashes in a captured URL cannot
    # break (or inject into) the JavaScript.
    fetch_as_base64_js = """
    async (url) => {
        try {
            const r = await fetch(url);
            const b = await r.blob();
            if (b.size === 0) return "EMPTY";
            return new Promise(res => {
                const reader = new FileReader();
                reader.onloadend = () => res(reader.result.split(',')[1]);
                reader.readAsDataURL(b);
            });
        } catch { return null; }
    }
    """

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            Stealth().apply_stealth_sync(page)

            # A dict is used as an insertion-ordered set: the keys are the
            # distinct stream URLs in the order they were first requested.
            captured_links = {}

            def handle_request(request):
                url = request.url
                if "v16-webapp-prime.tiktok.com" in url and "video/tos" in url:
                    captured_links.setdefault(url, False)

            page.on("request", handle_request)
            page.goto(tiktok_url)

            print("Listening for unique streams... (6s)")
            # wait_for_timeout keeps Playwright's event dispatching running;
            # time.sleep would block it, so requests made during the wait
            # would not reach handle_request before the snapshot below.
            page.wait_for_timeout(6000)

            # Snapshot the keys now so the list does not grow while we loop.
            unique_urls = list(captured_links)
            print(f"Found {len(unique_urls)} unique candidate links.")

            for i, url in enumerate(unique_urls[:max_candidates]):
                print(f"[{i+1}/{len(unique_urls)}] Checking: {url}...")

                b64_result = page.evaluate(fetch_as_base64_js, url)

                if not b64_result or b64_result == "EMPTY":
                    print("  -> Skipped: Empty or Failed response.")
                    continue

                data = base64.b64decode(b64_result)
                size_mb = len(data) / 1024 / 1024

                # Only save if it's a substantial file (over 100KB).
                if size_mb <= 0.1:
                    print(f"  -> Skipped: File too small ({size_mb:.4f} MB)")
                    continue

                job_id = str(uuid.uuid4())[:8]
                filename = f"input_{job_id}.mp4"
                filepath = OUTPUT_DIR / filename
                with open(filepath, "wb") as f:
                    f.write(data)
                print(f"  -> SUCCESS: Saved {filename} ({size_mb:.2f} MB)")
        finally:
            # Always release the browser process, even if navigation,
            # evaluation or the file write raised.
            browser.close()

    print(f"\nDone! Check the '{OUTPUT_DIR}' folder.")
84
+
85
target = "https://www.tiktok.com/@_luna.rayne_/video/7582251394883718422"
# target = "https://www.tiktok.com/@_luna.rayne_/video/7597442279355223318"


@router.get("/tt", response_class=HTMLResponse)
async def read_tt():
    """Run the downloader for ``target`` and return the /tt page heading.

    ``download_deduplicated`` uses Playwright's synchronous API and blocks
    for several seconds. It is therefore executed in a worker thread: the
    sync API cannot be driven from the thread that runs the asyncio event
    loop, and running it inline would stall every other request meanwhile.

    Returns:
        The HTML heading string, once the download attempt has finished.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import asyncio

    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, download_deduplicated, target)
    return "<h1>This is the TT Page from a separate file!</h1>"