Spaces:

athlonxpboy
/

tt-ff

Building

App Files Community

Arnold Manzano committed on Feb 5

Commit

8ac8653

1 Parent(s): da796b0

Attempt to run downloader

Browse files

Files changed (2) hide show

app.py +2 -0
tt_router.py +83 -0

app.py CHANGED Viewed

@@ -153,6 +153,8 @@ async def index():
             </form>
             <p><strong>System Status:</strong> {status}</p>
             <hr>
             <h2>Processed Files</h2>
             <p><button onclick="location.reload()">Refresh List</button></p>
             <div class="grid">

             </form>
             <p><strong>System Status:</strong> {status}</p>
             <hr>
+            <p><a href="/tt">TT page</a></p>
+            <hr>
             <h2>Processed Files</h2>
             <p><button onclick="location.reload()">Refresh List</button></p>
             <div class="grid">

tt_router.py CHANGED Viewed

@@ -1,8 +1,91 @@
 from fastapi import APIRouter
 from fastapi.responses import HTMLResponse
 # Module-level router; the /tt route defined below is registered on it.
 router = APIRouter()
 @router.get("/tt", response_class=HTMLResponse)
 async def read_tt():
     """Serve the static placeholder heading for the /tt page."""
     page_html = "<h1>This is the TT Page from a separate file!</h1>"
     return page_html

+import os
+import base64
+import time
+import uuid
+from pathlib import Path
 from fastapi import APIRouter
 from fastapi.responses import HTMLResponse
+from playwright.sync_api import sync_playwright
+from playwright_stealth import Stealth
+# Module-level router; the /tt route defined below is registered on it.
 router = APIRouter()
+def download_deduplicated(tiktok_url):
+    """Capture a TikTok page's video stream requests and save the first one.
+
+    Opens ``tiktok_url`` in headless Chromium with stealth patches applied,
+    records each distinct request URL that matches the video-stream host and
+    path, then re-fetches the first candidate from inside the page and writes
+    it to ``outputs/input_<id>.mp4`` when it is larger than 0.1 MB.
+
+    Args:
+        tiktok_url: Address of the TikTok video page to open.
+
+    Returns:
+        None. Progress and results are reported with ``print``.
+    """
+    OUTPUT_DIR = Path("outputs")
+    # open() below fails when the folder is missing, so create it up front.
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    # The stream URL is handed to the page as an argument rather than pasted
+    # into the script text, so quotes or backslashes in it cannot break (or
+    # inject code into) the JavaScript.
+    js_fetch = """
+    async (url) => {
+        try {
+            const r = await fetch(url);
+            const b = await r.blob();
+            if (b.size === 0) return "EMPTY";
+            return new Promise(res => {
+                const reader = new FileReader();
+                reader.onloadend = () => res(reader.result.split(',')[1]);
+                reader.readAsDataURL(b);
+            });
+        } catch { return null; }
+    }
+    """
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        # try/finally so the browser is closed even when a step below raises.
+        try:
+            context = browser.new_context()
+            page = context.new_page()
+            Stealth().apply_stealth_sync(page)
+            # Dict used as an insertion-ordered set of stream URLs.
+            # Key = URL, Value = False (found, but not yet downloaded).
+            captured_links = {}
+            def handle_request(request):
+                url = request.url
+                if "v16-webapp-prime.tiktok.com" in url and "video/tos" in url:
+                    captured_links.setdefault(url, False)
+            page.on("request", handle_request)
+            page.goto(tiktok_url)
+            print("Listening for unique streams... (6s)")
+            # wait_for_timeout keeps Playwright dispatching events while we
+            # wait; time.sleep would block the thread that delivers "request"
+            # events, so streams requested during the pause would be missed.
+            page.wait_for_timeout(6000)
+            # Snapshot the keys now so the list cannot grow while we loop.
+            unique_urls = list(captured_links)
+            print(f"Found {len(unique_urls)} unique candidate links.")
+            # Only the first candidate is attempted.
+            for i, url in enumerate(unique_urls[:1]):
+                print(f"[{i+1}/{len(unique_urls)}] Checking: {url}...")
+                b64_result = page.evaluate(js_fetch, url)
+                if not b64_result or b64_result == "EMPTY":
+                    print("    -> Skipped: Empty or Failed response.")
+                    continue
+                data = base64.b64decode(b64_result)
+                size_mb = len(data) / 1024 / 1024
+                # Only save if it's a substantial file (over 0.1 MB).
+                if size_mb <= 0.1:
+                    print(f"    -> Skipped: File too small ({size_mb:.4f} MB)")
+                    continue
+                job_id = str(uuid.uuid4())[:8]
+                filename = f"input_{job_id}.mp4"
+                filepath = OUTPUT_DIR / filename
+                with open(filepath, "wb") as f:
+                    f.write(data)
+                print(f"    -> SUCCESS: Saved {filename} ({size_mb:.2f} MB)")
+        finally:
+            browser.close()
+        print(f"\nDone! Check the '{OUTPUT_DIR}' folder.")
+# Hard-coded TikTok video URL that the /tt handler passes to
+# download_deduplicated.
+target = "https://www.tiktok.com/@_luna.rayne_/video/7582251394883718422"
+# Alternate video URL, currently disabled:
+# target = "https://www.tiktok.com/@_luna.rayne_/video/7597442279355223318"
 @router.get("/tt", response_class=HTMLResponse)
 async def read_tt():
+    """Download the hard-coded target video, then serve the TT page.
+
+    Returns:
+        The static HTML heading for the /tt page.
+    """
+    import asyncio
+    # download_deduplicated uses Playwright's sync API, which cannot run on a
+    # thread that already has a running asyncio loop and would otherwise block
+    # this server's event loop for the whole download. Run it in a worker
+    # thread and await the result instead.
+    await asyncio.to_thread(download_deduplicated, target)
     return "<h1>This is the TT Page from a separate file!</h1>"