rrr4 / tools /debug_direct.py
minaewrw's picture
Initial backend deployment for Hugging Face Spaces
11757af
import asyncio
import base64
import sys
import os
import re
# Add backend to sys.path
sys.path.append(os.getcwd())
from backend.scraper.engine import scraper as larooza_scraper
async def debug_direct():
    """Probe a fixed list of embed pages and print what the scraper finds.

    For each URL the scraper's ``_extract_direct_url`` is awaited. A truthy
    result is printed as a success. Otherwise the page is re-fetched through
    the scraper's ``_get_html`` with the ``Referer`` header set to the page
    URL itself, and the HTML length plus a short snippet around the first
    match of each probe pattern is printed to help diagnose the failure.
    """
    candidate_pages = (
        "https://mxdrop.to/e/7kp4rlz7ip14pv",
        "https://vidspeed.org/embed-3qwr1a0iitb9.html",
        "https://vidmoly.net/embed-r3jhudzh1w51.html",
    )
    # Substrings/regexes whose surroundings are dumped when extraction fails.
    probe_patterns = (r'file:', r'src:', r'file\s*:', r'source')

    for page_url in candidate_pages:
        print(f"\n--- Testing {page_url} ---")
        resolved = await larooza_scraper._extract_direct_url(page_url)
        if resolved:
            print(f"SUCCESS: {resolved}")
            continue

        print("FAILED to extract direct URL")
        # Check whether the page HTML can be retrieved at all, sending the
        # page's own URL as the Referer on a copy of the scraper's headers.
        request_headers = larooza_scraper.headers.copy()
        request_headers["Referer"] = page_url
        body = await larooza_scraper._get_html(page_url, headers=request_headers)
        if not body:
            print("Could not fetch HTML at all")
            continue

        body_len = len(body)
        print(f"HTML Length: {body_len}")
        for needle in probe_patterns:
            hit = re.search(needle, body, re.IGNORECASE)
            if hit is None:
                continue
            # Show 50 characters before and 200 after the first match,
            # clamped to the bounds of the document.
            lo = max(0, hit.start() - 50)
            hi = min(body_len, hit.end() + 200)
            print(f"Snippet for '{needle}':\n{body[lo:hi]}\n")
if __name__ == "__main__":
    # Script entry point: run the debug coroutine to completion.
    asyncio.run(debug_direct())