# main.py — TeraBox Scraper API (FastAPI + Playwright)
import json
from urllib.parse import quote

from fastapi import FastAPI, Query, Response
from playwright.sync_api import sync_playwright
from pydantic import BaseModel
app = FastAPI(title="TeraBox Scraper API")
# --- CORE SCRAPER FUNCTION ---
# --- CORE SCRAPER FUNCTION ---
def run_scraper(terabox_url: str) -> dict:
    """Scrape streamable video URLs for a TeraBox share link.

    Opens the link through the iteraplay.com helper site in headless
    Chromium, records network responses that look like video streams, and
    reads the player state the page exposes on ``window``.

    Args:
        terabox_url: The TeraBox share URL to resolve.

    Returns:
        On success, a dict with ``status="success"``, the original URL, the
        ``<video>`` element's ``src`` (or ``None``), any quality map / video
        list found on ``window``, and the captured network URLs. On any
        failure, ``{"status": "error", "message": <exception text>}``.
    """
    print(f"Processing URL: {terabox_url}")
    try:
        with sync_playwright() as p:
            # Headless launch: this runs in a server environment with no display.
            browser = p.chromium.launch(headless=True)
            try:
                # Desktop user agent avoids trivial bot/mobile detection.
                context = browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                )
                page = context.new_page()

                # Collect every response URL that looks like a video/stream asset.
                captured_urls = []

                def capture_video_urls(response):
                    url = response.url
                    if any(marker in url for marker in ('.m3u8', '.mp4', 'stream', 'fast')):
                        if url not in captured_urls:
                            captured_urls.append(url)

                page.on('response', capture_video_urls)

                # URL-encode the share link so query metacharacters (&, ?, #)
                # inside it cannot corrupt the helper site's ?url= parameter.
                target_url = f"https://iteraplay.com/?url={quote(terabox_url, safe='')}"
                page.goto(target_url, wait_until='networkidle', timeout=60000)

                # Best-effort wait for the player; the page may still expose
                # useful window state even if no <video> element ever renders.
                try:
                    page.wait_for_selector('video', timeout=30000)
                except Exception:
                    pass

                # Read the player state the helper site publishes on window.
                video_data = page.evaluate("""
                    () => ({
                        qualities: window.videoQualityURLs || {},
                        videos: window.allVideoData || [],
                        current: window.currentVideoData || {},
                        metadata: window.currentVideoMetadata || {}
                    })
                """)

                # Direct src attribute of the <video> tag, if one rendered.
                video_src = None
                try:
                    video_src = page.locator('video').get_attribute('src')
                except Exception:
                    pass

                return {
                    "status": "success",
                    "original_url": terabox_url,
                    "video_source": video_src,
                    "qualities": video_data.get('qualities', {}),
                    "all_videos": video_data.get('videos', []),
                    # These were scraped but previously discarded; added as
                    # new keys so existing consumers are unaffected.
                    "current_video": video_data.get('current', {}),
                    "metadata": video_data.get('metadata', {}),
                    "captured_network_urls": captured_urls
                }
            finally:
                # Always release the browser, even when navigation fails.
                browser.close()
    except Exception as e:
        return {"status": "error", "message": str(e)}
# --- API ENDPOINTS ---
class ScrapeRequest(BaseModel):
    """Request body for ``POST /scrape``."""
    # The TeraBox share URL to scrape.
    url: str
@app.get("/")
def home():
    """Root endpoint: returns a short usage hint for the API."""
    usage_hint = "Add /scrape?url=YOUR_URL to the address bar to use."
    return {"message": usage_hint}
# 1. GET Request (For Browser Use)
# 1. GET Request (For Browser Use)
@app.get("/scrape")
def scrape_get(url: str = Query(..., description="The TeraBox URL to scrape")):
    """Scrape a TeraBox URL passed as a ``?url=`` query parameter."""
    result = run_scraper(url)
    # Pretty-print so the JSON is readable directly in a browser tab.
    body = json.dumps(result, indent=2)
    return Response(content=body, media_type="application/json")
# 2. POST Request (For Code/API Use)
# 2. POST Request (For Code/API Use)
@app.post("/scrape")
def scrape_post(request: ScrapeRequest):
    """Scrape the TeraBox URL supplied in a JSON request body."""
    result = run_scraper(request.url)
    # Indented JSON to mirror the GET endpoint's response format.
    body = json.dumps(result, indent=2)
    return Response(content=body, media_type="application/json")
# Dev entry point: serve on all interfaces. Port 7860 — presumably chosen for
# Hugging Face Spaces hosting (its conventional port); confirm with deployment.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)