QJMKWB1 / scraper.py
QJMKWB's picture
Upload 4 files
0c6e4dc verified
import requests
from bs4 import BeautifulSoup
import re
from playwright.sync_api import sync_playwright
import time
# Base URL of the target site; the request URLs built below are all prefixed with it.
DOMENA = "https://mrkaj.me"
def search_movies(query):
    """Query the site's JSON search endpoint.

    Args:
        query: Search term. It is percent-encoded before being placed in
            the URL, so characters such as ``&``, ``#`` or ``+`` are sent
            as part of the term instead of altering the query string.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        An empty list in every other case: non-200 status, network
        failure, timeout, or a body that cannot be decoded as JSON.
    """
    encoded_query = requests.utils.quote(str(query), safe="")
    api_url = f"{DOMENA}/se/j/json?q={encoded_query}"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # requests applies no timeout by default; without one a stalled
        # server would block the caller indefinitely.
        response = requests.get(api_url, headers=headers, timeout=15)
        if response.status_code == 200:
            return response.json()
    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to
        # "no results" rather than propagating to the caller.
        print(f"Search error: {e}")
    return []
def _parse_seasons(html):
    """Extract season numbers and episode counts from raw page markup.

    Each occurrence of the text "Séria <number>" yields one entry. The
    episode count is built from the decimal digits found in the ten
    characters immediately before the next ``</span>`` that follows it.
    """
    seasons = []
    for heading in re.finditer(r'Séria\s+(\d+)', html):
        span_end = html.find("</span>", heading.start())
        if span_end == -1:
            continue
        # Clamp at 0 so a very early match cannot wrap the slice around
        # to the end of the string.
        window = html[max(span_end - 10, 0):span_end]
        # str.isdecimal only accepts characters int() can parse;
        # str.isnumeric also lets through e.g. superscripts and fractions,
        # which would make int() raise and abort the whole parse.
        episode_digits = ''.join(filter(str.isdecimal, window))
        seasons.append({
            "season": int(heading.group(1)),
            "episodes": int(episode_digits) if episode_digits else 0
        })
    return seasons


def get_details(slug, media_type):
    """Fetch the available languages and season listing for a title.

    Args:
        slug: Identifier of the title, inserted into the page URL.
        media_type: ``"movie"`` or ``"film"`` selects the movie page; any
            other value is treated as a series and its S01E01 page is
            fetched instead.

    Returns:
        A dict with two keys:
        ``"languages"`` - list of the non-empty text of each direct child
        of the ``div.movie-keyword`` element;
        ``"seasons"`` - list of ``{"season": int, "episodes": int}``
        dicts, filled only when the title is not a movie.
        Whatever was collected so far is returned on a non-200 response
        or on any error (the error is printed, not raised).
    """
    is_movie = media_type in ["movie", "film"]
    url = f"{DOMENA}/p/{slug}" if is_movie else f"{DOMENA}/tv/{slug}/S01E01"
    headers = {"User-Agent": "Mozilla/5.0"}
    results = {"languages": [], "seasons": []}
    try:
        # requests applies no timeout by default; bound the wait so a
        # stalled server cannot hang the caller.
        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code != 200:
            return results
        page_html = response.text
        soup = BeautifulSoup(page_html, "html.parser")

        # Languages
        keyword_div = soup.find("div", class_="movie-keyword")
        if keyword_div:
            for child in keyword_div.find_all(recursive=False):
                text = child.get_text(strip=True)
                if text:
                    results["languages"].append(text)

        # Seasons
        if not is_movie:
            results["seasons"].extend(_parse_seasons(page_html))
    except Exception as e:
        # Best-effort boundary: report the problem and return what we have.
        print(f"Details error: {e}")
    return results
def _wait_for_stream(page, found, attempts=10, interval_ms=1000):
    """Poll until a stream URL has been captured or the attempts run out.

    Once a stream URL is present, one more interval is waited (to give a
    subtitle request a chance to arrive) and polling stops.
    """
    for _ in range(attempts):
        stream_seen = bool(found["stream"])
        # page.wait_for_timeout keeps Playwright's sync dispatcher running,
        # so "request" events are still delivered while waiting;
        # time.sleep would block it and the handler could never fire.
        page.wait_for_timeout(interval_ms)
        if stream_seen:
            return


def get_stream_url(slug, media_type, season=None, episode=None, lang=None, source_idx=0):
    """Load a title's player page in headless Chromium and sniff media URLs.

    Args:
        slug: Identifier of the title, inserted into the page URL.
        media_type: ``"movie"`` or ``"film"`` selects the movie page; any
            other value selects the series episode page.
        season: Season number; converted with ``int()`` for series.
        episode: Episode number; converted with ``int()`` for series.
        lang: Optional language value, percent-encoded into the query.
        source_idx: Optional source index; omitted when ``None``.

    Returns:
        ``{"stream": url_or_None, "vtt": url_or_None}`` where ``stream`` is
        the first requested URL containing ``.m3u8`` and ``vtt`` the first
        requested URL ending in ``.vtt``.

    Errors raised while navigating or interacting with the page are
    printed and whatever was captured so far is returned.
    """
    if media_type in ["movie", "film"]:
        url = f"{DOMENA}/p/{slug}"
    else:
        url = f"{DOMENA}/tv/{slug}/S{int(season):02d}E{int(episode):02d}"

    # URL parameters for source and language.
    # NOTE(review): the language key emitted here is `lng`, while the
    # previous comment described the format as `?source=X&lang=Y` -
    # confirm which spelling the site expects.
    params = []
    if source_idx is not None:
        params.append(f"source={source_idx}")
    if lang:
        params.append(f"lng={requests.utils.quote(lang)}")
    if params:
        url += "?" + "&".join(params)
    print(f"Scanning URL: {url}")

    found = {"stream": None, "vtt": None}

    def handle_request(request):
        # Keep only the first match of each kind.
        if ".m3u8" in request.url and not found["stream"]:
            found["stream"] = request.url
        if request.url.endswith(".vtt") and not found["vtt"]:
            found["vtt"] = request.url

    with sync_playwright() as p:
        # We need to install browsers in the Dockerfile
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36")
            page = context.new_page()
            page.on("request", handle_request)
            try:
                page.goto(url, wait_until="networkidle", timeout=30000)
                _wait_for_stream(page, found)
                # If not found, try to click the iframe if it exists (like in app.py)
                if not found["stream"]:
                    try:
                        iframe = page.locator("iframe#frm")
                        if iframe.count() > 0:
                            iframe.click(force=True)
                            _wait_for_stream(page, found)
                    except Exception as e:
                        # The click is optional; log instead of silently
                        # swallowing, and let KeyboardInterrupt through.
                        print(f"Iframe click error: {e}")
            except Exception as e:
                print(f"Playwright error: {e}")
        finally:
            # Always release the browser, even if context/page setup fails.
            browser.close()
    return found