Update scraper4.py
Browse files- scraper4.py +12 -0
scraper4.py
CHANGED
|
@@ -67,8 +67,19 @@ async def search_movies(query):
|
|
| 67 |
try:
|
| 68 |
url = f"{DOMENA}/se/j/json?q={urllib.parse.quote(query)}"
|
| 69 |
success = await manager.goto_with_fallback(page, url)
|
|
|
|
| 70 |
if success:
|
|
|
|
| 71 |
content = await page.inner_text("body")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return json.loads(content)
|
| 73 |
except Exception as e:
|
| 74 |
print(f"[SEARCH ERROR] {e}")
|
|
@@ -76,6 +87,7 @@ async def search_movies(query):
|
|
| 76 |
await page.close()
|
| 77 |
return []
|
| 78 |
|
|
|
|
| 79 |
async def get_details(slug, media_type):
|
| 80 |
print(f"[DETAILS] Info cez Tor: {slug}")
|
| 81 |
page = await manager.get_page()
|
|
|
|
| 67 |
try:
|
| 68 |
url = f"{DOMENA}/se/j/json?q={urllib.parse.quote(query)}"
|
| 69 |
success = await manager.goto_with_fallback(page, url)
|
| 70 |
+
|
| 71 |
if success:
|
| 72 |
+
# Získame surový text stránky
|
| 73 |
content = await page.inner_text("body")
|
| 74 |
+
|
| 75 |
+
# DEBUG: Ak je obsah podozrivo krátky, vypíšeme ho
|
| 76 |
+
if len(content) < 100:
|
| 77 |
+
print(f"[DEBUG] Surový obsah z webu: '{content}'")
|
| 78 |
+
|
| 79 |
+
if not content.strip():
|
| 80 |
+
print("[TOR] Web vrátil prázdnu stránku. Tor uzol je pravdepodobne blokovaný.")
|
| 81 |
+
return []
|
| 82 |
+
|
| 83 |
return json.loads(content)
|
| 84 |
except Exception as e:
|
| 85 |
print(f"[SEARCH ERROR] {e}")
|
|
|
|
| 87 |
await page.close()
|
| 88 |
return []
|
| 89 |
|
| 90 |
+
|
| 91 |
async def get_details(slug, media_type):
|
| 92 |
print(f"[DETAILS] Info cez Tor: {slug}")
|
| 93 |
page = await manager.get_page()
|