Spaces:
Running
Running
Added scraper debugging
Browse files- code/app.py +65 -1
code/app.py
CHANGED
|
@@ -182,8 +182,31 @@ async def validate_url(
|
|
| 182 |
else:
|
| 183 |
return JSONResponse({"error": "Unsupported platform"}, status_code=400)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
| 185 |
if not auction.get("image_urls"):
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
# 2. Ile zdjęć
|
| 189 |
total_available = len(auction["image_urls"])
|
|
@@ -254,6 +277,47 @@ async def validate_url(
|
|
| 254 |
}, status_code=500)
|
| 255 |
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
@app.get("/health")
|
| 258 |
def health():
|
| 259 |
return {"status": "ok", "message": "API running"}
|
|
|
|
| 182 |
else:
|
| 183 |
return JSONResponse({"error": "Unsupported platform"}, status_code=400)
|
| 184 |
|
| 185 |
+
print(f"🔍 DEBUG: Auction data: {auction}")
|
| 186 |
+
print(f"🔍 DEBUG: Image URLs: {auction.get('image_urls', [])}")
|
| 187 |
+
|
| 188 |
if not auction.get("image_urls"):
|
| 189 |
+
# Try fetching page HTML as an additional debug aid (may differ from JS-rendered content)
|
| 190 |
+
try:
|
| 191 |
+
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
|
| 192 |
+
page_resp = requests.get(url, headers=headers, timeout=10)
|
| 193 |
+
page_preview = page_resp.text[:2000]
|
| 194 |
+
page_status = page_resp.status_code
|
| 195 |
+
except Exception as e:
|
| 196 |
+
page_preview = None
|
| 197 |
+
page_status = str(e)
|
| 198 |
+
|
| 199 |
+
return JSONResponse({
|
| 200 |
+
"error": "No images found",
|
| 201 |
+
"debug": {
|
| 202 |
+
"url": url,
|
| 203 |
+
"auction_data": auction,
|
| 204 |
+
"has_image_urls_key": "image_urls" in auction,
|
| 205 |
+
"image_urls_value": auction.get("image_urls"),
|
| 206 |
+
"page_status": page_status,
|
| 207 |
+
"page_html_preview": page_preview
|
| 208 |
+
}
|
| 209 |
+
}, status_code=400)
|
| 210 |
|
| 211 |
# 2. Ile zdjęć
|
| 212 |
total_available = len(auction["image_urls"])
|
|
|
|
| 277 |
}, status_code=500)
|
| 278 |
|
| 279 |
|
| 280 |
+
@app.post("/debug_scrape")
|
| 281 |
+
async def debug_scrape(url: str = Form(...)):
|
| 282 |
+
"""Run scraper for a URL and return the raw auction dict and a small HTML preview.
|
| 283 |
+
This endpoint is for debugging only."""
|
| 284 |
+
try:
|
| 285 |
+
import requests
|
| 286 |
+
# Choose scraper
|
| 287 |
+
if "allegro.pl" in url:
|
| 288 |
+
from web_scraper_allegro import scrape_allegro_offer
|
| 289 |
+
auction = scrape_allegro_offer(url)
|
| 290 |
+
elif "olx.pl" in url:
|
| 291 |
+
from web_scraper_olx import scrape_olx_offer
|
| 292 |
+
auction = scrape_olx_offer(url)
|
| 293 |
+
elif "ebay." in url:
|
| 294 |
+
from web_scraper_ebay import scrape_ebay_offer
|
| 295 |
+
auction = scrape_ebay_offer(url)
|
| 296 |
+
else:
|
| 297 |
+
return JSONResponse({"error": "Unsupported platform"}, status_code=400)
|
| 298 |
+
|
| 299 |
+
# Try a simple GET to capture non-JS HTML
|
| 300 |
+
try:
|
| 301 |
+
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
|
| 302 |
+
page_resp = requests.get(url, headers=headers, timeout=10)
|
| 303 |
+
page_preview = page_resp.text[:2000]
|
| 304 |
+
page_status = page_resp.status_code
|
| 305 |
+
except Exception as e:
|
| 306 |
+
page_preview = None
|
| 307 |
+
page_status = str(e)
|
| 308 |
+
|
| 309 |
+
return JSONResponse({
|
| 310 |
+
"status": "ok",
|
| 311 |
+
"auction": auction,
|
| 312 |
+
"page_status": page_status,
|
| 313 |
+
"page_html_preview": page_preview
|
| 314 |
+
})
|
| 315 |
+
|
| 316 |
+
except Exception as e:
|
| 317 |
+
import traceback
|
| 318 |
+
return JSONResponse({"status": "error", "error": str(e), "traceback": traceback.format_exc()}, status_code=500)
|
| 319 |
+
|
| 320 |
+
|
| 321 |
@app.get("/health")
|
| 322 |
def health():
|
| 323 |
return {"status": "ok", "message": "API running"}
|