hatamo committed on
Commit
4e49379
·
1 Parent(s): 4ad5120

Added scraper debugging

Browse files
Files changed (1) hide show
  1. code/app.py +65 -1
code/app.py CHANGED
@@ -182,8 +182,31 @@ async def validate_url(
182
  else:
183
  return JSONResponse({"error": "Unsupported platform"}, status_code=400)
184
 
 
 
 
185
  if not auction.get("image_urls"):
186
- return JSONResponse({"error": "No images"}, status_code=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # 2. Ile zdjęć
189
  total_available = len(auction["image_urls"])
@@ -254,6 +277,47 @@ async def validate_url(
254
  }, status_code=500)
255
 
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  @app.get("/health")
258
  def health():
259
  return {"status": "ok", "message": "API running"}
 
182
  else:
183
  return JSONResponse({"error": "Unsupported platform"}, status_code=400)
184
 
185
+ print(f"🔍 DEBUG: Auction data: {auction}")
186
+ print(f"🔍 DEBUG: Image URLs: {auction.get('image_urls', [])}")
187
+
188
  if not auction.get("image_urls"):
189
+ # Try fetching page HTML as an additional debug aid (may differ from JS-rendered content)
190
+ try:
191
+ headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
192
+ page_resp = requests.get(url, headers=headers, timeout=10)
193
+ page_preview = page_resp.text[:2000]
194
+ page_status = page_resp.status_code
195
+ except Exception as e:
196
+ page_preview = None
197
+ page_status = str(e)
198
+
199
+ return JSONResponse({
200
+ "error": "No images found",
201
+ "debug": {
202
+ "url": url,
203
+ "auction_data": auction,
204
+ "has_image_urls_key": "image_urls" in auction,
205
+ "image_urls_value": auction.get("image_urls"),
206
+ "page_status": page_status,
207
+ "page_html_preview": page_preview
208
+ }
209
+ }, status_code=400)
210
 
211
  # 2. Ile zdjęć
212
  total_available = len(auction["image_urls"])
 
277
  }, status_code=500)
278
 
279
 
280
def _debug_scrape_platform(url):
    """Return ``"allegro"``, ``"olx"``, ``"ebay"`` or ``None`` for *url*.

    The decision is made on the parsed hostname, not on a substring of the
    whole URL, so that inputs such as ``http://10.0.0.1/?x=allegro.pl`` or
    ``http://allegro.pl@evil.example/`` are not mistaken for a supported
    marketplace (the endpoint below fetches the URL server-side).
    """
    from urllib.parse import urlsplit

    # Tolerate scheme-less input ("allegro.pl/oferta/...") the same way the
    # old substring check did, by parsing it as a network-path reference.
    candidate = url if "://" in url else "//" + url
    try:
        parts = urlsplit(candidate)
        host = (parts.hostname or "").lower().rstrip(".")
    except ValueError:
        # Malformed authority (e.g. broken IPv6 literal).
        return None

    if parts.scheme not in ("", "http", "https"):
        return None

    if host == "allegro.pl" or host.endswith(".allegro.pl"):
        return "allegro"
    if host == "olx.pl" or host.endswith(".olx.pl"):
        return "olx"

    # eBay uses many country domains (ebay.com, ebay.de, ebay.co.uk,
    # ebay.com.au, ...). Accept an "ebay" label followed by a short TLD or a
    # "co"/"com" + two-letter country code.
    # NOTE(review): this is a heuristic, not a public-suffix lookup; tighten
    # it to an explicit allow-list if the set of eBay sites in use is known.
    labels = host.split(".")
    if "ebay" in labels:
        tail = labels[labels.index("ebay") + 1:]
        if len(tail) == 1 and tail[0].isalpha() and 2 <= len(tail[0]) <= 3:
            return "ebay"
        if (
            len(tail) == 2
            and tail[0] in ("co", "com")
            and tail[1].isalpha()
            and len(tail[1]) == 2
        ):
            return "ebay"
    return None


def _debug_fetch_preview(url, limit=2000):
    """Best-effort plain GET of *url*; return ``(page_status, page_preview)``.

    On success ``page_status`` is the HTTP status code and ``page_preview``
    the first *limit* characters of the body. On any failure ``page_status``
    is the exception text and ``page_preview`` is ``None``.
    """
    import requests

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        page_resp = requests.get(url, headers=headers, timeout=10)
        return page_resp.status_code, page_resp.text[:2000 if limit is None else limit]
    except Exception as e:
        # Deliberately broad: this fetch is only a debugging aid and must
        # never turn a successful scrape into an error response.
        return str(e), None


@app.post("/debug_scrape")
async def debug_scrape(url: str = Form(...)):
    """Run the matching scraper for *url* and return its raw result.

    This endpoint is for debugging only.

    Args:
        url: Offer URL submitted as a form field. Only Allegro, OLX and eBay
            hosts are accepted.

    Returns:
        JSONResponse with ``status``, the scraper's ``auction`` result, the
        ``page_status`` of a plain (non-JS) GET and a ``page_html_preview``
        of up to 2000 characters; HTTP 400 with ``{"error": "Unsupported
        platform"}`` for other hosts; HTTP 500 with the error text and
        traceback if anything raises.
    """
    try:
        import asyncio

        # Choose scraper based on the URL's hostname.
        platform = _debug_scrape_platform(url)
        if platform == "allegro":
            from web_scraper_allegro import scrape_allegro_offer as scraper
        elif platform == "olx":
            from web_scraper_olx import scrape_olx_offer as scraper
        elif platform == "ebay":
            from web_scraper_ebay import scrape_ebay_offer as scraper
        else:
            return JSONResponse({"error": "Unsupported platform"}, status_code=400)

        # The scrapers and `requests` are synchronous; run them in the default
        # executor so they do not stall the event loop for other requests.
        loop = asyncio.get_running_loop()
        auction = await loop.run_in_executor(None, scraper, url)

        # Try a simple GET to capture non-JS HTML.
        page_status, page_preview = await loop.run_in_executor(
            None, _debug_fetch_preview, url
        )

        return JSONResponse({
            "status": "ok",
            "auction": auction,
            "page_status": page_status,
            "page_html_preview": page_preview
        })

    except Exception as e:
        import traceback
        # NOTE(review): the traceback is returned to the client on purpose for
        # debugging; do not expose this endpoint publicly.
        return JSONResponse({"status": "error", "error": str(e), "traceback": traceback.format_exc()}, status_code=500)
319
+
320
+
321
  @app.get("/health")
322
  def health():
323
  return {"status": "ok", "message": "API running"}