Spaces:
Paused
Paused
sohamw03
committed on
Commit
·
b6e302a
1
Parent(s):
e8287ed
fix: scraper
Browse files
- backend/scraper.py +2 -0
backend/scraper.py
CHANGED
|
@@ -274,6 +274,8 @@ class CrawlForAIScraper:
|
|
| 274 |
# Remove units from width and height: get start of the entity till the first non-digit character
|
| 275 |
width = "".join([i for i in img.get("width", "0") if i.isdigit() or i == "."])
|
| 276 |
height = "".join([i for i in img.get("height", "0") if i.isdigit() or i == "."])
|
|
|
|
|
|
|
| 277 |
width, height = float(width), float(height)
|
| 278 |
if width > 300 and height > 300 and "pixel" not in src and "icon" not in src:
|
| 279 |
images.append((src, width, height))
|
|
|
|
| 274 |
# Remove units from width and height: get start of the entity till the first non-digit character
|
| 275 |
width = "".join([i for i in img.get("width", "0") if i.isdigit() or i == "."])
|
| 276 |
height = "".join([i for i in img.get("height", "0") if i.isdigit() or i == "."])
|
| 277 |
+
if width == '' or height == '':
|
| 278 |
+
continue
|
| 279 |
width, height = float(width), float(height)
|
| 280 |
if width > 300 and height > 300 and "pixel" not in src and "icon" not in src:
|
| 281 |
images.append((src, width, height))
|