Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
f7e5bce
1
Parent(s):
304ab5e
bugfix: correctly position logos and fix selenium code
Browse files
- crawler/imtool.py +19 -13
- crawler/screenshot.py +3 -2
crawler/imtool.py
CHANGED
|
@@ -76,9 +76,10 @@ def crop(fn, logos):
|
|
| 76 |
im = cv2.imread(fn)
|
| 77 |
|
| 78 |
(h, w, c) = im.shape
|
|
|
|
| 79 |
(tx, ty)= (
|
| 80 |
-
math.ceil(w/(
|
| 81 |
-
math.ceil(h/(
|
| 82 |
)
|
| 83 |
|
| 84 |
print('shape', basename, tx, ty, w, h, logos)
|
|
@@ -86,18 +87,21 @@ def crop(fn, logos):
|
|
| 86 |
for y in range(ty):
|
| 87 |
color = (0,x*(255/tx),y*(255/ty))
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
tw
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
start = floor_point(f.x, f.y)
|
| 98 |
end = floor_point(f.x + f.w, f.y + f.h)
|
| 99 |
|
| 100 |
-
print(x, y, start, end, logos)
|
| 101 |
im = cv2.rectangle(im, start, end, color, 10)
|
| 102 |
li = []
|
| 103 |
for l in logos:
|
|
@@ -144,9 +148,9 @@ def crop(fn, logos):
|
|
| 144 |
with open(txt_name, 'w') as f:
|
| 145 |
for p in li:
|
| 146 |
print(p)
|
| 147 |
-
|
| 148 |
-
floor_point(p.x, p.y),
|
| 149 |
-
floor_point(p.x + p.w, p.y + p.h),
|
| 150 |
c,
|
| 151 |
5)
|
| 152 |
cx = p.w/2 + p.x
|
|
@@ -155,6 +159,8 @@ def crop(fn, logos):
|
|
| 155 |
a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
|
| 156 |
f.write(a)
|
| 157 |
print(a)
|
|
|
|
|
|
|
| 158 |
cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
|
| 159 |
|
| 160 |
if __name__ == '__main__':
|
|
|
|
| 76 |
im = cv2.imread(fn)
|
| 77 |
|
| 78 |
(h, w, c) = im.shape
|
| 79 |
+
(tw, th) = (min(w, TILE_SIZE), min(h, TILE_SIZE))
|
| 80 |
(tx, ty)= (
|
| 81 |
+
math.ceil(w/(tw*TILE_OVERLAP)),
|
| 82 |
+
math.ceil(h/(th*TILE_OVERLAP))
|
| 83 |
)
|
| 84 |
|
| 85 |
print('shape', basename, tx, ty, w, h, logos)
|
|
|
|
| 87 |
for y in range(ty):
|
| 88 |
color = (0,x*(255/tx),y*(255/ty))
|
| 89 |
|
| 90 |
+
|
| 91 |
+
if tx < 2:
|
| 92 |
+
xs = 0
|
| 93 |
+
else:
|
| 94 |
+
xs = (w - tw)*x/(tx - 1)
|
| 95 |
+
if ty < 2:
|
| 96 |
+
ys = 0
|
| 97 |
+
else:
|
| 98 |
+
ys = (h - th)*y/(ty - 1)
|
| 99 |
+
|
| 100 |
+
f = BoundingBox(xs, ys, tw, th)
|
| 101 |
|
| 102 |
start = floor_point(f.x, f.y)
|
| 103 |
end = floor_point(f.x + f.w, f.y + f.h)
|
| 104 |
|
|
|
|
| 105 |
im = cv2.rectangle(im, start, end, color, 10)
|
| 106 |
li = []
|
| 107 |
for l in logos:
|
|
|
|
| 148 |
with open(txt_name, 'w') as f:
|
| 149 |
for p in li:
|
| 150 |
print(p)
|
| 151 |
+
dim = cv2.rectangle(nim,
|
| 152 |
+
floor_point(p.x - p.w/2, p.y - p.h/2),
|
| 153 |
+
floor_point(p.x + p.w/2, p.y + p.h/2),
|
| 154 |
c,
|
| 155 |
5)
|
| 156 |
cx = p.w/2 + p.x
|
|
|
|
| 159 |
a = f"{basename} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}"
|
| 160 |
f.write(a)
|
| 161 |
print(a)
|
| 162 |
+
cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
|
| 163 |
+
|
| 164 |
cv2.imwrite(f'{debug_out}/{basename}.debug.png', im)
|
| 165 |
|
| 166 |
if __name__ == '__main__':
|
crawler/screenshot.py
CHANGED
|
@@ -17,11 +17,12 @@ options.add_argument("--window-size=1920x8000")
|
|
| 17 |
def coord_to_point(c):
|
| 18 |
x = math.floor(c['x'] + c['width']/2)
|
| 19 |
y = math.floor(c['y'] + c['height']/2)
|
| 20 |
-
return f"{x} {y} {math.
|
| 21 |
|
| 22 |
driver = webdriver.Firefox(options=options)
|
| 23 |
def sc_entity(e: Entity):
|
| 24 |
print(e)
|
|
|
|
| 25 |
driver.get(e.url)
|
| 26 |
driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
|
| 27 |
driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
|
|
@@ -29,7 +30,7 @@ def sc_entity(e: Entity):
|
|
| 29 |
logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
|
| 30 |
with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
|
| 31 |
for i in logos:
|
| 32 |
-
f.write(f"{e.bco} {coord_to_point(i.rect)}")
|
| 33 |
|
| 34 |
if __name__ == '__main__':
|
| 35 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|
|
|
|
| 17 |
def coord_to_point(c):
|
| 18 |
x = math.floor(c['x'] + c['width']/2)
|
| 19 |
y = math.floor(c['y'] + c['height']/2)
|
| 20 |
+
return f"{x} {y} {math.ceil(c['width'])} {math.ceil(c['height'])}"
|
| 21 |
|
| 22 |
driver = webdriver.Firefox(options=options)
|
| 23 |
def sc_entity(e: Entity):
|
| 24 |
print(e)
|
| 25 |
+
driver.implicitly_wait(10)
|
| 26 |
driver.get(e.url)
|
| 27 |
driver.save_screenshot(f"{e.DATA_PATH}/{e.bco}.png")
|
| 28 |
driver.save_full_page_screenshot(f"{e.DATA_PATH}/{e.bco}.full.png")
|
|
|
|
| 30 |
logos = driver.find_elements(By.CSS_SELECTOR, selectors.logo)
|
| 31 |
with open(f"{e.DATA_PATH}/{e.bco}.full.txt", 'w') as f:
|
| 32 |
for i in logos:
|
| 33 |
+
f.write(f"{e.bco} {coord_to_point(i.rect)}\n")
|
| 34 |
|
| 35 |
if __name__ == '__main__':
|
| 36 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|