Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
bbf5506
1
Parent(s):
fac6e9b
python/get_entities: moar asserts and checks
Browse files - python/get_entities.py +5 -2
python/get_entities.py
CHANGED
|
@@ -24,21 +24,24 @@ with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
|
|
| 24 |
|
| 25 |
bar = ChargingBar('get entities', max=len(options))
|
| 26 |
for o in options[1:]:
|
|
|
|
| 27 |
def get_bco():
|
| 28 |
(name, bco)= (o.text, o.attrs['value'])
|
| 29 |
page = requests.post(URL, data={'bco': bco})
|
| 30 |
soup = BeautifulSoup(page.content, 'html.parser')
|
|
|
|
| 31 |
try:
|
| 32 |
img = soup.select_one(selectors.logosbancos).attrs['src']
|
| 33 |
img = img.replace('../', 'https://www.bcra.gob.ar/')
|
| 34 |
fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
|
| 35 |
web.get_img_logo(img, fn)
|
| 36 |
except AttributeError as err:
|
| 37 |
-
print('img
|
| 38 |
img = None
|
| 39 |
|
| 40 |
a = soup.select_one(selectors.entity_http)
|
| 41 |
try:
|
|
|
|
| 42 |
a = a.attrs['href']
|
| 43 |
except AttributeError:
|
| 44 |
a = soup.select_one(selectors.entity_mailto)
|
|
@@ -54,7 +57,7 @@ with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
|
|
| 54 |
try:
|
| 55 |
get_bco()
|
| 56 |
except Exception as e:
|
| 57 |
-
print(f'Error processing: {
|
| 58 |
|
| 59 |
i+=1
|
| 60 |
bar.next()
|
|
|
|
| 24 |
|
| 25 |
bar = ChargingBar('get entities', max=len(options))
|
| 26 |
for o in options[1:]:
|
| 27 |
+
assert(o)
|
| 28 |
def get_bco():
|
| 29 |
(name, bco)= (o.text, o.attrs['value'])
|
| 30 |
page = requests.post(URL, data={'bco': bco})
|
| 31 |
soup = BeautifulSoup(page.content, 'html.parser')
|
| 32 |
+
img = None
|
| 33 |
try:
|
| 34 |
img = soup.select_one(selectors.logosbancos).attrs['src']
|
| 35 |
img = img.replace('../', 'https://www.bcra.gob.ar/')
|
| 36 |
fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
|
| 37 |
web.get_img_logo(img, fn)
|
| 38 |
except AttributeError as err:
|
| 39 |
+
print(f'couldnt extract image from {img}: {err}')
|
| 40 |
img = None
|
| 41 |
|
| 42 |
a = soup.select_one(selectors.entity_http)
|
| 43 |
try:
|
| 44 |
+
assert(a)
|
| 45 |
a = a.attrs['href']
|
| 46 |
except AttributeError:
|
| 47 |
a = soup.select_one(selectors.entity_mailto)
|
|
|
|
| 57 |
try:
|
| 58 |
get_bco()
|
| 59 |
except Exception as e:
|
| 60 |
+
print(f'Error processing: {o.url}')
|
| 61 |
|
| 62 |
i+=1
|
| 63 |
bar.next()
|