Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,9 @@ api=HfApi(token="")
|
|
| 15 |
filename="urls"
|
| 16 |
filename2="pages"
|
| 17 |
|
| 18 |
-
def init():
|
|
|
|
|
|
|
| 19 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
| 20 |
print(f'status code main:: {r.status_code}')
|
| 21 |
if r.status_code==200:
|
|
@@ -335,14 +337,22 @@ def sitemap(url,file_state,level):
|
|
| 335 |
print (e)
|
| 336 |
except Exception as e:
|
| 337 |
print (e)
|
| 338 |
-
|
| 339 |
url_front=[]
|
|
|
|
| 340 |
for ea_link in link2['TREE']:
|
| 341 |
url_list=ea_link['URL'].split("/")
|
| 342 |
url_front.append("".join(x for x in url_list[:3]))
|
|
|
|
| 343 |
print(f'URL_FRONT:: {url_front}')
|
| 344 |
#url_key=sort
|
| 345 |
-
uri_key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
######## Save Database ########
|
| 347 |
uid=uuid.uuid4()
|
| 348 |
#for ea in list(uri_key.keys()):
|
|
|
|
| 15 |
filename="urls"
|
| 16 |
filename2="pages"
|
| 17 |
|
| 18 |
+
def init(filename=None):
|
| 19 |
+
if filename==None:
|
| 20 |
+
filename=filename
|
| 21 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
| 22 |
print(f'status code main:: {r.status_code}')
|
| 23 |
if r.status_code==200:
|
|
|
|
| 337 |
print (e)
|
| 338 |
except Exception as e:
|
| 339 |
print (e)
|
| 340 |
+
'''url_page=[]
|
| 341 |
url_front=[]
|
| 342 |
+
url_json=[]
|
| 343 |
for ea_link in link2['TREE']:
|
| 344 |
url_list=ea_link['URL'].split("/")
|
| 345 |
url_front.append("".join(x for x in url_list[:3]))
|
| 346 |
+
url_page.append("/".join(z for z in url_list[3:]))
|
| 347 |
print(f'URL_FRONT:: {url_front}')
|
| 348 |
#url_key=sort
|
| 349 |
+
for each_link in uri_key.keys():
|
| 350 |
+
out_file=init(f'{each_link}.json')
|
| 351 |
+
|
| 352 |
+
'''
|
| 353 |
+
|
| 354 |
+
uri_key=sort_doc(link2['TREE'],file_state,8)
|
| 355 |
+
|
| 356 |
######## Save Database ########
|
| 357 |
uid=uuid.uuid4()
|
| 358 |
#for ea in list(uri_key.keys()):
|