Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files: scrape_3gpp.py +17 -3
scrape_3gpp.py
CHANGED
|
@@ -428,9 +428,23 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
|
|
| 428 |
|
| 429 |
# After processing all files and directories
|
| 430 |
# Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
| 435 |
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
| 436 |
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|
|
|
|
| 428 |
|
| 429 |
# After processing all files and directories
|
| 430 |
# Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
|
| 431 |
+
guide_df = None
|
| 432 |
+
|
| 433 |
+
# Attempt to load the guide.xlsx file if it exists
|
| 434 |
+
guide_file_path = 'guide.xlsx'
|
| 435 |
+
if os.path.exists(guide_file_path):
|
| 436 |
+
guide_df = pd.read_excel(guide_file_path, usecols=['Source', 'TDoc', 'TDoc Status'])
|
| 437 |
+
else:
|
| 438 |
+
print(f"Warning: {guide_file_path} not found.")
|
| 439 |
+
|
| 440 |
+
# Proceed with the rest of the function, ensuring guide_df is checked before use
|
| 441 |
+
if guide_df is not None:
|
| 442 |
+
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
| 443 |
+
# Use tdoc_source_map as needed
|
| 444 |
+
else:
|
| 445 |
+
print("Error: guide_df is not initialized. Exiting function.")
|
| 446 |
+
return
|
| 447 |
+
|
| 448 |
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
| 449 |
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
| 450 |
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|