Spaces:

Almaatla
/

Standard_Intelligence_Dev

Sleeping

App Files Files Community

MaksG committed on Mar 26, 2024

Commit

9b3fe22

verified ·

1 Parent(s): 24c67cc

Update scrape_3gpp.py

Browse files

Files changed (1) hide show

scrape_3gpp.py +24 -25

scrape_3gpp.py CHANGED Viewed

@@ -426,33 +426,32 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
                     status = ""
                     data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
-                    # After processing all files and directories
-                    # Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
-                    guide_df = None
-    # Attempt to load the guide.xlsx file if it exists
-                    guide_file_path = 'guide.xlsx'
-                    if os.path.exists(guide_file_path):
-                        guide_df = pd.read_excel(guide_file_path, usecols=['Source', 'TDoc', 'TDoc Status'])
                     else:
-                        print(f"Warning: {guide_file_path} not found.")
-                    # Proceed with the rest of the function, ensuring guide_df is checked before use
-                    if guide_df is not None:
-                        tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
-                        # Use tdoc_source_map as needed
-                    else:
-                        print("Error: guide_df is not initialized. Exiting function.")
-                        return
-                    tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
-                    tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
-                    # Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
-                    for item in data:
-                        nom_du_fichier = item[1]  # Assuming 'Nom du fichier' is the first item in your data list
-                        if nom_du_fichier in tdoc_source_map:
-                            item[4] = tdoc_source_map[nom_du_fichier]  # Update the 'Source' field, assuming it's the fourth item
-                            item[5] = tdoc_status_map[nom_du_fichier]
                     processed_count += 1

                     status = ""
                     data.append([url+ "/" + folder + '.zip', folder , category, title, source,status, contenu])
+                    guide_file = 'guide.xlsx'
+                    if os.path.exists(guide_file):
+                        # If guide.xlsx exists, proceed with operations that require it
+                        try:
+                            guide_df = pd.read_excel(guide_file, usecols=['Source', 'TDoc', 'TDoc Status'])
+                            # Continue with the operations that require guide.xlsx
+                            # For example, reading the file, processing the data, etc.
+                            tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
+                            tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
+                            # Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
+                            for item in data:
+                                nom_du_fichier = item[1]  # Assuming 'Nom du fichier' is the first item in your data list
+                                if nom_du_fichier in tdoc_source_map:
+                                    item[4] = tdoc_source_map[nom_du_fichier]  # Update the 'Source' field, assuming it's the fourth item
+                                    item[5] = tdoc_status_map[nom_du_fichier]
+                            # Your code that depends on guide.xlsx goes here
+                        except Exception as e:
+                            print(f"An error occurred while processing {guide_file}: {e}")
+                            # Handle any errors that arise during processing
                     else:
+                        print(f"File {guide_file} not found. Skipping operations that require this file.")
+                        # Since guide.xlsx is not found, skip the related operations
                     processed_count += 1