"""Build the combined training metadata table ``train_full.csv``.

The script concatenates the iNat and HM training metadata CSVs, normalises
the column dtypes, keeps only the rows whose image file exists under the
dataset root, and writes the surviving rows to ``train_full.csv``.
"""
import os

import pandas as pd

# Default inputs/outputs; the CSV names are resolved against the current
# working directory, exactly as the literals were in the original script.
INAT_METADATA_CSV = 'SnakeCLEF2023-TrainMetadata-iNat.csv'
HM_METADATA_CSV = 'SnakeCLEF2023-TrainMetadata-HM.csv'
DATASET_ROOT = '/data1/dataset/SnakeCLEF2024/'
INAT_IMAGE_PREFIX = 'SnakeCLEF2023-large_size/'
OUTPUT_CSV = 'train_full.csv'


def build_train_metadata(
    inat_csv: str = INAT_METADATA_CSV,
    hm_csv: str = HM_METADATA_CSV,
    root: str = DATASET_ROOT,
    inat_image_prefix: str = INAT_IMAGE_PREFIX,
) -> pd.DataFrame:
    """Return the merged metadata restricted to rows whose image exists.

    Args:
        inat_csv: Path of the iNat metadata CSV. Its ``image_path`` values
            are prefixed with ``inat_image_prefix`` before merging.
        hm_csv: Path of the HM metadata CSV. Its ``image_path`` values are
            used unchanged.
        root: Directory that every ``image_path`` is joined onto when
            checking whether the image file exists.
        inat_image_prefix: String prepended to each iNat ``image_path``.

    Returns:
        A DataFrame with a fresh 0..n-1 index in which ``endemic`` is bool,
        ``class_id`` is int, every other column is str, and every remaining
        ``image_path`` points at an existing file below ``root``.

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If ``image_path``, ``endemic`` or ``class_id`` is missing.
        ValueError: If ``class_id`` holds values that cannot be cast to int
            (for example missing values).
    """
    inat = pd.read_csv(inat_csv)
    inat['image_path'] = inat_image_prefix + inat['image_path']
    hm = pd.read_csv(hm_csv)

    merged = pd.concat([inat, hm], axis=0, ignore_index=True)
    merged['endemic'] = merged['endemic'].astype(bool)
    merged['class_id'] = merged['class_id'].astype(int)
    # Everything else is stored as text; this also guarantees that
    # ``image_path`` is a str for os.path.join below.
    for col in merged.columns:
        if col not in ('endemic', 'class_id'):
            merged[col] = merged[col].astype(str)

    # Cast to bool so that an empty table still yields a boolean mask
    # (Series.apply on an empty Series does not infer a bool dtype).
    image_exists = merged['image_path'].apply(
        lambda rel_path: os.path.exists(os.path.join(root, rel_path))
    ).astype(bool)
    return merged[image_exists].reset_index(drop=True)


def main() -> None:
    """Build the table with the default paths and write ``train_full.csv``."""
    build_train_metadata().to_csv(OUTPUT_CSV, index=False)
    print('suceess')


if __name__ == '__main__':
    main()