Spaces:
Sleeping
Sleeping
| ######################################## IMPORTING REQUIRED LIBRARIES #################################### | |
| import os | |
| import sys | |
| import pandas as pd | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') | |
| from utilities import get_data, input_filter, clean_data, autogenerate_labels | |
| ################################################## INPUTS ################################################ | |
| # left_lat = 18.889833 | |
| # left_lon = 72.779844 | |
| # dist = 35 | |
| def data_sourcing(left_lat, left_lon, dist): | |
| lat, lon = input_filter(lat = left_lat, lon=left_lon) | |
| df = get_data(lat, lon, dist) | |
| df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False) | |
| return df | |
| def data_clean_for_training(df): | |
| df = clean_data(df) | |
| df.to_csv(f'{data_folder}/MMR_DATA_CLEAN.csv', index=False) | |
| return df | |
| if __name__ == '__main__': | |
| # df = data_sourcing() ## testing the data sourcing endpoint | |
| # if df: | |
| # print("Data loaded successfully !!") | |
| # clean_df = data_clean_for_training(df) | |
| df = pd.read_csv(f'{data_folder}/MMR_DATA.csv') | |
| df = clean_data(df) | |
| labelled_df, embeddings_df = autogenerate_labels(df) | |
| labelled_df.to_csv(f'{data_folder}/MMR_DATA_CLEAN_LABELLED.csv', index=False) | |
| embeddings_df.to_csv(f'{data_folder}/MMR_CLEAN_EMBEDDINGS.csv', index=False) |