Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*-
"""SummarizingData.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Wo7aUHTjFTRVpiK4efjRHI2gsA6fRip5

Exploratory summary of the recent-grads dataset: prints its shape, dtypes
and summary statistics, replaces the 'UN' marker with NaN in the earnings
columns, locates the row with the highest ``sharewomen`` value, and prints
the correlation between ``unemployed`` and ``low_wage_jobs``.
"""

# All imports live at the top so that every name (notably ``np``) is bound
# before its first use further down the script.
import numpy as np
import pandas as pd

# Use pandas to read in the recent-grads CSV
recent_grads = pd.read_csv("/content/recent_grads.csv")

# Print the shape
print(recent_grads.shape)

# Mount Google Drive when running inside Colab. The import is guarded because
# the google.colab package only exists in the Colab runtime; the CSV above is
# read from /content directly, so skipping the mount elsewhere is harmless for
# the steps in this script.
try:
    from google.colab import drive
except ImportError:
    pass
else:
    drive.mount('/content/drive')

# Print .dtypes
print(recent_grads.dtypes)

# Output summary statistics
print(recent_grads.describe())

# Exclude data of type object
print(recent_grads.describe(exclude=["object"]))

# Names of the columns we're searching for missing values
columns = ['median', 'p25th', 'p75th']

# Take a look at the dtypes
print(recent_grads[columns].dtypes)

# Find how missing values are represented
print(recent_grads["median"].unique())

# Replace the 'UN' missing-value marker with NaN (a single pass is enough).
# NOTE(review): this only swaps the marker; if these columns were parsed as
# strings they presumably still need pd.to_numeric before numeric analysis.
for column in columns:
    recent_grads.loc[recent_grads[column] == 'UN', column] = np.nan

# Select sharewomen column
sw_col = recent_grads['sharewomen']

# Output first five rows
print(sw_col.head())

# Use max to output maximum values
max_sw = recent_grads['sharewomen'].max()

# Print column max
print(max_sw)

# Output the row containing the maximum percentage of women
print(recent_grads[(recent_grads['sharewomen'] == max_sw)])

# Convert the two columns of interest to a 2-D numpy array
recent_grads_np = np.array(recent_grads[['unemployed', 'low_wage_jobs']])

# Print the type of recent_grads_np
print(type(recent_grads_np))

# Correlation matrix between unemployed (column 0) and low_wage_jobs (column 1)
print(np.corrcoef(recent_grads_np[:, 0], recent_grads_np[:, 1]))