Spaces:
Sleeping
Sleeping
| --- Cell 1 --- | |
| SIMMAX.ipynb | |
| --- Cell 2 --- | |
| # Paleotemperatures with SIMMAX | |
| --- Cell 3 --- | |
| from pangaeapy import PanDataSet | |
| import math | |
| import sys | |
| import pandas as pd | |
# Fetch the two PANGAEA datasets by DOI. Judging by the variable names,
# the first serves as the analog (reference) set and the second holds the
# samples to be reconstructed -- confirm against the dataset descriptions.
analogDS = PanDataSet('10.1594/PANGAEA.77352')
sampleDS = PanDataSet('10.1594/PANGAEA.55156')

# Gridded temperature table (semicolon-separated CSV).
# According to the original notes this file holds the summer values; the
# annual mean would be read with:
#   levitus = pd.read_csv('woa13_decav_t00mn01v2.csv', sep=';')
levitus = pd.read_csv(
    'woa13_decav_t15mn01.csv',
    sep=';',
)

# Results can be compared with:
# http://discovery.ucl.ac.uk/101363/1/2002PA000774.pdf
| --- Cell 4 --- | |
def getLevitusTemp(lat, lon, lev, depth=30):
    """Return the gridded temperature closest to ``depth`` at a position.

    The grid cell is selected by comparing the ``LATITUDE`` and
    ``LONGITUDE`` columns of ``lev`` with ``floor(coordinate) + 0.5``.
    Among the depth-level columns ``'0'``, ``'5'`` ... ``'50'`` that hold a
    value for that cell, the one whose label is numerically closest to
    ``depth`` is used.

    Args:
        lat: Latitude of the position.
        lon: Longitude of the position.
        lev: DataFrame with the columns ``LATITUDE``, ``LONGITUDE`` and the
            depth-level columns ``'0'`` to ``'50'`` (step 5).
        depth: Target depth, in the unit of the column labels (presumably
            metres -- confirm against the data file). Defaults to 30.

    Returns:
        The temperature value of the selected depth level, or ``0`` when
        the grid cell is missing or has no value at any depth level. A
        message with the coordinates is printed in both fallback cases.

    NOTE(review): ``0`` is also a valid temperature, so callers cannot
    distinguish "no data" from a real value; kept for compatibility.
    """
    depth_columns = [str(level) for level in range(0, 55, 5)]

    in_cell = (
        (lev['LATITUDE'] == math.floor(lat) + 0.5)
        & (lev['LONGITUDE'] == math.floor(lon) + 0.5)
    )
    profile = lev.loc[in_cell, depth_columns]

    # No grid cell for this position at all.
    if profile.empty:
        print(str(lat) + ' + ' + str(lon))
        return 0

    # Drop depth levels without a value. If nothing is left the cell exists
    # but carries no data; previously this case crashed in min() on an
    # empty sequence.
    profile = profile.dropna(axis=1)
    if profile.shape[1] == 0:
        print(str(lat) + ' x ' + str(lon))
        return 0

    # Column labels are strings; compare them numerically with the target.
    closest = min(profile.columns, key=lambda level: abs(int(level) - depth))
    return profile[closest].tolist()[0]
def _levitus_for_row(row):
    """Look up the gridded temperature for one analog row."""
    return getLevitusTemp(row['Latitude'], row['Longitude'], levitus)


# Attach a temperature to every row of the analog dataset.
analogDS.data['Temperature'] = analogDS.data.apply(_levitus_for_row, axis=1)
| --- Cell 5 --- | |
# Show the first rows recorded for event GIK16458-1.
sampleDS.data.loc[sampleDS.data['Event'] == 'GIK16458-1'].head()
| --- Cell 6 --- | |
# An event can contribute several rows (e.g. samples from different
# depths). To keep the procedure simple only the first row of each event
# is kept; all later rows of the same event are discarded.
sampleDS.data.drop_duplicates(
    subset='Event',
    keep='first',
    inplace=True,
)

# Same preview as before, now with the duplicates removed.
sampleDS.data.loc[sampleDS.data['Event'] == 'GIK16458-1'].head()
| --- Cell 7 --- | |
def getDistance(row):
    """Return the great-circle distance between two positions in kilometres.

    Uses the haversine formula on a sphere with a radius of 6372800 m.

    Args:
        row: Mapping (e.g. a DataFrame row) providing ``Latitude1``,
            ``Longitude1``, ``Latitude2`` and ``Longitude2`` in decimal
            degrees.

    Returns:
        The distance in kilometres as a float. NaN coordinates yield NaN.
    """
    R = 6372800  # Earth radius in meters
    lat1, lon1 = row['Latitude1'], row['Longitude1']
    lat2, lon2 = row['Latitude2'], row['Longitude2']

    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlambda = math.radians(lon2 - lon1) / 2

    a = (
        math.sin(half_dphi) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlambda) ** 2
    )
    # Rounding can push ``a`` marginally above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Only the upper
    # bound is clamped so that NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0

    # Central angle times radius gives metres; divide by 1000 for km.
    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a)) / 1000
# Build the full sample x analog table: both sides get a constant helper
# key ``k`` so that merging on it pairs every sample row with every analog
# row. Columns present on both sides receive the suffix 1 (sample) or
# 2 (analog).
Distances = (
    sampleDS.data[['Event', 'Latitude', 'Longitude', 'Depth']]
    .assign(k=1)
    .merge(
        analogDS.data[['Event', 'Latitude', 'Longitude', 'Temperature']].assign(k=1),
        on='k',
        suffixes=('1', '2'),
    )
    .drop('k', axis=1)
)

# Sample labels are made of the event name and the sample depth.
Distances['Event1'] = Distances['Event1'] + '_' + Distances['Depth'].map(str)

# Great-circle distance of every sample/analog pair (getDistance returns km).
Distances['Distance'] = Distances.apply(getDistance, axis=1)

# The SIMMAX weighting divides the similarity by the distance (s_j / d_j),
# so a distance of exactly zero would cause a division by zero. Such
# distances are replaced by a negligibly small positive value.
Distances.loc[Distances['Distance'] == 0, 'Distance'] = 0.00000000001

Distances.set_index(['Event1', 'Event2'], inplace=True)
| --- Cell 8 --- | |
# Show the first five sample/analog pairs.
Distances.head(n=5)
| --- Cell 9 --- | |
# Foraminifera taxa used by Pflaumann et al. in SIMMAX28.
# G. mentum = Globorotalia cultrata and tumida
foramParams = [
    'G. bulloides', 'G. calida', 'G. falconensis', 'G. quinqueloba',
    'G. rubescens', 'G. digitata', 'G. aequilateralis',
    'G. conglobatus', 'G. ruber p', 'G. ruber w', 'G. tenellus',
    'G. trilobus tril', 'G. trilobus sac', 'O. universa',
    'S. dehiscens', 'G. crassaformis', 'G. mentum', 'G. hirsuta',
    'G. inflata', 'G. scitula', 'G. truncatulinoides',
    'N. dutertrei', 'N. pachyderma s', 'P/D int', 'G. glutinata',
    'P. obliquiloculata',
]

# Keep only the taxa that exist as columns in BOTH datasets. The later
# steps index the sample and the analog data with this list, so a taxon
# that is missing from the analog data would otherwise raise a KeyError.
foramCols = [
    fP for fP in foramParams
    if fP in sampleDS.data.columns and fP in analogDS.data.columns
]
| --- Cell 10 --- | |
# SIMMAX similarity: scalar product of the length-normalised assemblage
# vectors of every sample/analog pair.
import numpy as np

# Length of each assemblage vector: square root of the sum of the squared
# taxon values.
sampleDS.data['SQRTSUM'] = np.sqrt(sampleDS.data[foramCols].pow(2).sum(axis=1))
analogDS.data['SQRTSUM'] = np.sqrt(analogDS.data[foramCols].pow(2).sum(axis=1))

# Scale every row by its length.
sampleIdx = sampleDS.data[foramCols].div(sampleDS.data['SQRTSUM'], axis='index')
analogIdx = analogDS.data[foramCols].div(analogDS.data['SQRTSUM'], axis='index')

# Index samples by "<event>_<depth>" and analogs by their event name.
sampleIdx['Event'] = sampleDS.data['Event'] + '_' + sampleDS.data['Depth'].map(str)
sampleIdx = sampleIdx.set_index('Event')
analogIdx['Event'] = analogDS.data['Event']
analogIdx = analogIdx.set_index('Event')

# Broadcasting multiplies every sample row with every analog row; the
# result is flattened sample by sample, which is the order produced by
# MultiIndex.from_product below.
SimIndex = pd.DataFrame(
    data=(
        analogIdx[foramCols].values
        * sampleIdx[foramCols].values[:, np.newaxis]
    ).reshape(-1, analogIdx.shape[1]),
    index=pd.MultiIndex.from_product([sampleIdx.index, analogIdx.index]),
    columns=sampleIdx.columns,
)

# Summing the per-taxon products yields the similarity of each pair.
Similars = pd.DataFrame(SimIndex.sum(axis=1), columns=['Similarity'])

# Attach distance, sample position and analog temperature; the values are
# aligned on the (sample, analog) index.
Similars['Distance'] = Distances['Distance']
Similars['Latitude'] = Distances['Latitude1']
Similars['Longitude'] = Distances['Longitude1']
Similars['Temperature'] = Distances['Temperature']
| --- Cell 11 --- | |
# Pairs that are (almost) identical in composition.
# To inspect a single sample instead:
#   s = Similars.loc['PS1231-2'].sort_values(['Similarity'], ascending=False)
#   s[s['Similarity'] >= 0.999]
s = Similars.loc[Similars['Similarity'] >= 0.999]
| --- Cell 12 --- | |
# The original SIMMAX uses all analogs with a similarity index > 0.79:
#   mostSimilars = Similars.loc[(Similars['Similarity'] >= 0.79)]
# The revised SIMMAX, used here, takes for every sample the ten most
# similar analogs among those with a similarity index of at least 0.9.
mostSimilars = (
    Similars.loc[Similars['Similarity'] >= 0.9]
    .sort_values(by='Similarity', ascending=False)
    .groupby(level=0)
    .head(10)
    .sort_index(level=0, sort_remaining=False)
)
mostSimilars.index.names = ['sampleEvent', 'analogEvent']
mostSimilars
| --- Cell 13 --- | |
# Look up the analog rows of a few selected events.
analogDS.data.loc[
    analogDS.data['Event'].isin(['V27-23', 'PS1707-1', 'RC13-275', 'RC11-79'])
]
| --- Cell 14 --- | |
# Paleotemperature following Pflaumann's formula (3): per sample, the
# analog temperatures are averaged with the weight similarity / distance.
# The sample position is taken from the selected pairs of each sample.
PaleoTemp = pd.DataFrame({
    'TEMP': (
        (
            mostSimilars['Temperature']
            * mostSimilars['Similarity']
            / mostSimilars['Distance']
        ).groupby(level=0).sum()
        / (
            mostSimilars['Similarity'] / mostSimilars['Distance']
        ).groupby(level=0).sum()
    ),
    'LAT': mostSimilars.groupby(level=0)['Latitude'].max(),
    'LON': mostSimilars.groupby(level=0)['Longitude'].max(),
})
| --- Cell 15 --- | |
| import cartopy.crs as ccrs | |
| import cartopy.feature as feat | |
| import matplotlib.pyplot as plt | |
# Map of the reconstructed temperatures.
proj = ccrs.LambertCylindrical()
plt.figure(figsize=(10, 10), dpi=200)
ax = plt.axes(projection=proj)
land_50m = feat.NaturalEarthFeature(
    'physical',
    'land',
    '50m',
    edgecolor='grey',
    facecolor=feat.COLORS['land'],
)

# Non-finite temperatures could be removed before contouring with:
#   PaleoTemp = PaleoTemp[np.isfinite(PaleoTemp['TEMP'])]
#   print(PaleoTemp)

# Filled contours of the temperature field, land drawn on top.
ax.tricontourf(
    PaleoTemp['LON'],
    PaleoTemp['LAT'],
    PaleoTemp['TEMP'],
    levels=[1, 1.5, 2, 2.5, 3.5, 4, 5, 6, 8, 12, 16, 20, 22, 24, 26, 28],
    cmap="rainbow",
    transform=ccrs.Geodetic(),
)
ax.add_feature(land_50m)

# Mark every sample position and label it with the rounded temperature.
ax.scatter(
    PaleoTemp['LON'],
    PaleoTemp['LAT'],
    label=None,
    alpha=0.5,
    s=1,
    cmap="rainbow",
    transform=ccrs.Geodetic(),
)
for _, point in PaleoTemp.iterrows():
    ax.text(
        point['LON'],
        point['LAT'],
        str(point['TEMP'].round(1)),
        fontsize=7,
        transform=ccrs.Geodetic(),
    )
| --- Cell 16 --- | |
| ## References: | |
| --- Cell 17 --- | |
# Print the citations of both datasets, separated by an empty line.
print(analogDS.citation, sampleDS.citation, sep='\n\n')
| --- Cell 18 --- | |