# --- Cell 1 ---
# SIMMAX.ipynb
# --- Cell 2 ---
# Paleotemperatures with SIMMAX
# --- Cell 3 ---
from pangaeapy import PanDataSet
import math
import sys
import pandas as pd

# Analog and sample datasets, loaded through pangaeapy by their PANGAEA DOIs.
analogDS = PanDataSet('10.1594/PANGAEA.77352')
sampleDS = PanDataSet('10.1594/PANGAEA.55156')
# The annual mean
# levitus=pd.read_csv('woa13_decav_t00mn01v2.csv',sep=';')
# summer
levitus = pd.read_csv('woa13_decav_t15mn01.csv', sep=';')
# Compare with:
# http://discovery.ucl.ac.uk/101363/1/2002PA000774.pdf


# --- Cell 4 ---
def getLevitusTemp(lat, lon, lev, depth=30):
    """Look up the temperature nearest to ``depth`` for the grid cell of a position.

    The cell is addressed by ``floor(coordinate) + 0.5``, which is compared
    against the ``LATITUDE`` and ``LONGITUDE`` columns of ``lev``.

    Args:
        lat: Latitude of the position.
        lon: Longitude of the position.
        lev: DataFrame with ``LATITUDE`` and ``LONGITUDE`` columns and one
            column per depth level named ``'0'``, ``'5'``, ... ``'50'``.
        depth: Target depth; the level closest to it that still has a value
            is used. Defaults to 30, the value that used to be hard-coded.

    Returns:
        The value of the first matching row at the chosen level. ``0`` is
        returned (and the position printed) when no row matches the cell or
        when the matching cell has no value at any of the levels.
    """
    # Columns that make up the upper water column considered here.
    upper_levels = ['0', '5', '10', '15', '20', '25', '30', '35', '40', '45', '50']
    in_cell = ((lev['LATITUDE'] == (math.floor(lat) + 0.5))
               & (lev['LONGITUDE'] == (math.floor(lon) + 0.5)))
    # Drop levels without a value, then key the remaining ones by integer depth.
    by_level = lev[in_cell][upper_levels].dropna(axis=1).to_dict(orient='list')
    by_level = {int(level): values for level, values in by_level.items()}
    if not by_level:
        # The cell matched but every level was empty. Without this guard
        # min() below would raise ValueError on an empty sequence.
        print(str(lat) + ' x ' + str(lon))
        return 0
    # Closest available level to the requested depth.
    closest = min(by_level, key=lambda level: abs(level - depth))
    values = by_level[closest]
    if not values:
        # No row matched the grid cell at all.
        print(str(lat) + ' + ' + str(lon))
        return 0
    return values[0]


analogDS.data['Temperature'] = analogDS.data.apply(
    lambda x: getLevitusTemp(x['Latitude'], x['Longitude'], levitus), axis=1)
# --- Cell 5 ---
sampleDS.data[(sampleDS.data['Event'] == 'GIK16458-1')].head()
# --- Cell 6 ---
# Sometimes the dataset also contains data from different depths of one event; we simplify the procedure here a bit and remove event duplicates, e.g.
# deeper samples
sampleDS.data.drop_duplicates(subset='Event', keep='first', inplace=True)
sampleDS.data[(sampleDS.data['Event'] == 'GIK16458-1')].head()


# --- Cell 7 ---
def getDistance(row):
    """Return the haversine great-circle distance in kilometres for one row.

    Args:
        row: Mapping/Series with ``Latitude1``, ``Longitude1``, ``Latitude2``
            and ``Longitude2``; the values are converted with
            ``math.radians``, i.e. they are taken to be degrees.

    Returns:
        The distance between point 1 and point 2 in km (the radius is given
        in metres and the result is divided by 1000).
    """
    R = 6372800  # Earth radius in meters
    lat1, lon1 = row['Latitude1'], row['Longitude1']
    lat2, lon2 = row['Latitude2'], row['Longitude2']

    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)

    a = math.sin(dphi/2)**2 + \
        math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2
    # For (near-)antipodal points rounding can leave `a` marginally above 1,
    # which would make sqrt(1 - a) raise a math domain error. Only the upper
    # bound is clamped with a comparison so that NaN still propagates.
    if a > 1.0:
        a = 1.0

    return 2*R*math.atan2(math.sqrt(a), math.sqrt(1 - a))/1000


# Remove duplicates
# Merge to receive a sample x analog matrix (cross join via the constant key k)
Distances = pd.merge(
    sampleDS.data[['Event', 'Latitude', 'Longitude', 'Depth']].assign(k=1),
    analogDS.data[['Event', 'Latitude', 'Longitude', 'Temperature']].assign(k=1),
    on='k', suffixes=('1', '2')).drop('k', axis=1)
Distances['Event1'] = Distances['Event1'] + '_' + Distances['Depth'].map(str)
Distances['Distance'] = Distances.apply(getDistance, axis=1)
# The SIMMAX weighting divides similarity by distance (sj/dj), so a distance
# of exactly zero would be a division by zero. Replace zero distances with a
# very, very small positive value instead.
Distances.loc[Distances['Distance'] == 0, 'Distance'] = 0.00000000001
Distances.set_index(['Event1', 'Event2'], inplace=True)
# --- Cell 8 ---
Distances.head()
# --- Cell 9 ---
# Forams used by Pflaumann et al. in SIMMAX28
# G. mentum = Globorotalia cultrata and tumida
foramParams = ['G. bulloides', 'G. calida', 'G. falconensis', 'G. quinqueloba',
               'G. rubescens', 'G. digitata', 'G. aequilateralis',
               'G. conglobatus', 'G. ruber p', 'G. ruber w', 'G. tenellus',
               'G. trilobus tril', 'G. trilobus sac', 'O. universa',
               'S. dehiscens', 'G. crassaformis', 'G. mentum', 'G. hirsuta',
               'G. inflata', 'G. scitula', 'G. truncatulinoides',
               'N. dutertrei', 'N. pachyderma s', 'P/D int', 'G. glutinata',
               'P. obliquiloculata']
# Keep only the taxa that are actually present as columns in the sample data.
foramCols = [fP for fP in foramParams if fP in sampleDS.data.columns]
# --- Cell 10 ---
# Lets do the SIMMAX
import numpy as np
# Square root of the sum of squared percentages (row-wise vector length)
sampleDS.data['SQRTSUM'] = np.sqrt((sampleDS.data[foramCols]**2).sum(axis=1))
analogDS.data['SQRTSUM'] = np.sqrt((analogDS.data[foramCols]**2).sum(axis=1))

# Scale every row to unit length so the row-wise dot product is the similarity.
sampleIdx = sampleDS.data[foramCols].div(sampleDS.data['SQRTSUM'], axis=0)
analogIdx = analogDS.data[foramCols].div(analogDS.data['SQRTSUM'], axis=0)
# Same "<Event>_<Depth>" key as used for Event1 in Distances.
sampleIdx['Event'] = sampleDS.data['Event'] + '_' + sampleDS.data['Depth'].map(str)
sampleIdx.set_index('Event', inplace=True)
analogIdx['Event'] = analogDS.data['Event']
analogIdx.set_index('Event', inplace=True)

# Element-wise products for every (sample, analog) pair, one row per pair.
SimIndex = pd.DataFrame(
    (analogIdx[foramCols].values * sampleIdx[foramCols].values[:, None]).reshape(-1, analogIdx.shape[1]),
    pd.MultiIndex.from_product([sampleIdx.index, analogIdx.index]),
    sampleIdx.columns
)
Similars = pd.DataFrame(SimIndex.sum(axis=1), columns=['Similarity'])
Similars['Distance'] = Distances['Distance']
Similars['Latitude'] = Distances['Latitude1']
Similars['Longitude'] = Distances['Longitude1']
Similars['Temperature'] = Distances['Temperature']
# --- Cell 11 ---
# s=Similars.loc['PS1231-2'].sort_values(['Similarity'],ascending=False)
# s[s['Similarity']>=0.999]
s = Similars[Similars['Similarity'] >= 0.999]
# --- Cell 12 ---
# Original SIMMAX is using those values with similarity index >0.79 only
# mostSimilars=Similars.loc[(Similars['Similarity']>=0.79)]
# The revised SIMMAX is using the 10 top most similars with sim index >0.9
mostSimilars = (Similars[Similars['Similarity'] >= 0.9]
                .sort_values(by='Similarity', ascending=False)
                .groupby(level=0).head(10)
                .sort_index(level=0, sort_remaining=False))
mostSimilars.index.names = ['sampleEvent', 'analogEvent']
mostSimilars
# --- Cell 13 ---
analogDS.data[analogDS.data['Event'].isin(['V27-23', 'PS1707-1', 'RC13-275', 'RC11-79'])]
# --- Cell 14 ---
# Paleotemperature calculations following Pflaumann's Formula (3)
PaleoTemp = pd.DataFrame()
# Per sample: mean analog temperature weighted by similarity / distance,
# i.e. sum(T * S / D) / sum(S / D) over the selected analogs.
simWeight = mostSimilars['Similarity'] / mostSimilars['Distance']
weightedTemp = mostSimilars['Temperature'] * mostSimilars['Similarity'] / mostSimilars['Distance']
PaleoTemp['TEMP'] = weightedTemp.groupby(level=0).sum() / simWeight.groupby(level=0).sum()
PaleoTemp['LAT'] = mostSimilars.groupby(level=0)['Latitude'].max()
PaleoTemp['LON'] = mostSimilars.groupby(level=0)['Longitude'].max()
# --- Cell 15 ---
import cartopy.crs as ccrs
import cartopy.feature as feat
import matplotlib.pyplot as plt

proj = ccrs.LambertCylindrical()
plt.figure(figsize=(10, 10), dpi=200)
ax = plt.axes(projection=proj)
land_50m = feat.NaturalEarthFeature('physical', 'land', '50m', edgecolor='grey',
                                    facecolor=feat.COLORS['land'])
# The triangulated contouring rejects NaN input, so plot from a copy that
# has incomplete rows removed; PaleoTemp itself is left untouched.
plotData = PaleoTemp.dropna(subset=['TEMP', 'LAT', 'LON'])
ax.tricontourf(plotData['LON'], plotData['LAT'], plotData['TEMP'],
               levels=[1, 1.5, 2, 2.5, 3.5, 4, 5, 6, 8, 12, 16, 20, 22, 24, 26, 28],
               cmap="rainbow", transform=ccrs.Geodetic())
ax.add_feature(land_50m)
# No `cmap` here: without `c` matplotlib ignores it and only emits a warning.
ax.scatter(plotData['LON'], plotData['LAT'], label=None, alpha=0.5, s=1,
           transform=ccrs.Geodetic())
# Label every sample position with its temperature rounded to one decimal.
for i, point in plotData.iterrows():
    ax.text(point['LON'], point['LAT'], str(point['TEMP'].round(1)), fontsize=7,
            transform=ccrs.Geodetic())
# --- Cell 16 ---
## References:
# --- Cell 17 ---
print(analogDS.citation)
print()
print(sampleDS.citation)
# --- Cell 18 ---