File size: 6,950 Bytes
de15ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
--- Cell 1 ---
SIMMAX.ipynb

--- Cell 2 ---
# Paleotemperatures with SIMMAX 

--- Cell 3 ---
from pangaeapy import PanDataSet
import math
import sys
import pandas as pd
# Load the two data sets by DOI: analogDS is the set later used as analogs,
# sampleDS holds the samples to be compared against them.
analogDS = PanDataSet('10.1594/PANGAEA.77352')
sampleDS=PanDataSet('10.1594/PANGAEA.55156')
# Temperature grid, read from a semicolon-separated CSV file.
# NOTE(review): the file names look like World Ocean Atlas 2013 exports - confirm.
# Alternative input, the annual mean:
#levitus=pd.read_csv('woa13_decav_t00mn01v2.csv',sep=';')
# Input actually used, summer:
levitus=pd.read_csv('woa13_decav_t15mn01.csv',sep=';')

#Compare with:
#http://discovery.ucl.ac.uk/101363/1/2002PA000774.pdf

--- Cell 4 ---
def getLevitusTemp(lat, lon, lev, depth=30):
    """Look up the gridded temperature for a position.

    The grid cell is addressed by its centre, i.e. ``floor(coordinate) + 0.5``
    for both latitude and longitude. Within that cell the value of the depth
    level closest to ``depth`` that actually holds a value is returned.

    Args:
        lat: Latitude of the position.
        lon: Longitude of the position.
        lev: DataFrame with the columns ``'LATITUDE'``, ``'LONGITUDE'`` and
            the depth-level columns ``'0'``, ``'5'``, ... ``'50'``.
        depth: Target depth level; defaults to 30, the value that used to be
            hard-coded.

    Returns:
        The value stored for the closest populated depth level, or 0 when the
        grid cell is missing from ``lev`` or holds no value at any level
        (a message with the coordinates is printed in both cases).
    """
    depth_columns = ['0', '5', '10', '15', '20', '25', '30', '35', '40', '45', '50']
    in_cell = (lev['LATITUDE'] == (math.floor(lat) + 0.5)) & (lev['LONGITUDE'] == (math.floor(lon) + 0.5))
    cell = lev.loc[in_cell, depth_columns]

    # No grid cell for this position at all.
    if len(cell) == 0:
        print(str(lat)+' + '+str(lon))
        return 0

    # Drop the depth levels without a value and key the rest by integer depth.
    profile = cell.dropna(axis=1).to_dict(orient='list')
    by_depth = {int(level): values for level, values in profile.items()}

    # The cell exists but every level is empty; calling min() on the empty
    # key set used to raise ValueError here.
    if not by_depth:
        print(str(lat)+' x '+str(lon))
        return 0

    # Closest populated depth level; ties resolve to the shallower level
    # because the keys keep the column order.
    closest = min(by_depth, key=lambda level: abs(level - depth))
    return by_depth[closest][0]
# Attach the gridded temperature at each analog position as a new column.
analogDS.data['Temperature'] = analogDS.data.apply(
    lambda record: getLevitusTemp(record['Latitude'], record['Longitude'], levitus),
    axis=1,
)

--- Cell 5 ---
# Preview the first rows recorded for the event GIK16458-1.
sampleDS.data[(sampleDS.data['Event']=='GIK16458-1')].head()

--- Cell 6 ---
# The data set can hold several rows (e.g. different depths) for one event.
# To keep the procedure simple, only the first row of each event is kept and
# the remaining ones (e.g. deeper samples) are dropped in place.
sampleDS.data.drop_duplicates(subset='Event', keep='first', inplace=True)
# Same preview as before, now showing what is left for this event.
sampleDS.data[(sampleDS.data['Event']=='GIK16458-1')].head()

--- Cell 7 ---
def getDistance(row):
    """Return the great-circle distance between two positions in kilometres.

    Uses the haversine formula on a sphere with a radius of 6372800 m.

    Args:
        row: Mapping (e.g. a DataFrame row) providing the coordinates in
            degrees under 'Latitude1', 'Longitude1', 'Latitude2' and
            'Longitude2'.

    Returns:
        The distance in kilometres as a float.
    """
    earth_radius_m = 6372800

    lat_a = row['Latitude1']
    lon_a = row['Longitude1']
    lat_b = row['Latitude2']
    lon_b = row['Longitude2']

    phi_a = math.radians(lat_a)
    phi_b = math.radians(lat_b)
    half_dphi = math.radians(lat_b - lat_a) / 2
    half_dlambda = math.radians(lon_b - lon_a) / 2

    # Haversine of the central angle between the two points.
    lat_term = math.sin(half_dphi) ** 2
    lon_term = math.cos(phi_a) * math.cos(phi_b) * math.sin(half_dlambda) ** 2
    a = lat_term + lon_term

    half_angle = math.atan2(math.sqrt(a), math.sqrt(1 - a))
    # Radius is in metres; divide by 1000 to report kilometres.
    return 2 * earth_radius_m * half_angle / 1000
#Remove duplicates

# Cross join through a constant helper key "k": one row per (sample, analog)
# pair. Columns present on both sides get the suffix 1 (sample) or 2 (analog).
Distances = (
    sampleDS.data[['Event', 'Latitude', 'Longitude', 'Depth']]
    .assign(k=1)
    .merge(
        analogDS.data[['Event', 'Latitude', 'Longitude', 'Temperature']].assign(k=1),
        on='k',
        suffixes=('1', '2'),
    )
    .drop(columns='k')
)
# Samples are identified by "<event>_<depth>".
Distances['Event1'] = Distances['Event1'] + '_' + Distances['Depth'].map(str)
Distances['Distance'] = Distances.apply(getDistance, axis=1)
# The SIMMAX weighting divides similarity by distance (sj/dj), so an exact
# zero distance would cause a division by zero. Replace it with a tiny
# positive value instead.
is_zero_distance = Distances['Distance'] == 0
Distances.loc[is_zero_distance, 'Distance'] = 1e-11
Distances.set_index(['Event1', 'Event2'], inplace=True)

--- Cell 8 ---
# Preview the first rows of the sample x analog distance table.
Distances.head()  

--- Cell 9 ---
# Foraminifera taxa used by Pflaumann et al. in SIMMAX28.
# G. mentum = Globorotalia cultrata and tumida
foramParams=['G. bulloides','G. calida','G. falconensis','G. quinqueloba','G. rubescens','G. digitata','G. aequilateralis',
             'G. conglobatus','G. ruber p','G. ruber w', 'G. tenellus','G. trilobus tril','G. trilobus sac','O. universa',
             'S. dehiscens','G. crassaformis','G. mentum', 'G. hirsuta', 'G. inflata', 'G. scitula','G. truncatulinoides',
             'N. dutertrei','N. pachyderma s','P/D int','G. glutinata','P. obliquiloculata']

# Keep only the taxa available in BOTH data sets. The similarity step selects
# these columns from the sample frame and from the analog frame, so a taxon
# missing on either side would raise a KeyError there.
foramCols = [
    fP for fP in foramParams
    if fP in sampleDS.data.columns and fP in analogDS.data.columns
]

--- Cell 10 ---
# SIMMAX similarity: compare every sample assemblage with every analog assemblage.
import numpy as np
# Length of each assemblage vector: square root of the sum of the squared
# values over the selected foram columns.
sampleDS.data['SQRTSUM']=np.sqrt((sampleDS.data[foramCols]**2).sum(axis=1))
analogDS.data['SQRTSUM']=np.sqrt((analogDS.data[foramCols]**2).sum(axis=1))
# Divide every row by its length so that each assemblage becomes a unit vector.
sampleIdx=sampleDS.data[foramCols].div(sampleDS.data['SQRTSUM'], axis=0)
analogIdx=analogDS.data[foramCols].div(analogDS.data['SQRTSUM'], axis=0)
# Samples are keyed by "<event>_<depth>", analogs by their event label.
sampleIdx['Event']=sampleDS.data['Event']+'_'+sampleDS.data['Depth'].map(str)
sampleIdx.set_index('Event', inplace=True)
analogIdx['Event']=analogDS.data['Event']
analogIdx.set_index('Event', inplace=True)
# Broadcast analogs against samples and flatten to one row per
# (sample, analog) pair; each row holds the element-wise products of the two
# unit vectors, one column per taxon.
SimIndex = pd.DataFrame(
    (analogIdx[foramCols].values * sampleIdx[foramCols].values[:, None]).reshape(-1, analogIdx.shape[1]),
    pd.MultiIndex.from_product([sampleIdx.index, analogIdx.index]),
   sampleIdx.columns
)
# Summing the products per row gives the dot product of the two unit
# vectors, stored as the similarity of the pair.
Similars=pd.DataFrame(SimIndex.sum(axis=1), columns=['Similarity'])
# Copy distance, sample position and analog temperature from Distances.
# NOTE(review): this relies on both frames sharing the same
# (sample, analog) index labels - confirm the labels are unique.
Similars['Distance']=Distances['Distance']
Similars['Latitude']=Distances['Latitude1']
Similars['Longitude']=Distances['Longitude1']
Similars['Temperature']=Distances['Temperature']

--- Cell 11 ---

# Alternative check for a single sample, kept for reference:
#s=Similars.loc['PS1231-2'].sort_values(['Similarity'],ascending=False)
#s[s['Similarity']>=0.999]
# All (sample, analog) pairs with a similarity of at least 0.999.
s=Similars[Similars['Similarity']>=0.999]

--- Cell 12 ---
# The original SIMMAX keeps every analog with a similarity index of at least 0.79:
# mostSimilars=Similars.loc[(Similars['Similarity']>=0.79)]
# The revised SIMMAX, used here, keeps per sample the ten most similar
# analogs among those with a similarity index of at least 0.9.
mostSimilars = (
    Similars[Similars['Similarity'] >= 0.9]
    .sort_values(by='Similarity', ascending=False)
    .groupby(level=0)
    .head(10)
    .sort_index(level=0, sort_remaining=False)
)
mostSimilars.index.names = ['sampleEvent', 'analogEvent']
mostSimilars

--- Cell 13 ---
# Inspect the analog records of four selected events.
analogDS.data[analogDS.data['Event'].isin(['V27-23','PS1707-1','RC13-275','RC11-79'])]

--- Cell 14 ---
# Paleotemperature following Pflaumann's formula (3): per sample, the analog
# temperatures are averaged with the weight similarity / distance.
weighted_temperature = mostSimilars['Temperature'] * mostSimilars['Similarity'] / mostSimilars['Distance']
weight = mostSimilars['Similarity'] / mostSimilars['Distance']

PaleoTemp = pd.DataFrame()
PaleoTemp['TEMP'] = weighted_temperature.groupby(level=0).sum() / weight.groupby(level=0).sum()

# Sample position, taken as the per-sample maximum of the stored coordinates.
per_sample = mostSimilars.groupby(level=0)
PaleoTemp['LAT'] = per_sample['Latitude'].max()
PaleoTemp['LON'] = per_sample['Longitude'].max()

--- Cell 15 ---
import cartopy.crs as ccrs
import cartopy.feature as feat
import matplotlib.pyplot as plt
# Map of the reconstructed temperatures: filled contours, land mask, sample
# positions and the rounded temperature printed next to each sample.
proj = ccrs.LambertCylindrical()
plt.figure(figsize=(10, 10), dpi=200)
ax = plt.axes(projection=proj)
land_50m = feat.NaturalEarthFeature(
    'physical',
    'land',
    '50m',
    edgecolor='grey',
    facecolor=feat.COLORS['land'],
)

# Optional clean-up / debugging, kept disabled:
#PaleoTemp = PaleoTemp[np.isfinite(PaleoTemp['TEMP'])]
#print(PaleoTemp)

contour_levels = [1, 1.5, 2, 2.5, 3.5, 4, 5, 6, 8, 12, 16, 20, 22, 24, 26, 28]
ax.tricontourf(
    PaleoTemp['LON'],
    PaleoTemp['LAT'],
    PaleoTemp['TEMP'],
    levels=contour_levels,
    cmap="rainbow",
    transform=ccrs.Geodetic(),
)
ax.add_feature(land_50m)
ax.scatter(
    PaleoTemp['LON'],
    PaleoTemp['LAT'],
    label=None,
    alpha=0.5,
    s=1,
    cmap="rainbow",
    transform=ccrs.Geodetic(),
)

# Label every sample with its temperature rounded to one decimal.
for _event, sample in PaleoTemp.iterrows():
    ax.text(
        sample['LON'],
        sample['LAT'],
        str(sample['TEMP'].round(1)),
        fontsize=7,
        transform=ccrs.Geodetic(),
    )

--- Cell 16 ---
## References:

--- Cell 17 ---
# Print the citations of both data sets, separated by an empty line.
print(analogDS.citation, sampleDS.citation, sep='\n\n')

--- Cell 18 ---