dabbu2000 committed on
Commit
b78af38
·
1 Parent(s): 4f585b7

Delete germanShepherdCatalog.py

Browse files
Files changed (1) hide show
  1. germanShepherdCatalog.py +0 -258
germanShepherdCatalog.py DELETED
@@ -1,258 +0,0 @@
import math
import os
import sys
import urllib
import urllib.request

import numpy as np
import streamlit as st

from utils import *
9
-
10
class LoadCatalogue:
    """Fetch galaxy-catalogue data files, from local disk when running on the
    development machine or by downloading them from the remote data location."""

    def __init__(self, germanShepherdData='data/'):
        """
        Parameters
        ----------
        germanShepherdData : str
            Directory containing the catalogue files when running locally.
        """
        # The hostname check would enable the local-data path on the dev
        # machine, but it is immediately forced to False so the download
        # path is always used.
        self.germanShepherdRunningData = os.popen('hostname').read().startswith('Abhinit-Sundar-HP-Desktop')
        self.germanShepherdRunningData = False
        if self.germanShepherdRunningData:
            self.germanShepherdData = germanShepherdData
        else:
            # NOTE(review): joining .npy file names onto a search.html page is
            # unlikely to resolve — confirm the real remote data root.
            self.germanShepherdData = 'https://www.pedigreedatabase.com/german_shepherd_dog/search.html'
            self.germanShepherdDataLocal = 'data/'

    def germanShepherdLocalURL(self, germanShepherdFileInput, check_fullsize=False, fullsize=None):
        """Return a local path for ``germanShepherdFileInput``, downloading the
        file first when it is missing or (optionally) not fully downloaded.

        Parameters
        ----------
        germanShepherdFileInput : str
            Path (local run) or URL (deployed run) of the file.
        check_fullsize : bool
            If True, re-download when the local size differs from ``fullsize``.
        fullsize : int or None
            Expected size in bytes of the complete file.
        """
        if self.germanShepherdRunningData:
            return germanShepherdFileInput

        germanShepherdLocalFilepath = os.path.join(self.germanShepherdDataLocal, os.path.basename(germanShepherdFileInput))
        if not os.path.exists(germanShepherdLocalFilepath) or check_fullsize:

            germanShepherdFilesizeLocal = 0

            if os.path.exists(germanShepherdLocalFilepath):
                germanShepherdFileInfo = os.stat(germanShepherdLocalFilepath)
                germanShepherdFilesizeLocal = germanShepherdFileInfo.st_size

            # BUG FIX: the original referenced undefined names left over from a
            # partial rename (filesize_local, filepath_local, file_in).
            if fullsize != germanShepherdFilesizeLocal:
                lab = "Downloading {:s}... ".format(germanShepherdLocalFilepath)
                if os.path.basename(germanShepherdFileInput) == 'representations.npy':
                    lab += 'This one may take a while (up to ~5 mins), please stand by!\nOnce downloaded subsequent runs will be fast'

                with st.spinner(lab):
                    urllib.request.urlretrieve(germanShepherdFileInput, germanShepherdLocalFilepath)

        return germanShepherdLocalFilepath

    # cache works on local version, but not when deployed to share.streamlit.io
    # due to small memory allowance. Do not use for now
    # @st.cache(persist=True, max_entries=1, allow_output_mutation=True, ttl=3600, hash_funcs={dict: lambda _: None})#
    def download_catalogue_files(self, include_extra_features=True, file_ext='.npy',
                                 extra_features=None):
        """Download every catalogue file and return a dict holding the total
        galaxy count under 'ngals_tot'.

        Parameters
        ----------
        include_extra_features : bool
            Whether to also fetch the extra catalogue files.
        file_ext : str
            Extension of the catalogue files.
        extra_features : list of str or None
            Extra features to fetch; defaults to
            ['mag', 'photometric_redshift', 'source_type'].
        """
        # BUG FIX: the original def line was missing its trailing colon
        # (SyntaxError); the mutable-list default is replaced by the
        # None-sentinel idiom (behaviour unchanged).
        if extra_features is None:
            extra_features = ['mag', 'photometric_redshift', 'source_type']
        self.extra_features = extra_features
        full_catalogue = {}

        # always load in ra and dec
        # BUG FIX: self.data_loc / self.get_local_or_url do not exist on this
        # class; use the renamed attribute and method.
        self.features_radec = ['ra', 'dec']
        for fstr in self.features_radec:
            self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, fstr+file_ext))
            if fstr == 'ra':
                full_catalogue['ngals_tot'] = np.load(self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, fstr+file_ext)), mmap_mode='r').shape[0]

        # add in additional catalogue features of interest
        if include_extra_features:
            # download files to use later, as don't actually need this info yet
            for fstr in self.extra_features:
                self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, fstr+file_ext))

        return full_catalogue

    def load_catalogue_coordinates(self, include_extra_features=True,
                                   extra_features=None):
        """
        Return dictionary containing galaxy catalogue information.

        Parameters
        ----------
        include_extra_features : bool
            Whether or not to include additional catalogue info beyond sky location
        extra_features : list of strings or None
            The extra features to include; defaults to
            ['mag', 'photometric_redshift', 'source_type'].
        """
        # if not self.running_local:
        #     st.write('Needs to retrieve a few large files the first time you run it - please stand by!')

        if extra_features is None:
            extra_features = ['mag', 'photometric_redshift', 'source_type']
        self.extra_features = extra_features
        full_catalogue = {}
        file_type = '.npy'

        # always load in ra and dec
        # BUG FIX: use the renamed attribute/method instead of the
        # nonexistent self.data_loc / self.get_local_or_url.
        self.features_radec = ['ra', 'dec']
        for fstr in self.features_radec:
            full_catalogue[fstr] = np.load(self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, fstr+file_type)))

        # add in additional catalogue features of interest
        if include_extra_features:
            # download files to use later, as don't actually need this info yet
            for fstr in self.extra_features:
                self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, fstr+file_type))

        return full_catalogue

    # cache works on local version, but not when deployed to share.streamlit.io
    # due to small memory allowance. Do not use for now
    # @st.cache(persist=True, max_entries=1, allow_output_mutation=True, ttl=3600, hash_funcs={dict: lambda _: None})# #(suppress_st_warning=True)
    def load_representations(self):
        """Return array containing galaxy image representations."""
        # Keep separate from loading in catalogues, as when representation file
        # starts to get large will need to add in chunked access
        representations = np.load(self.germanShepherdLocalURL(os.path.join(self.germanShepherdData, 'representations.npy'), check_fullsize=True, fullsize=224000128))

        return representations
112
-
113
-
114
class Catalogue:
    """Contains a variety of operations to perform on galaxy catalogue/representation pairs"""

    def __init__(self, full_catalogue, representations=None,
                 data_loc='data/', file_ext='.npy'):
        """
        Parameters
        ----------
        full_catalogue : dict
            Catalogue dictionary; must contain 'ngals_tot' for searches.
        representations : array or None
            Galaxy image representations.
        data_loc : str
            Directory holding the per-feature catalogue files.
        file_ext : str
            Extension of the catalogue files.
        """
        self.full_catalogue = full_catalogue
        self.representations = representations

        self.data_loc = data_loc
        # BUG FIX: the original hard-coded '.npy' here, silently ignoring the
        # file_ext argument (same default, so existing callers are unaffected).
        self.file_ext = file_ext
        self.pixel_size = 0.262 / 3600 # arcsec to degrees

    def load_from_catalogue_indices(self, inds_load=None, include_extra_features=True,
                                    extra_features=None):
        """
        Return dictionary containing galaxy catalogue information for desired indices.

        Parameters
        ----------
        inds_load : array of ints or None
            Indices to load from disk; defaults to self.similar_inds
        include_extra_features : bool
            Whether or not to include additional catalogue info beyond sky location
        extra_features : list of strings or None
            The extra features to include; defaults to
            ['mag', 'photometric_redshift', 'source_type'].
        """
        # Mutable-list default replaced by the None-sentinel idiom.
        if extra_features is None:
            extra_features = ['mag', 'photometric_redshift', 'source_type']

        # integer codes stored on disk -> catalogue source-type labels
        source_type_dict = {0: 'DEV',
                            1: 'EXP',
                            2: 'REX',
                            3: 'SER'}

        if inds_load is None:
            inds_load = self.similar_inds

        self.extra_features = extra_features
        file_type = self.file_ext

        similarity_catalogue = {}

        # always load in ra and dec; mmap so only the requested rows are read
        self.features_radec = ['ra', 'dec']
        for fstr in self.features_radec:
            similarity_catalogue[fstr] = np.load(os.path.join(self.data_loc, fstr+file_type), mmap_mode='r')[inds_load]

        # option to add in additional catalogue features of interest
        if include_extra_features:
            for fstr in self.extra_features:
                similarity_catalogue[fstr] = np.load(os.path.join(self.data_loc, fstr+file_type), mmap_mode='r')[inds_load]

                if fstr == 'source_type': # map from bytes to string
                    similarity_catalogue[fstr] = np.array([source_type_dict[i] for i in similarity_catalogue[fstr]])

        return similarity_catalogue

    def search_catalogue(self, ra, dec, nnearest=1, far_distance_npix=10):
        """Return array index and ra dec of nearest galaxy to search point (query_ra, query_dec)"""

        self.search_ra = ra
        self.search_dec = dec

        # calculate angular separation of all objects from query point
        # perform in chunks, as streamlit app does not have large memory
        # and concurrent users will crash the ram allocation
        min_sep = 1e9
        chunksize = 1000000
        nchunks = math.ceil(self.full_catalogue['ngals_tot']/chunksize)
        for ichunk in range(nchunks):
            istart = ichunk*chunksize
            iend = (ichunk+1)*chunksize

            rai = np.load(os.path.join(self.data_loc, 'ra'+self.file_ext), mmap_mode='r')[istart:iend]
            deci = np.load(os.path.join(self.data_loc, 'dec'+self.file_ext), mmap_mode='r')[istart:iend]

            sep = angular_separation(self.search_ra, self.search_dec, rai, deci)

            min_sep_i = np.min(sep)
            if min_sep_i < min_sep:
                query_ind = np.argmin(sep)
                self.query_distance = sep[query_ind]
                self.query_ra = rai[query_ind]
                self.query_dec = deci[query_ind]
                self.query_ind = query_ind + istart
                min_sep = min_sep_i

        if self.query_distance > far_distance_npix*self.pixel_size:
            # notify of bad query
            st.write(('\nClosest galaxy in catalogue is quite far away from search point ({:.3f} degrees). Either this galaxy is not yet in our database, or is not in the DECaLS DR9 footprint. Using galaxy at (RA, Dec)=({:.4f}, {:.4f}) instead\n'.format(self.query_distance, self.query_ra, self.query_dec)))

        del sep

    def similarity_search(self, nnearest=5, min_angular_separation=96,
                          similarity_inv=False, model_version='v1'):
        """
        Return indices and similarity scores to nearest nnearest data samples.
        First index returned is the query galaxy.

        Parameters
        ----------
        nnearest: int
            Number of most similar galaxies to return
        min_angular_separation: int
            Minimum angular seperation of galaxies in pixelsize. Anything below is thrown out
        similarity_inv: bool
            If True returns most similar, if False returns least similar
            NOTE(review): this description looks inverted relative to the code
            (similarity_inv=True re-inserts the query, implying least-similar
            ordering) — confirm against retrieve_similarity.
        """

        nnearest_intermediate = int(nnearest*1.5) # some may be thrown out due to angular seperation constraints, so oversample
        # Calculate similarity on the fly
        # self.similar_inds, self.similarity_score = calculate_similarity(self.representations, self.query_ind, nnearest=nnearest_intermediate, similarity_inv=similarity_inv)

        # Use precalculated values
        self.similar_inds, self.similarity_score = retrieve_similarity(self.query_ind,
                                                                       model_version=model_version)

        if similarity_inv:
            # append query to start of list, as it will no longer be most similar to itself
            self.similar_inds = np.insert(self.similar_inds, 0, self.query_ind)
            self.similarity_score = np.insert(self.similarity_score, 0, 1.)

        # now remove galaxies that are suspiciously close to each other on the sky
        # which happens when individual galaxies in a cluster are included as separate sources in the catalogue
        similarity_dict = self.load_from_catalogue_indices(include_extra_features=False,
                                                           inds_load=self.similar_inds)

        # all vs all calculation
        sep = angular_separation(similarity_dict['ra'][np.newaxis, ...],
                                 similarity_dict['dec'][np.newaxis, ...],
                                 similarity_dict['ra'][..., np.newaxis],
                                 similarity_dict['dec'][..., np.newaxis])

        # compile indices of galaxies too close in angular coordinates
        germanShepherdDeleteIndices = set()
        for y in range(sep.shape[0]):
            # BUG FIX: the original offset the upper-triangle indices with an
            # undefined `i`; the offset must come from the current row `y`.
            inds_cuti = set(np.where(sep[y, (y+1):] < min_angular_separation*self.pixel_size)[0]+(y+1))
            germanShepherdDeleteIndices = germanShepherdDeleteIndices | inds_cuti

        germanShepherdDeleteIndices = sorted(germanShepherdDeleteIndices)
        self.similar_inds = np.delete(self.similar_inds, germanShepherdDeleteIndices)
        self.similarity_score = np.delete(self.similarity_score, germanShepherdDeleteIndices)

        # BUG FIX: germanShepherdNumNearest was undefined; trim to the
        # requested number of neighbours.
        self.similar_inds = self.similar_inds[:nnearest]
        self.similarity_score = self.similarity_score[:nnearest]