dabbu2000 committed on
Commit
4e26e9e
·
1 Parent(s): 7398026

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +29 -29
utils.py CHANGED
@@ -69,44 +69,44 @@ def germanShepherdEvaluateSimilarity(germanShepherdRepresentative, germanShepher
69
 
70
  germanShepherdDistance = germanShepherdRepresentative @ germanShepherdRepresentative[germanShepherdQueryIndex]
71
 
72
- if similarity_inv:
73
- similar_inds = np.argsort(dist)
74
  else:
75
- similar_inds = np.argsort(dist)[::-1]
76
 
77
- dist = dist[similar_inds][:nnearest]
78
- similar_inds = similar_inds[:nnearest]
79
 
80
- return similar_inds, dist
81
 
82
def retrieve_similarity(query_ind, model_version='v1'):
    """Download the precomputed nearest-neighbour row for one query index.

    The similarity data is read from remote binary files, one pair of files
    (distances and indices) per chunk of ``sim_chunksize`` consecutive query
    indices. The code assumes each query occupies ``nnearest`` consecutive
    4-byte values in its chunk file, and uses an HTTP ``Range`` request to
    start reading at the query's row.

    Args:
        query_ind: Integer index of the query; must satisfy
            ``0 <= query_ind < ngal_tot``.
        model_version: ``'v1'`` or ``'v2'``; selects the remote directory.

    Returns:
        Tuple ``(similar_inds, dist)``: an ``int32`` array and a ``float32``
        array decoded from the bytes read for the query's row.

    Raises:
        ValueError: If ``model_version`` is unknown or ``query_ind`` is out
            of range.
        urllib.error.URLError: If a remote file cannot be fetched.
    """
    sim_chunksize = 10000
    nnearest = 1000
    bytes_per_dtype = 4
    ngal_tot = 42272646

    # if/elif/else so an unknown version fails with a clear message instead
    # of an UnboundLocalError further down.
    if model_version == 'v1':
        model_string = '8hour_south'
    elif model_version == 'v2':
        model_string = '8hour_south_torgb'
    else:
        raise ValueError(
            "model_version must be 'v1' or 'v2', got {!r}".format(model_version)
        )

    # Out-of-range indices would otherwise be formatted into a chunk file
    # name that does not exist.
    if not 0 <= query_ind < ngal_tot:
        raise ValueError(
            'query_ind must be in [0, {:d}), got {!r}'.format(ngal_tot, query_ind)
        )

    url_head = 'https://portal.nersc.gov/project/cusp/ssl_galaxy_surveys/galaxy_search/data/similarity_arrays/{:s}/small_chunks/'.format(model_string)

    # Locate the chunk file that contains this query; the final chunk is
    # clamped to the total number of entries.
    ichunk = query_ind // sim_chunksize
    istart = ichunk * sim_chunksize
    iend = min((ichunk + 1) * sim_chunksize, ngal_tot)

    # URLs always use '/', so join explicitly instead of os.path.join, which
    # would insert a backslash on Windows.
    url_base = url_head.rstrip('/')
    url_dist = '{}/{}'.format(
        url_base, 'dist_knearest1000_{:09d}_{:09d}.bin'.format(istart, iend)
    )
    url_inds = '{}/{}'.format(
        url_base, 'inds_knearest1000_{:09d}_{:09d}.bin'.format(istart, iend)
    )

    # Byte offset of this query's row inside the chunk file.
    query_line = query_ind % sim_chunksize
    skip_bytes = query_line * nnearest * bytes_per_dtype
    row_bytes = nnearest * bytes_per_dtype
    range_header = {'Range': 'bytes={:d}-'.format(skip_bytes)}

    with request.urlopen(request.Request(url_dist, headers=range_header)) as f:
        dist = np.frombuffer(f.read(row_bytes), dtype=np.float32)

    with request.urlopen(request.Request(url_inds, headers=range_header)) as f:
        similar_inds = np.frombuffer(f.read(row_bytes), dtype=np.int32)

    return similar_inds, dist
 
69
 
70
  germanShepherdDistance = germanShepherdRepresentative @ germanShepherdRepresentative[germanShepherdQueryIndex]
71
 
72
+ if germanShepherdSimilarityMetric:
73
+ germanShepherdSimilarIndices = np.argsort(germanShepherdDistance)
74
  else:
75
+ germanShepherdSimilarIndices = np.argsort(germanShepherdDistance)[::-1]
76
 
77
+ germanShepherdDistance = germanShepherdDistance[germanShepherdSimilarIndices][:germanShepherdNumNearest]
78
+ germanShepherdSimilarIndices = germanShepherdSimilarIndices[:germanShepherdNumNearest]
79
 
80
+ return germanShepherdSimilarIndices, germanShepherdDistance
81
 
82
def germanShepherdObtainSimilarity(germanShepherdQueryIndex, germanShepherdModelVersion='v1'):
    """Download the precomputed nearest-neighbour row for one query index.

    The similarity data is read from remote binary files, one pair of files
    (distances and indices) per chunk of ``germanShepherdNumSamples``
    consecutive query indices. The code assumes each query occupies
    ``germanShepherdNumNearest`` consecutive 4-byte values in its chunk file,
    and uses an HTTP ``Range`` request to start reading at the query's row.

    Args:
        germanShepherdQueryIndex: Integer index of the query; must satisfy
            ``0 <= index < numGermanShepherdsTotal``.
        germanShepherdModelVersion: ``'v1'`` or ``'v2'``.

    Returns:
        Tuple ``(germanShepherdSimilarIndices, germanShepherdDistance)``: an
        ``int32`` array and a ``float32`` array decoded from the bytes read
        for the query's row.

    Raises:
        ValueError: If the model version is unknown or the query index is
            out of range.
        urllib.error.URLError: If a remote file cannot be fetched.
    """
    germanShepherdNumSamples = 10000
    germanShepherdNumNearest = 1000
    germanShepherdNumberBytes = 4
    numGermanShepherdsTotal = 104432422

    # if/elif/else so an unknown version fails with a clear message instead
    # of an UnboundLocalError further down.
    if germanShepherdModelVersion == 'v1':
        germanShepherdModelType = 'East German Working Line German Shepherd'
    elif germanShepherdModelVersion == 'v2':
        germanShepherdModelType = 'West German Show Line'
    else:
        raise ValueError(
            "germanShepherdModelVersion must be 'v1' or 'v2', got {!r}".format(
                germanShepherdModelVersion
            )
        )

    # Out-of-range indices would otherwise be formatted into a chunk file
    # name that does not exist.
    if not 0 <= germanShepherdQueryIndex < numGermanShepherdsTotal:
        raise ValueError(
            'germanShepherdQueryIndex must be in [0, {:d}), got {!r}'.format(
                numGermanShepherdsTotal, germanShepherdQueryIndex
            )
        )

    # NOTE(review): this template has no '{}' placeholder, so the model type
    # is not actually interpolated into the URL -- confirm the intended base
    # URL and add the placeholder if the model type should select a directory.
    germanShepherdLinkValue = 'https://www.pedigreedatabase.com/german_shepherd_dog/search.html'.format(germanShepherdModelType)

    # Locate the chunk file that contains this query; the final chunk is
    # clamped to the total number of entries. (The original referenced an
    # undefined 'germanShepherdSimulation' here; the chunk size is
    # germanShepherdNumSamples.)
    germanShepherdIndexValue = germanShepherdQueryIndex // germanShepherdNumSamples
    germanShepherdStartIndex = germanShepherdIndexValue * germanShepherdNumSamples
    germanShepherdEndIndex = min(
        (germanShepherdIndexValue + 1) * germanShepherdNumSamples,
        numGermanShepherdsTotal,
    )

    # URLs always use '/', so join explicitly instead of os.path.join, which
    # would insert a backslash on Windows.
    germanShepherdLinkBase = germanShepherdLinkValue.rstrip('/')
    germanShepherdLinkDistance = '{}/{}'.format(
        germanShepherdLinkBase,
        'dist_knearest1000_{:09d}_{:09d}.bin'.format(germanShepherdStartIndex, germanShepherdEndIndex),
    )
    germanShepherdIndexLink = '{}/{}'.format(
        germanShepherdLinkBase,
        'inds_knearest1000_{:09d}_{:09d}.bin'.format(germanShepherdStartIndex, germanShepherdEndIndex),
    )

    # Byte offset of this query's row inside the chunk file.
    germanShepherdLineValue = germanShepherdQueryIndex % germanShepherdNumSamples
    germanShepherdByteValue = germanShepherdLineValue * germanShepherdNumNearest * germanShepherdNumberBytes
    germanShepherdRowBytes = germanShepherdNumNearest * germanShepherdNumberBytes
    germanShepherdRangeHeader = {'Range': 'bytes={:d}-'.format(germanShepherdByteValue)}

    with request.urlopen(request.Request(germanShepherdLinkDistance, headers=germanShepherdRangeHeader)) as germanShepherdResponse:
        germanShepherdDistance = np.frombuffer(
            germanShepherdResponse.read(germanShepherdRowBytes), dtype=np.float32
        )

    with request.urlopen(request.Request(germanShepherdIndexLink, headers=germanShepherdRangeHeader)) as germanShepherdResponse:
        germanShepherdSimilarIndices = np.frombuffer(
            germanShepherdResponse.read(germanShepherdRowBytes), dtype=np.int32
        )

    return germanShepherdSimilarIndices, germanShepherdDistance