MaHaWo committed on
Commit
cf40c2c
·
1 Parent(s): b52f3c1

Replace TensorFlow-based functionality with non-TensorFlow (NumPy/SciPy) equivalents

Browse files
google_perch_tflite/model.py CHANGED
@@ -9,6 +9,7 @@ from iSparrow import ModelBase
9
 
10
  import numpy as np
11
  from pathlib import Path
 
12
 
13
  class Model(ModelBase):
14
  """
@@ -70,7 +71,7 @@ class Model(ModelBase):
70
 
71
  logits = self.model.get_tensor(self.output_layer_index)
72
 
73
- confidence = tf.nn.softmax(logits).numpy()
74
 
75
  return confidence
76
 
 
9
 
10
  import numpy as np
11
  from pathlib import Path
12
+ from scipy.special import softmax
13
 
14
  class Model(ModelBase):
15
  """
 
71
 
72
  logits = self.model.get_tensor(self.output_layer_index)
73
 
74
+ confidence = softmax(logits)
75
 
76
  return confidence
77
 
google_perch_tflite/preprocessor.py CHANGED
@@ -19,7 +19,6 @@ class Preprocessor(ppb.PreprocessorBase):
19
  )
20
 
21
  def process_audio_data(self, rawdata: np.array)->np.array:
22
- self.chunks = []
23
 
24
  # raise when sampling rate is unequal.
25
  if self.actual_sampling_rate != self.sample_rate:
@@ -27,13 +26,29 @@ class Preprocessor(ppb.PreprocessorBase):
27
  "Sampling rate is not the desired one. Desired sampling rate: {self.sample_rate}, actual sampling rate: {self.actual_sampling_rate}"
28
  )
29
 
30
- frame_length = int(self.sample_secs * self.sample_rate)
31
- step_length = int(self.sample_secs - self.overlap) * self.sample_rate
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- self.chunks = tf_split_signal_into_chunks(
34
- rawdata, frame_length, step_length, pad_end=True
35
- ).numpy()
 
 
36
 
 
 
37
  print(
38
  "process audio data google: complete, read ",
39
  str(len(self.chunks)),
 
19
  )
20
 
21
  def process_audio_data(self, rawdata: np.array)->np.array:
 
22
 
23
  # raise when sampling rate is unequal.
24
  if self.actual_sampling_rate != self.sample_rate:
 
26
  "Sampling rate is not the desired one. Desired sampling rate: {self.sample_rate}, actual sampling rate: {self.actual_sampling_rate}"
27
  )
28
 
29
+ seconds = self.sample_secs
30
+ minlen = 1.5
31
+
32
+ self.chunks = []
33
+
34
+ for i in range(
35
+ 0, len(rawdata), int((seconds - self.overlap) * self.sample_rate)
36
+ ):
37
+
38
+ split = rawdata[i : (i + int(seconds * self.actual_sampling_rate))]
39
+
40
+ # End of signal?
41
+ if len(split) < int(minlen * self.actual_sampling_rate):
42
+ break
43
 
44
+ # Signal chunk too short? Fill with zeros.
45
+ if len(split) < int(self.actual_sampling_rate * seconds):
46
+ temp = np.zeros((int(self.actual_sampling_rate * seconds)))
47
+ temp[: len(split)] = split
48
+ split = temp
49
 
50
+ self.chunks.append(split)
51
+
52
  print(
53
  "process audio data google: complete, read ",
54
  str(len(self.chunks)),