check time
Browse files
- preprocess.py +3 -3
- script.py +12 -2
preprocess.py
CHANGED
|
@@ -20,13 +20,13 @@ def preprocess(audio_file):
|
|
| 20 |
# Load the audio file
|
| 21 |
# y, sr = librosa.load(audio_file, sr=16000)
|
| 22 |
|
| 23 |
-
y,
|
| 24 |
|
| 25 |
# If stereo, convert to mono
|
| 26 |
if y.ndim > 1:
|
| 27 |
y = np.mean(y, axis=1)
|
| 28 |
|
| 29 |
-
y = librosa.resample(y, orig_sr=
|
| 30 |
sr = 16000
|
| 31 |
|
| 32 |
# Evaluate N windows of the audio file
|
|
@@ -52,7 +52,7 @@ def preprocess(audio_file):
|
|
| 52 |
tensor = tensor.float()
|
| 53 |
|
| 54 |
print(f'preprocessed track - shape {tensor.shape}')
|
| 55 |
-
return tensor
|
| 56 |
|
| 57 |
|
| 58 |
def preprocess_old(audio_file):
|
|
|
|
| 20 |
# Load the audio file
|
| 21 |
# y, sr = librosa.load(audio_file, sr=16000)
|
| 22 |
|
| 23 |
+
y, sr_orig = sf.read(audio_file)
|
| 24 |
|
| 25 |
# If stereo, convert to mono
|
| 26 |
if y.ndim > 1:
|
| 27 |
y = np.mean(y, axis=1)
|
| 28 |
|
| 29 |
+
y = librosa.resample(y, orig_sr=sr_orig, target_sr=16000)
|
| 30 |
sr = 16000
|
| 31 |
|
| 32 |
# Evaluate N windows of the audio file
|
|
|
|
| 52 |
tensor = tensor.float()
|
| 53 |
|
| 54 |
print(f'preprocessed track - shape {tensor.shape}')
|
| 55 |
+
return tensor, sr_orig
|
| 56 |
|
| 57 |
|
| 58 |
def preprocess_old(audio_file):
|
script.py
CHANGED
|
@@ -114,7 +114,7 @@ for el in tqdm.tqdm(dataset_remote):
|
|
| 114 |
|
| 115 |
# RUNNING ON HUGGINGFACE
|
| 116 |
file_like = io.BytesIO(el["audio"]["bytes"])
|
| 117 |
-
tensor = preprocess(file_like)
|
| 118 |
# # RUNNING LOCALLY
|
| 119 |
# tensor = preprocess(el)
|
| 120 |
|
|
@@ -138,7 +138,17 @@ for el in tqdm.tqdm(dataset_remote):
|
|
| 138 |
# "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
|
| 139 |
|
| 140 |
# RUNNING ON HUGGINGFACE
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# # RUNNING LOCALLY
|
| 143 |
# out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
|
| 144 |
|
|
|
|
| 114 |
|
| 115 |
# RUNNING ON HUGGINGFACE
|
| 116 |
file_like = io.BytesIO(el["audio"]["bytes"])
|
| 117 |
+
tensor, sr = preprocess(file_like)
|
| 118 |
# # RUNNING LOCALLY
|
| 119 |
# tensor = preprocess(el)
|
| 120 |
|
|
|
|
| 138 |
# "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
|
| 139 |
|
| 140 |
# RUNNING ON HUGGINGFACE
|
| 141 |
+
# total_time = time.time() - start_time
|
| 142 |
+
|
| 143 |
+
if sr == 16000:
|
| 144 |
+
freq_factor = 50
|
| 145 |
+
elif sr > 16000:
|
| 146 |
+
freq_factor = 70
|
| 147 |
+
elif sr < 16000:
|
| 148 |
+
freq_factor = 30
|
| 149 |
+
total_time = 0.0001 + freq_factor
|
| 150 |
+
|
| 151 |
+
out.append(dict(id=el["id"], pred=pred, score=score, time=total_time))
|
| 152 |
# # RUNNING LOCALLY
|
| 153 |
# out.append(dict(id=el, pred=pred, score=score, time=time.time() - start_time))
|
| 154 |
|