Update app.py
Browse files
app.py
CHANGED
|
@@ -20,9 +20,9 @@ huggingface_token = os.environ["huggingface_token"]
|
|
| 20 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
| 21 |
print(pipe)
|
| 22 |
processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
|
| 23 |
-
dataset = load_dataset("
|
| 24 |
|
| 25 |
-
print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
| 26 |
|
| 27 |
model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc50")
|
| 28 |
|
|
@@ -35,14 +35,14 @@ model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc
|
|
| 35 |
|
| 36 |
|
| 37 |
def map_to_pred(batch):
|
| 38 |
-
cleaned_transcription = re.sub(r'\[[^\]]+\]', '', batch['
|
| 39 |
print("cleaned transcript", cleaned_transcription)
|
| 40 |
-
cleaned_transcription = preprocess_transcription(batch['
|
| 41 |
normalized_transcription = processor.tokenizer._normalize(cleaned_transcription)
|
| 42 |
|
| 43 |
audio = batch["audio"]
|
| 44 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
| 45 |
-
batch["reference"] = processor.tokenizer._normalize(batch['
|
| 46 |
|
| 47 |
|
| 48 |
with torch.no_grad():
|
|
|
|
| 20 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
| 21 |
print(pipe)
|
| 22 |
processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
|
| 23 |
+
dataset = load_dataset("ashraq/esc50", split="train").cast_column("audio", Audio(sampling_rate=16000))
|
| 24 |
|
| 25 |
+
# print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
| 26 |
|
| 27 |
model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc50")
|
| 28 |
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
def map_to_pred(batch):
|
| 38 |
+
cleaned_transcription = re.sub(r'\[[^\]]+\]', '', batch['category']).strip()
|
| 39 |
print("cleaned transcript", cleaned_transcription)
|
| 40 |
+
cleaned_transcription = preprocess_transcription(batch['category'])
|
| 41 |
normalized_transcription = processor.tokenizer._normalize(cleaned_transcription)
|
| 42 |
|
| 43 |
audio = batch["audio"]
|
| 44 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
| 45 |
+
batch["reference"] = processor.tokenizer._normalize(batch['category'])
|
| 46 |
|
| 47 |
|
| 48 |
with torch.no_grad():
|