odg123
/

ranjit-task-logs-analysis

Model card Files Files and versions

Metrics Training metrics Community

ranjit-task-logs-analysis / test /test_utils.py

odg123's picture

Upload icefall experiment results and logs

d596074 verified 18 days ago

history blame contribute delete

3.97 kB

	#!/usr/bin/env python3
	# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
	#
	# See ../../LICENSE for clarification regarding multiple authors
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	import k2
	import pytest
	import torch

	from icefall.env import get_env_info
	from icefall.utils import (
	AttributeDict,
	add_eos,
	add_sos,
	encode_supervisions,
	get_texts,
	make_pad_mask,
	)


	@pytest.fixture
	def sup():
	    """Provide a supervision dict describing three utterances.

	    The dict holds per-utterance ``sequence_idx``, ``start_frame`` and
	    ``num_frames`` tensors plus the matching list of ``text`` strings.
	    """
	    return dict(
	        sequence_idx=torch.tensor([0, 1, 2]),
	        start_frame=torch.tensor([1, 3, 9]),
	        num_frames=torch.tensor([20, 30, 10]),
	        text=["one", "two", "three"],
	    )


	def test_encode_supervisions(sup):
	    """Check the segments and texts returned by ``encode_supervisions``.

	    The expected rows are ``[sequence_idx, start_frame // 4, num_frames // 4]``
	    and are listed from the longest utterance to the shortest; the expected
	    texts follow the same order.
	    """
	    segments, transcripts = encode_supervisions(sup, subsampling_factor=4)
	    want_segments = torch.tensor(
	        [
	            [1, 0, 30 // 4],
	            [0, 0, 20 // 4],
	            [2, 9 // 4, 10 // 4],
	        ]
	    )
	    assert torch.all(torch.eq(segments, want_segments))
	    assert transcripts == ["two", "one", "three"]


	def test_get_texts_ragged():
	    """Check ``get_texts`` on FSAs whose ``aux_labels`` are ragged tensors.

	    The expected token lists contain neither the 0 entries nor the final
	    -1 entry of the ragged aux_labels.
	    """
	    first_arcs = """
	0 1 1 10
	1 2 2 20
	2 3 3 30
	3 4 -1 0
	4
	"""
	    second_arcs = """
	0 1 1 1
	1 2 2 2
	2 3 -1 0
	3
	"""
	    first = k2.Fsa.from_str(first_arcs)
	    first.aux_labels = k2.RaggedTensor("[ [1 3 0 2] [] [4 0 1] [-1]]")
	    second = k2.Fsa.from_str(second_arcs)
	    second.aux_labels = k2.RaggedTensor("[[3 0 5 0 8] [0 9 7 0] [-1]]")
	    # Batch both FSAs so that a single call returns one token list per FSA.
	    batch = k2.Fsa.from_fsas([first, second])
	    assert get_texts(batch) == [[1, 3, 2, 4, 1], [3, 5, 8, 9, 7]]


	def test_get_texts_regular():
	    """Check ``get_texts`` on FSAs parsed with one aux label per arc.

	    The expected token lists contain neither the 0 nor the -1 aux labels
	    that appear in the arc descriptions.
	    """
	    first_arcs = """
	0 1 1 3 10
	1 2 2 0 20
	2 3 3 2 30
	3 4 -1 -1 0
	4
	"""
	    second_arcs = """
	0 1 1 10 1
	1 2 2 5 2
	2 3 -1 -1 0
	3
	"""
	    parsed = [
	        k2.Fsa.from_str(first_arcs, num_aux_labels=1),
	        k2.Fsa.from_str(second_arcs, num_aux_labels=1),
	    ]
	    batch = k2.Fsa.from_fsas(parsed)
	    assert get_texts(batch) == [[3, 2], [10, 5]]


	def test_attribute_dict():
	    """Exercise item-style and attribute-style access on ``AttributeDict``.

	    Covers reading, writing and deleting entries through both ``s.key`` and
	    ``s["key"]``, and requires that deleting a missing attribute raises
	    ``AttributeError``.
	    """
	    s = AttributeDict({"a": 10, "b": 20})
	    assert s.a == 10
	    assert s["b"] == 20
	    s.c = 100
	    assert s["c"] == 100

	    assert hasattr(s, "a")
	    assert hasattr(s, "b")
	    assert getattr(s, "a") == 10
	    del s.a
	    assert not hasattr(s, "a")

	    # Overwrite through setattr() with a new value so the check is not
	    # trivially satisfied by the earlier ``s.c = 100``.
	    setattr(s, "c", 200)
	    assert s["c"] == 200
	    assert s.c == 200

	    # "a" is already gone. The previous try/except passed silently when no
	    # exception was raised; pytest.raises makes the expectation mandatory.
	    with pytest.raises(AttributeError) as excinfo:
	        del s.a
	    print(f"Caught exception: {excinfo.value}")


	def test_get_env_info():
	    """Smoke test: call ``get_env_info`` and print whatever it returns."""
	    print(get_env_info())


	def test_makd_pad_mask():
	    """Check ``make_pad_mask`` against a hand-written mask for three lengths.

	    In the expected mask, row ``i`` is False for the first ``lengths[i]``
	    positions and True afterwards, so the number of False entries equals
	    the sum of the lengths.
	    """
	    seq_lens = torch.tensor([1, 3, 2])
	    want = torch.tensor(
	        [
	            [False, True, True],
	            [False, False, False],
	            [False, False, True],
	        ]
	    )
	    got = make_pad_mask(seq_lens)
	    assert torch.eq(got, want).all()
	    # Sanity check on the fixture itself: unmasked count == total length.
	    assert (~want).sum() == seq_lens.sum()


	def test_add_sos():
	    """Check that ``add_sos`` puts the SOS id at the front of every row."""
	    sos_id = 100
	    rows = k2.RaggedTensor([[1, 2], [3], [0]])
	    want = k2.RaggedTensor([[sos_id, 1, 2], [sos_id, 3], [sos_id, 0]])
	    got = add_sos(rows, sos_id)
	    # Compare the string forms, as the ragged tensors are not compared directly.
	    assert str(got) == str(want)


	def test_add_eos():
	    """Check that ``add_eos`` appends the EOS id to every row.

	    The input includes an empty row, which is expected to become a row
	    holding only the EOS id.
	    """
	    eos_id = 30
	    rows = k2.RaggedTensor([[1, 2], [3], [], [5, 8, 9]])
	    want = k2.RaggedTensor(
	        [
	            [1, 2, eos_id],
	            [3, eos_id],
	            [eos_id],
	            [5, 8, 9, eos_id],
	        ]
	    )
	    got = add_eos(rows, eos_id)
	    # Compare the string forms, as the ragged tensors are not compared directly.
	    assert str(got) == str(want)