Spaces:
Runtime error
Runtime error
| # Copyright (c) 2024 Microsoft Corporation. | |
| # Licensed under the MIT License | |
| import unittest | |
| import pandas as pd | |
| import pytest | |
| from graphrag.index.verbs.text.split import text_split_df | |
class TestTextSplit(unittest.TestCase):
    """Unit tests for ``text_split_df`` applied to a single column.

    Every case passes ``"in"`` as the source column, ``"out"`` as the
    destination column and ``","`` as the separator argument.
    """

    @staticmethod
    def _split_records(frame: pd.DataFrame) -> list:
        """Run ``text_split_df`` on *frame* and return its rows as dicts."""
        split_frame = text_split_df(frame, "in", "out", ",")
        return split_frame.to_dict(orient="records")

    def test_empty_string(self):
        """An empty string produces an empty list."""
        frame = pd.DataFrame([{"in": ""}])
        records = self._split_records(frame)
        assert len(records) == 1
        assert records[0]["out"] == []

    def test_string_without_seperator(self):
        """A string with no separator comes back as a one-element list."""
        frame = pd.DataFrame([{"in": "test_string_without_seperator"}])
        records = self._split_records(frame)
        assert len(records) == 1
        assert records[0]["out"] == ["test_string_without_seperator"]

    def test_string_with_seperator(self):
        """A string containing the separator is split into its parts."""
        frame = pd.DataFrame([{"in": "test_1,test_2"}])
        records = self._split_records(frame)
        assert len(records) == 1
        assert records[0]["out"] == ["test_1", "test_2"]

    def test_row_with_list_as_column(self):
        """A value that is already a list is returned unchanged."""
        frame = pd.DataFrame([{"in": ["test_1", "test_2"]}])
        records = self._split_records(frame)
        assert len(records) == 1
        assert records[0]["out"] == ["test_1", "test_2"]

    def test_non_string_column_throws_error(self):
        """An integer value in the source column raises ``TypeError``."""
        frame = pd.DataFrame([{"in": 5}])
        # Only the split itself is expected to raise, so the frame is built
        # outside the ``raises`` context.
        with pytest.raises(TypeError):
            self._split_records(frame)

    def test_more_than_one_row_returns_correctly(self):
        """Each row is split independently and row order is kept."""
        frame = pd.DataFrame([{"in": "row_1_1,row_1_2"}, {"in": "row_2_1,row_2_2"}])
        records = self._split_records(frame)
        assert len(records) == 2
        assert records[0]["out"] == ["row_1_1", "row_1_2"]
        assert records[1]["out"] == ["row_2_1", "row_2_2"]