# InfiAgent/tests/src/utils/test_string_utils.py
# Uploaded by g3eIL in commit 77320e4 ("Upload 80 files", verified).
import unittest
from src.utils import (
is_image_link,
extract_urls,
extract_and_replace_url,
replace_latex_format
)
class TestStringUtils(unittest.TestCase):
    """Unit tests for the string helpers imported from ``src.utils``.

    The fixtures built in ``setUp`` are parallel lists: entry *i* of
    ``raw_inputs`` is paired with entry *i* of ``format_outputs`` and of
    ``extracted_urls``, and ``raw_latex_input`` is paired with
    ``raw_latex_output``.
    """

    # The fixture strings are long; show complete diffs when a comparison fails.
    maxDiff = None

    def setUp(self):
        """Build the input/expected-output fixtures shared by the tests."""
        # Links that is_image_link is expected to accept.
        self.image_links = [
            'http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png'
        ]
        # Raw texts fed to extract_urls and extract_and_replace_url.
        self.raw_inputs = [
            'Ran code\nSTDOUT:\n0.04886713118710795',
            'Ran code\nSTDOUT:\n0.04886713118710795\nGenerated an image: http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png',
            'Ran code\nSTDOUT:\n0.04886713118710795\n Add here on purpose Generated an image: http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png',
            'Ran code\nSTDOUT:\n0.04886713118710795\nGenerated an image: http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png\nGenerated files on server: http://tosv.byted.org/obj/codegraph/sandbox/fake.csv',
        ]
        # Expected extract_and_replace_url results, one per raw input.
        self.format_outputs = [
            '```python\nRan code\nSTDOUT:\n0.04886713118710795```\n',
            '```python\nRan code\nSTDOUT:\n0.04886713118710795\n```\n![The Image](http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png)',
            '```python\nRan code\nSTDOUT:\n0.04886713118710795\n Add here on purpose ```\n![The Image](http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png)',
            '```python\nRan code\nSTDOUT:\n0.04886713118710795\n\n```\n![The Image](http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png)\n[fake.csv](http://tosv.byted.org/obj/codegraph/sandbox/fake.csv)',
        ]
        # Expected extract_urls results, one list per raw input.
        self.extracted_urls = [
            [],
            ['http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png'],
            ['http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png'],
            ['http://tosv.byted.org/obj/codegraph/sandbox/temp/ws987dcaa4dba3400b91707f6825146f4e/1696533095597078758.png', 'http://tosv.byted.org/obj/codegraph/sandbox/fake.csv']
        ]
        # Text containing a \[ ... \] LaTeX block, and the same text with $$ delimiters.
        self.raw_latex_input = [
            'for people who earn $10,000 or $100,000 in each country:\n\n\\[\n\\begin{array}{|c|c|}\n\\hline\n\\textbf{native-country} & \\textbf{hours-per-week} \\\\\n\\hline\n?\\ & 45.55 \\\\\n Cambodia & 40.00 \\\\\n Canada & 45.64 \\\\\n... &... \\\\\n United-States & 45.51 \\\\\n Vietnam & 39.20 \\\\\n Yugoslavia & 49.50 \\\\\n\\hline\n\\end{array}\n\\]\n\n(Note: The country labeled "?" is likely a placeholder for countries with missing or unknown native-country values.)\n\nPlease note that this dataset contains non-binary values for the native-country column.'
        ]
        self.raw_latex_output = [
            'for people who earn $10,000 or $100,000 in each country:\n\n$$\n\\begin{array}{|c|c|}\n\\hline\n\\textbf{native-country} & \\textbf{hours-per-week} \\\\\n\\hline\n?\\ & 45.55 \\\\\n Cambodia & 40.00 \\\\\n Canada & 45.64 \\\\\n... &... \\\\\n United-States & 45.51 \\\\\n Vietnam & 39.20 \\\\\n Yugoslavia & 49.50 \\\\\n\\hline\n\\end{array}\n$$\n\n(Note: The country labeled "?" is likely a placeholder for countries with missing or unknown native-country values.)\n\nPlease note that this dataset contains non-binary values for the native-country column.'
        ]

    def _paired_cases(self, inputs, expected):
        """Return ``zip(inputs, expected)`` after checking both have equal length.

        ``zip`` stops at the shorter list, so without this check a fixture
        list that drifted out of sync would silently skip cases.
        """
        self.assertEqual(
            len(inputs), len(expected),
            'fixture lists must have the same number of entries',
        )
        return zip(inputs, expected)

    def test_is_image_link(self):
        """Every fixture image link is recognised by ``is_image_link``."""
        for link in self.image_links:
            with self.subTest(link=link):
                self.assertTrue(is_image_link(link))

    def test_extract_urls(self):
        """``extract_urls`` yields the expected URLs, in order, for each input."""
        for input_text, expected_urls in self._paired_cases(
                self.raw_inputs, self.extracted_urls):
            with self.subTest(input_text=input_text):
                # list() keeps the comparison element-wise for any sequence type.
                self.assertEqual(list(extract_urls(input_text)), expected_urls)

    def test_extract_and_replace(self):
        """``extract_and_replace_url`` produces the expected formatted text."""
        for input_text, expected_output_text in self._paired_cases(
                self.raw_inputs, self.format_outputs):
            with self.subTest(input_text=input_text):
                real_output = extract_and_replace_url(input_text)
                self.assertEqual(expected_output_text, real_output)

    def test_replace_latex_format(self):
        """``replace_latex_format`` turns each LaTeX input into its expected output."""
        for input_text, expected_output_text in self._paired_cases(
                self.raw_latex_input, self.raw_latex_output):
            with self.subTest(input_text=input_text):
                real_output = replace_latex_format(input_text)
                self.assertEqual(expected_output_text, real_output)
# Run the unittest command-line runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()