Spaces:
No application file
No application file
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
"""Useful utilities for helping in parsing GenBank files."""
class FeatureValueCleaner:
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll otherwise end up with white space in it.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    Each key to be cleaned is handled by a method named ``_clean_<key>``
    which is looked up dynamically by ``clean_value``.

    >>> cleaner = FeatureValueCleaner(["translation"])
    >>> cleaner
    FeatureValueCleaner(['translation'])
    >>> cleaner.clean_value("translation", "MED\nYDPWNLRFQSKYKSRDA")
    'MEDYDPWNLRFQSKYKSRDA'

    """

    # Default keys to clean; used as the default value of ``to_process``.
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - iterable of feature key names whose values should
           be cleaned. It is copied into a new list, so later changes to
           the caller's list (or to the shared class-level default) do not
           affect this instance, and vice versa.

        """
        # Copy rather than alias: the default argument is the class-level
        # list, which would otherwise be shared by every instance.
        self._to_process = list(to_process)

    def __repr__(self):
        """Return a string representation of the class."""
        return f"{self.__class__.__name__}({self._to_process!r})"

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Raises AssertionError if ``key_name`` is one of the keys to process
        but no matching ``_clean_<key_name>`` method exists.
        """
        if key_name not in self._to_process:
            return value
        try:
            cleaner = getattr(self, f"_clean_{key_name}")
        except AttributeError:
            # Suppress the AttributeError context; the missing cleaner is
            # the only relevant information for the caller.
            raise AssertionError(f"No function to clean key: {key_name}") from None
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string (PRIVATE).

        All whitespace (spaces, tabs, newlines) is removed from the value.
        """
        return "".join(value.split())
if __name__ == "__main__":
    # Executed as a script: run this module's doctests through
    # Biopython's helper.
    from Bio._utils import run_doctest as _run_module_doctests

    _run_module_doctests()