| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import bisect |
| import re |
| from typing import Dict, List, Tuple |
|
|
| _line_start_re = re.compile(r'^', re.M) |
|
|
class LineNumbers:
    """
    Class to convert between character offsets in a text string, and pairs (line, column) of
    1-based line and 0-based column numbers, as used by tokens and AST nodes.

    This class expects unicode for input and stores positions in unicode. It can also translate
    utf8 byte columns, which are used by ast parsing, into unicode columns (see from_utf8_col).

    All conversions clamp out-of-range arguments instead of raising.
    """

    def __init__(self, text):
        # type: (str) -> None
        """Record where every line of ``text`` starts."""
        # Character offset of the first character of each line, in increasing order.
        self._line_offsets = [m.start(0) for m in _line_start_re.finditer(text)]  # type: List[int]
        self._text = text
        self._text_len = len(text)
        # Maps a 1-based line number to a list with one entry per utf8 byte of that line; each
        # entry is the index (within the line) of the character that byte belongs to.
        self._utf8_offset_cache = {}  # type: Dict[int, List[int]]

    def from_utf8_col(self, line, utf8_column):
        # type: (int, int) -> int
        """
        Given a 1-based line number and 0-based utf8 column, returns a 0-based unicode column.

        The utf8 column is clamped to the byte range of the line. A line number that falls
        outside the text is treated as an empty line, so the result is 0.
        """
        offsets = self._utf8_offset_cache.get(line)
        if offsets is None:
            line_count = len(self._line_offsets)
            if not 1 <= line <= line_count:
                # Clamp like line_to_offset() does, rather than raising IndexError (line too
                # large) or silently reading some other line through negative indexing.
                # Out-of-range lines are not cached so bad input cannot grow the cache.
                return 0

            start = self._line_offsets[line - 1]
            end = self._line_offsets[line] if line < line_count else self._text_len
            line_text = self._text[start:end]

            # A character encoded as k utf8 bytes contributes k copies of its index.
            offsets = []
            for index, char in enumerate(line_text):
                offsets.extend([index] * len(char.encode('utf8')))
            # Extra entry so a column just past the last byte maps to the end of the line.
            offsets.append(len(line_text))
            self._utf8_offset_cache[line] = offsets

        return offsets[max(0, min(len(offsets) - 1, utf8_column))]

    def line_to_offset(self, line, column):
        # type: (int, int) -> int
        """
        Converts 1-based line number and 0-based column to 0-based character offset into text.

        Lines before the first map to offset 0, lines past the last map to the end of the text,
        negative columns count as 0, and the result never exceeds the text length.
        """
        index = line - 1
        if index < 0:
            return 0
        if index >= len(self._line_offsets):
            return self._text_len
        return min(self._line_offsets[index] + max(0, column), self._text_len)

    def offset_to_line(self, offset):
        # type: (int) -> Tuple[int, int]
        """
        Converts 0-based character offset to pair (line, col) of 1-based line and 0-based column
        numbers.

        The offset is first clamped to the range [0, len(text)].
        """
        offset = max(0, min(self._text_len, offset))
        # bisect_right counts the line starts that are <= offset; the last of them is the line
        # that contains the offset.
        line_index = bisect.bisect_right(self._line_offsets, offset) - 1
        return (line_index + 1, offset - self._line_offsets[line_index])
|
|
|
|
|
|