Buckets:
| # | |
| # Copyright (C) 2009-2020 the sqlparse authors and contributors | |
| # <see AUTHORS file> | |
| # | |
| # This module is part of python-sqlparse and is released under | |
| # the BSD License: https://opensource.org/licenses/BSD-3-Clause | |
| """SQL Lexer""" | |
| import re | |
| from threading import Lock | |
| # This code is based on the SqlLexer in pygments. | |
| # http://pygments.org/ | |
| # It's separated from the rest of pygments to increase performance | |
| # and to allow some customizations. | |
| from io import TextIOBase | |
| from sqlparse import tokens, keywords | |
| from sqlparse.utils import consume | |
| class Lexer: | |
| """The Lexer supports configurable syntax. | |
| To add support for additional keywords, use the `add_keywords` method.""" | |
| _default_instance = None | |
| _lock = Lock() | |
| # Development notes: | |
| # - This class is prepared to be able to support additional SQL dialects | |
| # in the future by adding additional functions that take the place of | |
| # the function default_initialization(). | |
| # - The lexer class uses an explicit singleton behavior with the | |
| # instance-getter method get_default_instance(). This mechanism has | |
| # the advantage that the call signature of the entry-points to the | |
| # sqlparse library are not affected. Also, usage of sqlparse in third | |
| # party code does not need to be adapted. On the other hand, the current | |
| # implementation does not easily allow for multiple SQL dialects to be | |
| # parsed in the same process. | |
| # Such behavior can be supported in the future by passing a | |
| # suitably initialized lexer object as an additional parameter to the | |
| # entry-point functions (such as `parse`). Code will need to be written | |
| # to pass down and utilize such an object. The current implementation | |
| # is prepared to support this thread safe approach without the | |
| # default_instance part needing to change interface. | |
| def get_default_instance(cls): | |
| """Returns the lexer instance used internally | |
| by the sqlparse core functions.""" | |
| with cls._lock: | |
| if cls._default_instance is None: | |
| cls._default_instance = cls() | |
| cls._default_instance.default_initialization() | |
| return cls._default_instance | |
| def default_initialization(self): | |
| """Initialize the lexer with default dictionaries. | |
| Useful if you need to revert custom syntax settings.""" | |
| self.clear() | |
| self.set_SQL_REGEX(keywords.SQL_REGEX) | |
| self.add_keywords(keywords.KEYWORDS_COMMON) | |
| self.add_keywords(keywords.KEYWORDS_ORACLE) | |
| self.add_keywords(keywords.KEYWORDS_MYSQL) | |
| self.add_keywords(keywords.KEYWORDS_PLPGSQL) | |
| self.add_keywords(keywords.KEYWORDS_HQL) | |
| self.add_keywords(keywords.KEYWORDS_MSACCESS) | |
| self.add_keywords(keywords.KEYWORDS_SNOWFLAKE) | |
| self.add_keywords(keywords.KEYWORDS_BIGQUERY) | |
| self.add_keywords(keywords.KEYWORDS) | |
| def clear(self): | |
| """Clear all syntax configurations. | |
| Useful if you want to load a reduced set of syntax configurations. | |
| After this call, regexps and keyword dictionaries need to be loaded | |
| to make the lexer functional again.""" | |
| self._SQL_REGEX = [] | |
| self._keywords = [] | |
| def set_SQL_REGEX(self, SQL_REGEX): | |
| """Set the list of regex that will parse the SQL.""" | |
| FLAGS = re.IGNORECASE | re.UNICODE | |
| self._SQL_REGEX = [ | |
| (re.compile(rx, FLAGS).match, tt) | |
| for rx, tt in SQL_REGEX | |
| ] | |
| def add_keywords(self, keywords): | |
| """Add keyword dictionaries. Keywords are looked up in the same order | |
| that dictionaries were added.""" | |
| self._keywords.append(keywords) | |
| def is_keyword(self, value): | |
| """Checks for a keyword. | |
| If the given value is in one of the KEYWORDS_* dictionary | |
| it's considered a keyword. Otherwise, tokens.Name is returned. | |
| """ | |
| val = value.upper() | |
| for kwdict in self._keywords: | |
| if val in kwdict: | |
| return kwdict[val], value | |
| else: | |
| return tokens.Name, value | |
| def get_tokens(self, text, encoding=None): | |
| """ | |
| Return an iterable of (tokentype, value) pairs generated from | |
| `text`. If `unfiltered` is set to `True`, the filtering mechanism | |
| is bypassed even if filters are defined. | |
| Also preprocess the text, i.e. expand tabs and strip it if | |
| wanted and applies registered filters. | |
| Split ``text`` into (tokentype, text) pairs. | |
| ``stack`` is the initial stack (default: ``['root']``) | |
| """ | |
| if isinstance(text, TextIOBase): | |
| text = text.read() | |
| if isinstance(text, str): | |
| pass | |
| elif isinstance(text, bytes): | |
| if encoding: | |
| text = text.decode(encoding) | |
| else: | |
| try: | |
| text = text.decode('utf-8') | |
| except UnicodeDecodeError: | |
| text = text.decode('unicode-escape') | |
| else: | |
| raise TypeError("Expected text or file-like object, got {!r}". | |
| format(type(text))) | |
| iterable = enumerate(text) | |
| for pos, char in iterable: | |
| for rexmatch, action in self._SQL_REGEX: | |
| m = rexmatch(text, pos) | |
| if not m: | |
| continue | |
| elif isinstance(action, tokens._TokenType): | |
| yield action, m.group() | |
| elif action is keywords.PROCESS_AS_KEYWORD: | |
| yield self.is_keyword(m.group()) | |
| consume(iterable, m.end() - pos - 1) | |
| break | |
| else: | |
| yield tokens.Error, char | |
| def tokenize(sql, encoding=None): | |
| """Tokenize sql. | |
| Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream | |
| of ``(token type, value)`` items. | |
| """ | |
| return Lexer.get_default_instance().get_tokens(sql, encoding) | |
Xet Storage Details
- Size:
- 5.99 kB
- Xet hash:
- 53088561c2ced3da649f9943a872062c6a3c7d130278528c4eb70cbfb6bab403
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.