diff --git a/.venv/lib/python3.11/site-packages/distro/__init__.py b/.venv/lib/python3.11/site-packages/distro/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7686fe85a7cc94188da76bfb1c10ad2a10821256 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/__init__.py @@ -0,0 +1,54 @@ +from .distro import ( + NORMALIZED_DISTRO_ID, + NORMALIZED_LSB_ID, + NORMALIZED_OS_ID, + LinuxDistribution, + __version__, + build_number, + codename, + distro_release_attr, + distro_release_info, + id, + info, + like, + linux_distribution, + lsb_release_attr, + lsb_release_info, + major_version, + minor_version, + name, + os_release_attr, + os_release_info, + uname_attr, + uname_info, + version, + version_parts, +) + +__all__ = [ + "NORMALIZED_DISTRO_ID", + "NORMALIZED_LSB_ID", + "NORMALIZED_OS_ID", + "LinuxDistribution", + "build_number", + "codename", + "distro_release_attr", + "distro_release_info", + "id", + "info", + "like", + "linux_distribution", + "lsb_release_attr", + "lsb_release_info", + "major_version", + "minor_version", + "name", + "os_release_attr", + "os_release_info", + "uname_attr", + "uname_info", + "version", + "version_parts", +] + +__version__ = __version__ diff --git a/.venv/lib/python3.11/site-packages/distro/__main__.py b/.venv/lib/python3.11/site-packages/distro/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c01d5b08b6b44379b931d54d7fcf5221fdc9fde --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/__main__.py @@ -0,0 +1,4 @@ +from .distro import main + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab6b0a54035235c94332c49cab59ac83f05db790 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc 
differ diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c6227d94f723a900bfa1fc7ad3f380cc30a61ed Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46bb1e00bb3d55b876ed7d3c6bcbe7dcbbe2c5b7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/distro/distro.py b/.venv/lib/python3.11/site-packages/distro/distro.py new file mode 100644 index 0000000000000000000000000000000000000000..78ccdfa402ac29a8ef8aaddf7b527a0efb568d43 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/distro.py @@ -0,0 +1,1403 @@ +#!/usr/bin/env python +# Copyright 2015-2021 Nir Cohen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The ``distro`` package (``distro`` stands for Linux Distribution) provides +information about the Linux distribution it runs on, such as a reliable +machine-readable distro ID, or version information. 
+ +It is the recommended replacement for Python's original +:py:func:`platform.linux_distribution` function, but it provides much more +functionality. An alternative implementation became necessary because Python +3.5 deprecated this function, and Python 3.8 removed it altogether. Its +predecessor function :py:func:`platform.dist` was already deprecated since +Python 2.6 and removed in Python 3.8. Still, there are many cases in which +access to OS distribution information is needed. See `Python issue 1322 +`_ for more information. +""" + +import argparse +import json +import logging +import os +import re +import shlex +import subprocess +import sys +import warnings +from typing import ( + Any, + Callable, + Dict, + Iterable, + Optional, + Sequence, + TextIO, + Tuple, + Type, +) + +try: + from typing import TypedDict +except ImportError: + # Python 3.7 + TypedDict = dict + +__version__ = "1.9.0" + + +class VersionDict(TypedDict): + major: str + minor: str + build_number: str + + +class InfoDict(TypedDict): + id: str + version: str + version_parts: VersionDict + like: str + codename: str + + +_UNIXCONFDIR = os.environ.get("UNIXCONFDIR", "/etc") +_UNIXUSRLIBDIR = os.environ.get("UNIXUSRLIBDIR", "/usr/lib") +_OS_RELEASE_BASENAME = "os-release" + +#: Translation table for normalizing the "ID" attribute defined in os-release +#: files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as defined in the os-release file, translated to lower case, +#: with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_OS_ID = { + "ol": "oracle", # Oracle Linux + "opensuse-leap": "opensuse", # Newer versions of OpenSuSE report as opensuse-leap +} + +#: Translation table for normalizing the "Distributor ID" attribute returned by +#: the lsb_release command, for use by the :func:`distro.id` method. +#: +#: * Key: Value as returned by the lsb_release command, translated to lower +#: case, with blanks translated to underscores. 
+#: +#: * Value: Normalized value. +NORMALIZED_LSB_ID = { + "enterpriseenterpriseas": "oracle", # Oracle Enterprise Linux 4 + "enterpriseenterpriseserver": "oracle", # Oracle Linux 5 + "redhatenterpriseworkstation": "rhel", # RHEL 6, 7 Workstation + "redhatenterpriseserver": "rhel", # RHEL 6, 7 Server + "redhatenterprisecomputenode": "rhel", # RHEL 6 ComputeNode +} + +#: Translation table for normalizing the distro ID derived from the file name +#: of distro release files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as derived from the file name of a distro release file, +#: translated to lower case, with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_DISTRO_ID = { + "redhat": "rhel", # RHEL 6.x, 7.x +} + +# Pattern for content of distro release file (reversed) +_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile( + r"(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)" +) + +# Pattern for base file name of distro release file +_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(r"(\w+)[-_](release|version)$") + +# Base file names to be looked up for if _UNIXCONFDIR is not readable. +_DISTRO_RELEASE_BASENAMES = [ + "SuSE-release", + "altlinux-release", + "arch-release", + "base-release", + "centos-release", + "fedora-release", + "gentoo-release", + "mageia-release", + "mandrake-release", + "mandriva-release", + "mandrivalinux-release", + "manjaro-release", + "oracle-release", + "redhat-release", + "rocky-release", + "sl-release", + "slackware-version", +] + +# Base file names to be ignored when searching for distro release file +_DISTRO_RELEASE_IGNORE_BASENAMES = ( + "debian_version", + "lsb-release", + "oem-release", + _OS_RELEASE_BASENAME, + "system-release", + "plesk-release", + "iredmail-release", + "board-release", + "ec2_version", +) + + +def linux_distribution(full_distribution_name: bool = True) -> Tuple[str, str, str]: + """ + .. 
deprecated:: 1.6.0 + + :func:`distro.linux_distribution()` is deprecated. It should only be + used as a compatibility shim with Python's + :py:func:`platform.linux_distribution()`. Please use :func:`distro.id`, + :func:`distro.version` and :func:`distro.name` instead. + + Return information about the current OS distribution as a tuple + ``(id_name, version, codename)`` with items as follows: + + * ``id_name``: If *full_distribution_name* is false, the result of + :func:`distro.id`. Otherwise, the result of :func:`distro.name`. + + * ``version``: The result of :func:`distro.version`. + + * ``codename``: The extra item (usually in parentheses) after the + os-release version number, or the result of :func:`distro.codename`. + + The interface of this function is compatible with the original + :py:func:`platform.linux_distribution` function, supporting a subset of + its parameters. + + The data it returns may not exactly be the same, because it uses more data + sources than the original function, and that may lead to different data if + the OS distribution is not consistent across multiple data sources it + provides (there are indeed such distributions ...). + + Another reason for differences is the fact that the :func:`distro.id` + method normalizes the distro ID string to a reliable machine-readable value + for a number of popular OS distributions. + """ + warnings.warn( + "distro.linux_distribution() is deprecated. It should only be used as a " + "compatibility shim with Python's platform.linux_distribution(). Please use " + "distro.id(), distro.version() and distro.name() instead.", + DeprecationWarning, + stacklevel=2, + ) + return _distro.linux_distribution(full_distribution_name) + + +def id() -> str: + """ + Return the distro ID of the current distribution, as a + machine-readable string. 
+ + For a number of OS distributions, the returned distro ID value is + *reliable*, in the sense that it is documented and that it does not change + across releases of the distribution. + + This package maintains the following reliable distro ID values: + + ============== ========================================= + Distro ID Distribution + ============== ========================================= + "ubuntu" Ubuntu + "debian" Debian + "rhel" RedHat Enterprise Linux + "centos" CentOS + "fedora" Fedora + "sles" SUSE Linux Enterprise Server + "opensuse" openSUSE + "amzn" Amazon Linux + "arch" Arch Linux + "buildroot" Buildroot + "cloudlinux" CloudLinux OS + "exherbo" Exherbo Linux + "gentoo" GenToo Linux + "ibm_powerkvm" IBM PowerKVM + "kvmibm" KVM for IBM z Systems + "linuxmint" Linux Mint + "mageia" Mageia + "mandriva" Mandriva Linux + "parallels" Parallels + "pidora" Pidora + "raspbian" Raspbian + "oracle" Oracle Linux (and Oracle Enterprise Linux) + "scientific" Scientific Linux + "slackware" Slackware + "xenserver" XenServer + "openbsd" OpenBSD + "netbsd" NetBSD + "freebsd" FreeBSD + "midnightbsd" MidnightBSD + "rocky" Rocky Linux + "aix" AIX + "guix" Guix System + "altlinux" ALT Linux + ============== ========================================= + + If you have a need to get distros for reliable IDs added into this set, + or if you find that the :func:`distro.id` function returns a different + distro ID for one of the listed distros, please create an issue in the + `distro issue tracker`_. + + **Lookup hierarchy and transformations:** + + First, the ID is obtained from the following sources, in the specified + order. 
The first available and non-empty value is used: + + * the value of the "ID" attribute of the os-release file, + + * the value of the "Distributor ID" attribute returned by the lsb_release + command, + + * the first part of the file name of the distro release file, + + The so determined ID value then passes the following transformations, + before it is returned by this method: + + * it is translated to lower case, + + * blanks (which should not be there anyway) are translated to underscores, + + * a normalization of the ID is performed, based upon + `normalization tables`_. The purpose of this normalization is to ensure + that the ID is as reliable as possible, even across incompatible changes + in the OS distributions. A common reason for an incompatible change is + the addition of an os-release file, or the addition of the lsb_release + command, with ID values that differ from what was previously determined + from the distro release file name. + """ + return _distro.id() + + +def name(pretty: bool = False) -> str: + """ + Return the name of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the name is returned without version or codename. + (e.g. "CentOS Linux") + + If *pretty* is true, the version and codename are appended. + (e.g. "CentOS Linux 7.1.1503 (Core)") + + **Lookup hierarchy:** + + The name is obtained from the following sources, in the specified order. + The first available and non-empty value is used: + + * If *pretty* is false: + + - the value of the "NAME" attribute of the os-release file, + + - the value of the "Distributor ID" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file. 
+ + * If *pretty* is true: + + - the value of the "PRETTY_NAME" attribute of the os-release file, + + - the value of the "Description" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file, appended + with the value of the pretty version ("" and "" + fields) of the distro release file, if available. + """ + return _distro.name(pretty) + + +def version(pretty: bool = False, best: bool = False) -> str: + """ + Return the version of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the version is returned without codename (e.g. + "7.0"). + + If *pretty* is true, the codename in parenthesis is appended, if the + codename is non-empty (e.g. "7.0 (Maipo)"). + + Some distributions provide version numbers with different precisions in + the different sources of distribution information. Examining the different + sources in a fixed priority order does not always yield the most precise + version (e.g. for Debian 8.2, or CentOS 7.1). + + Some other distributions may not provide this kind of information. In these + cases, an empty string would be returned. This behavior can be observed + with rolling releases distributions (e.g. Arch Linux). + + The *best* parameter can be used to control the approach for the returned + version: + + If *best* is false, the first non-empty version number in priority order of + the examined sources is returned. + + If *best* is true, the most precise version number out of all examined + sources is returned. + + **Lookup hierarchy:** + + In all cases, the version number is obtained from the following sources. 
+ If *best* is false, this order represents the priority order: + + * the value of the "VERSION_ID" attribute of the os-release file, + * the value of the "Release" attribute returned by the lsb_release + command, + * the version number parsed from the "" field of the first line + of the distro release file, + * the version number parsed from the "PRETTY_NAME" attribute of the + os-release file, if it follows the format of the distro release files. + * the version number parsed from the "Description" attribute returned by + the lsb_release command, if it follows the format of the distro release + files. + """ + return _distro.version(pretty, best) + + +def version_parts(best: bool = False) -> Tuple[str, str, str]: + """ + Return the version of the current OS distribution as a tuple + ``(major, minor, build_number)`` with items as follows: + + * ``major``: The result of :func:`distro.major_version`. + + * ``minor``: The result of :func:`distro.minor_version`. + + * ``build_number``: The result of :func:`distro.build_number`. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.version_parts(best) + + +def major_version(best: bool = False) -> str: + """ + Return the major version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The major version is the first + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.major_version(best) + + +def minor_version(best: bool = False) -> str: + """ + Return the minor version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The minor version is the second + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. 
+ """ + return _distro.minor_version(best) + + +def build_number(best: bool = False) -> str: + """ + Return the build number of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The build number is the third part + of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.build_number(best) + + +def like() -> str: + """ + Return a space-separated list of distro IDs of distributions that are + closely related to the current OS distribution in regards to packaging + and programming interfaces, for example distributions the current + distribution is a derivative from. + + **Lookup hierarchy:** + + This information item is only provided by the os-release file. + For details, see the description of the "ID_LIKE" attribute in the + `os-release man page + `_. + """ + return _distro.like() + + +def codename() -> str: + """ + Return the codename for the release of the current OS distribution, + as a string. + + If the distribution does not have a codename, an empty string is returned. + + Note that the returned codename is not always really a codename. For + example, openSUSE returns "x86_64". This function does not handle such + cases in any special way and just returns the string it finds, if any. + + **Lookup hierarchy:** + + * the codename within the "VERSION" attribute of the os-release file, if + provided, + + * the value of the "Codename" attribute returned by the lsb_release + command, + + * the value of the "" field of the distro release file. + """ + return _distro.codename() + + +def info(pretty: bool = False, best: bool = False) -> InfoDict: + """ + Return certain machine-readable information items about the current OS + distribution in a dictionary, as shown in the following example: + + .. 
sourcecode:: python + + { + 'id': 'rhel', + 'version': '7.0', + 'version_parts': { + 'major': '7', + 'minor': '0', + 'build_number': '' + }, + 'like': 'fedora', + 'codename': 'Maipo' + } + + The dictionary structure and keys are always the same, regardless of which + information items are available in the underlying data sources. The values + for the various keys are as follows: + + * ``id``: The result of :func:`distro.id`. + + * ``version``: The result of :func:`distro.version`. + + * ``version_parts -> major``: The result of :func:`distro.major_version`. + + * ``version_parts -> minor``: The result of :func:`distro.minor_version`. + + * ``version_parts -> build_number``: The result of + :func:`distro.build_number`. + + * ``like``: The result of :func:`distro.like`. + + * ``codename``: The result of :func:`distro.codename`. + + For a description of the *pretty* and *best* parameters, see the + :func:`distro.version` method. + """ + return _distro.info(pretty, best) + + +def os_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the os-release file data source of the current OS distribution. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_info() + + +def lsb_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the lsb_release command data source of the current OS distribution. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_info() + + +def distro_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + + See `distro release file`_ for details about these information items. 
+ """ + return _distro.distro_release_info() + + +def uname_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + """ + return _distro.uname_info() + + +def os_release_attr(attribute: str) -> str: + """ + Return a single named information item from the os-release file data source + of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_attr(attribute) + + +def lsb_release_attr(attribute: str) -> str: + """ + Return a single named information item from the lsb_release command output + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_attr(attribute) + + +def distro_release_attr(attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `distro release file`_ for details about these information items. + """ + return _distro.distro_release_attr(attribute) + + +def uname_attr(attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the current OS distribution. 
+ + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + """ + return _distro.uname_attr(attribute) + + +try: + from functools import cached_property +except ImportError: + # Python < 3.8 + class cached_property: # type: ignore + """A version of @property which caches the value. On access, it calls the + underlying function and sets the value in `__dict__` so future accesses + will not re-call the property. + """ + + def __init__(self, f: Callable[[Any], Any]) -> None: + self._fname = f.__name__ + self._f = f + + def __get__(self, obj: Any, owner: Type[Any]) -> Any: + assert obj is not None, f"call {self._fname} on an instance" + ret = obj.__dict__[self._fname] = self._f(obj) + return ret + + +class LinuxDistribution: + """ + Provides information about a OS distribution. + + This package creates a private module-global instance of this class with + default initialization arguments, that is used by the + `consolidated accessor functions`_ and `single source accessor functions`_. + By using default initialization arguments, that module-global instance + returns data about the current OS distribution (i.e. the distro this + package runs on). + + Normally, it is not necessary to create additional instances of this class. + However, in situations where control is needed over the exact data sources + that are used, instances of this class can be created with a specific + distro release file, or a specific os-release file, or without invoking the + lsb_release command. 
+ """ + + def __init__( + self, + include_lsb: Optional[bool] = None, + os_release_file: str = "", + distro_release_file: str = "", + include_uname: Optional[bool] = None, + root_dir: Optional[str] = None, + include_oslevel: Optional[bool] = None, + ) -> None: + """ + The initialization method of this class gathers information from the + available data sources, and stores that in private instance attributes. + Subsequent access to the information items uses these private instance + attributes, so that the data sources are read only once. + + Parameters: + + * ``include_lsb`` (bool): Controls whether the + `lsb_release command output`_ is included as a data source. + + If the lsb_release command is not available in the program execution + path, the data source for the lsb_release command will be empty. + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is to be used as a data source. + + An empty string (the default) will cause the default path name to + be used (see `os-release file`_ for details). + + If the specified or defaulted os-release file does not exist, the + data source for the os-release file will be empty. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is to be used as a data source. + + An empty string (the default) will cause a default search algorithm + to be used (see `distro release file`_ for details). + + If the specified distro release file does not exist, or if no default + distro release file can be found, the data source for the distro + release file will be empty. + + * ``include_uname`` (bool): Controls whether uname command output is + included as a data source. If the uname command is not available in + the program execution path the data source for the uname command will + be empty. + + * ``root_dir`` (string): The absolute path to the root directory to use + to find distro-related information files. 
Note that ``include_*`` + parameters must not be enabled in combination with ``root_dir``. + + * ``include_oslevel`` (bool): Controls whether (AIX) oslevel command + output is included as a data source. If the oslevel command is not + available in the program execution path the data source will be + empty. + + Public instance attributes: + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``include_lsb`` (bool): The result of the ``include_lsb`` parameter. + This controls whether the lsb information will be loaded. + + * ``include_uname`` (bool): The result of the ``include_uname`` + parameter. This controls whether the uname information will + be loaded. + + * ``include_oslevel`` (bool): The result of the ``include_oslevel`` + parameter. This controls whether (AIX) oslevel information will be + loaded. + + * ``root_dir`` (string): The result of the ``root_dir`` parameter. + The absolute path to the root directory to use to find distro-related + information files. + + Raises: + + * :py:exc:`ValueError`: Initialization parameters combination is not + supported. + + * :py:exc:`OSError`: Some I/O issue with an os-release file or distro + release file. + + * :py:exc:`UnicodeError`: A data source has unexpected characters or + uses an unexpected encoding. 
+ """ + self.root_dir = root_dir + self.etc_dir = os.path.join(root_dir, "etc") if root_dir else _UNIXCONFDIR + self.usr_lib_dir = ( + os.path.join(root_dir, "usr/lib") if root_dir else _UNIXUSRLIBDIR + ) + + if os_release_file: + self.os_release_file = os_release_file + else: + etc_dir_os_release_file = os.path.join(self.etc_dir, _OS_RELEASE_BASENAME) + usr_lib_os_release_file = os.path.join( + self.usr_lib_dir, _OS_RELEASE_BASENAME + ) + + # NOTE: The idea is to respect order **and** have it set + # at all times for API backwards compatibility. + if os.path.isfile(etc_dir_os_release_file) or not os.path.isfile( + usr_lib_os_release_file + ): + self.os_release_file = etc_dir_os_release_file + else: + self.os_release_file = usr_lib_os_release_file + + self.distro_release_file = distro_release_file or "" # updated later + + is_root_dir_defined = root_dir is not None + if is_root_dir_defined and (include_lsb or include_uname or include_oslevel): + raise ValueError( + "Including subprocess data sources from specific root_dir is disallowed" + " to prevent false information" + ) + self.include_lsb = ( + include_lsb if include_lsb is not None else not is_root_dir_defined + ) + self.include_uname = ( + include_uname if include_uname is not None else not is_root_dir_defined + ) + self.include_oslevel = ( + include_oslevel if include_oslevel is not None else not is_root_dir_defined + ) + + def __repr__(self) -> str: + """Return repr of all info""" + return ( + "LinuxDistribution(" + "os_release_file={self.os_release_file!r}, " + "distro_release_file={self.distro_release_file!r}, " + "include_lsb={self.include_lsb!r}, " + "include_uname={self.include_uname!r}, " + "include_oslevel={self.include_oslevel!r}, " + "root_dir={self.root_dir!r}, " + "_os_release_info={self._os_release_info!r}, " + "_lsb_release_info={self._lsb_release_info!r}, " + "_distro_release_info={self._distro_release_info!r}, " + "_uname_info={self._uname_info!r}, " + 
"_oslevel_info={self._oslevel_info!r})".format(self=self) + ) + + def linux_distribution( + self, full_distribution_name: bool = True + ) -> Tuple[str, str, str]: + """ + Return information about the OS distribution that is compatible + with Python's :func:`platform.linux_distribution`, supporting a subset + of its parameters. + + For details, see :func:`distro.linux_distribution`. + """ + return ( + self.name() if full_distribution_name else self.id(), + self.version(), + self._os_release_info.get("release_codename") or self.codename(), + ) + + def id(self) -> str: + """Return the distro ID of the OS distribution, as a string. + + For details, see :func:`distro.id`. + """ + + def normalize(distro_id: str, table: Dict[str, str]) -> str: + distro_id = distro_id.lower().replace(" ", "_") + return table.get(distro_id, distro_id) + + distro_id = self.os_release_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_OS_ID) + + distro_id = self.lsb_release_attr("distributor_id") + if distro_id: + return normalize(distro_id, NORMALIZED_LSB_ID) + + distro_id = self.distro_release_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + distro_id = self.uname_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + return "" + + def name(self, pretty: bool = False) -> str: + """ + Return the name of the OS distribution, as a string. + + For details, see :func:`distro.name`. 
+ """ + name = ( + self.os_release_attr("name") + or self.lsb_release_attr("distributor_id") + or self.distro_release_attr("name") + or self.uname_attr("name") + ) + if pretty: + name = self.os_release_attr("pretty_name") or self.lsb_release_attr( + "description" + ) + if not name: + name = self.distro_release_attr("name") or self.uname_attr("name") + version = self.version(pretty=True) + if version: + name = f"{name} {version}" + return name or "" + + def version(self, pretty: bool = False, best: bool = False) -> str: + """ + Return the version of the OS distribution, as a string. + + For details, see :func:`distro.version`. + """ + versions = [ + self.os_release_attr("version_id"), + self.lsb_release_attr("release"), + self.distro_release_attr("version_id"), + self._parse_distro_release_content(self.os_release_attr("pretty_name")).get( + "version_id", "" + ), + self._parse_distro_release_content( + self.lsb_release_attr("description") + ).get("version_id", ""), + self.uname_attr("release"), + ] + if self.uname_attr("id").startswith("aix"): + # On AIX platforms, prefer oslevel command output. + versions.insert(0, self.oslevel_info()) + elif self.id() == "debian" or "debian" in self.like().split(): + # On Debian-like, add debian_version file content to candidates list. + versions.append(self._debian_version) + version = "" + if best: + # This algorithm uses the last version in priority order that has + # the best precision. If the versions are not in conflict, that + # does not matter; otherwise, using the last one instead of the + # first one might be considered a surprise. 
+ for v in versions: + if v.count(".") > version.count(".") or version == "": + version = v + else: + for v in versions: + if v != "": + version = v + break + if pretty and version and self.codename(): + version = f"{version} ({self.codename()})" + return version + + def version_parts(self, best: bool = False) -> Tuple[str, str, str]: + """ + Return the version of the OS distribution, as a tuple of version + numbers. + + For details, see :func:`distro.version_parts`. + """ + version_str = self.version(best=best) + if version_str: + version_regex = re.compile(r"(\d+)\.?(\d+)?\.?(\d+)?") + matches = version_regex.match(version_str) + if matches: + major, minor, build_number = matches.groups() + return major, minor or "", build_number or "" + return "", "", "" + + def major_version(self, best: bool = False) -> str: + """ + Return the major version number of the current distribution. + + For details, see :func:`distro.major_version`. + """ + return self.version_parts(best)[0] + + def minor_version(self, best: bool = False) -> str: + """ + Return the minor version number of the current distribution. + + For details, see :func:`distro.minor_version`. + """ + return self.version_parts(best)[1] + + def build_number(self, best: bool = False) -> str: + """ + Return the build number of the current distribution. + + For details, see :func:`distro.build_number`. + """ + return self.version_parts(best)[2] + + def like(self) -> str: + """ + Return the IDs of distributions that are like the OS distribution. + + For details, see :func:`distro.like`. + """ + return self.os_release_attr("id_like") or "" + + def codename(self) -> str: + """ + Return the codename of the OS distribution. + + For details, see :func:`distro.codename`. 
+ """ + try: + # Handle os_release specially since distros might purposefully set + # this to empty string to have no codename + return self._os_release_info["codename"] + except KeyError: + return ( + self.lsb_release_attr("codename") + or self.distro_release_attr("codename") + or "" + ) + + def info(self, pretty: bool = False, best: bool = False) -> InfoDict: + """ + Return certain machine-readable information about the OS + distribution. + + For details, see :func:`distro.info`. + """ + return InfoDict( + id=self.id(), + version=self.version(pretty, best), + version_parts=VersionDict( + major=self.major_version(best), + minor=self.minor_version(best), + build_number=self.build_number(best), + ), + like=self.like(), + codename=self.codename(), + ) + + def os_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the os-release file data source of the OS distribution. + + For details, see :func:`distro.os_release_info`. + """ + return self._os_release_info + + def lsb_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the lsb_release command data source of the OS + distribution. + + For details, see :func:`distro.lsb_release_info`. + """ + return self._lsb_release_info + + def distro_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the distro release file data source of the OS + distribution. + + For details, see :func:`distro.distro_release_info`. + """ + return self._distro_release_info + + def uname_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the uname command data source of the OS distribution. + + For details, see :func:`distro.uname_info`. + """ + return self._uname_info + + def oslevel_info(self) -> str: + """ + Return AIX' oslevel command output. 
+ """ + return self._oslevel_info + + def os_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the os-release file data + source of the OS distribution. + + For details, see :func:`distro.os_release_attr`. + """ + return self._os_release_info.get(attribute, "") + + def lsb_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the lsb_release command + output data source of the OS distribution. + + For details, see :func:`distro.lsb_release_attr`. + """ + return self._lsb_release_info.get(attribute, "") + + def distro_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the OS distribution. + + For details, see :func:`distro.distro_release_attr`. + """ + return self._distro_release_info.get(attribute, "") + + def uname_attr(self, attribute: str) -> str: + """ + Return a single named information item from the uname command + output data source of the OS distribution. + + For details, see :func:`distro.uname_attr`. + """ + return self._uname_info.get(attribute, "") + + @cached_property + def _os_release_info(self) -> Dict[str, str]: + """ + Get the information items from the specified os-release file. + + Returns: + A dictionary containing all information items. + """ + if os.path.isfile(self.os_release_file): + with open(self.os_release_file, encoding="utf-8") as release_file: + return self._parse_os_release_content(release_file) + return {} + + @staticmethod + def _parse_os_release_content(lines: TextIO) -> Dict[str, str]: + """ + Parse the lines of an os-release file. + + Parameters: + + * lines: Iterable through the lines in the os-release file. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. 
+ """ + props = {} + lexer = shlex.shlex(lines, posix=True) + lexer.whitespace_split = True + + tokens = list(lexer) + for token in tokens: + # At this point, all shell-like parsing has been done (i.e. + # comments processed, quotes and backslash escape sequences + # processed, multi-line values assembled, trailing newlines + # stripped, etc.), so the tokens are now either: + # * variable assignments: var=value + # * commands or their arguments (not allowed in os-release) + # Ignore any tokens that are not variable assignments + if "=" in token: + k, v = token.split("=", 1) + props[k.lower()] = v + + if "version" in props: + # extract release codename (if any) from version attribute + match = re.search(r"\((\D+)\)|,\s*(\D+)", props["version"]) + if match: + release_codename = match.group(1) or match.group(2) + props["codename"] = props["release_codename"] = release_codename + + if "version_codename" in props: + # os-release added a version_codename field. Use that in + # preference to anything else Note that some distros purposefully + # do not have code names. They should be setting + # version_codename="" + props["codename"] = props["version_codename"] + elif "ubuntu_codename" in props: + # Same as above but a non-standard field name used on older Ubuntus + props["codename"] = props["ubuntu_codename"] + + return props + + @cached_property + def _lsb_release_info(self) -> Dict[str, str]: + """ + Get the information items from the lsb_release command output. + + Returns: + A dictionary containing all information items. 
+ """ + if not self.include_lsb: + return {} + try: + cmd = ("lsb_release", "-a") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + # Command not found or lsb_release returned error + except (OSError, subprocess.CalledProcessError): + return {} + content = self._to_str(stdout).splitlines() + return self._parse_lsb_release_content(content) + + @staticmethod + def _parse_lsb_release_content(lines: Iterable[str]) -> Dict[str, str]: + """ + Parse the output of the lsb_release command. + + Parameters: + + * lines: Iterable through the lines of the lsb_release output. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. + """ + props = {} + for line in lines: + kv = line.strip("\n").split(":", 1) + if len(kv) != 2: + # Ignore lines without colon. + continue + k, v = kv + props.update({k.replace(" ", "_").lower(): v.strip()}) + return props + + @cached_property + def _uname_info(self) -> Dict[str, str]: + if not self.include_uname: + return {} + try: + cmd = ("uname", "-rs") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + except OSError: + return {} + content = self._to_str(stdout).splitlines() + return self._parse_uname_content(content) + + @cached_property + def _oslevel_info(self) -> str: + if not self.include_oslevel: + return "" + try: + stdout = subprocess.check_output("oslevel", stderr=subprocess.DEVNULL) + except (OSError, subprocess.CalledProcessError): + return "" + return self._to_str(stdout).strip() + + @cached_property + def _debian_version(self) -> str: + try: + with open( + os.path.join(self.etc_dir, "debian_version"), encoding="ascii" + ) as fp: + return fp.readline().rstrip() + except FileNotFoundError: + return "" + + @staticmethod + def _parse_uname_content(lines: Sequence[str]) -> Dict[str, str]: + if not lines: + return {} + props = {} + match = re.search(r"^([^\s]+)\s+([\d\.]+)", lines[0].strip()) + if match: + name, version = 
match.groups() + + # This is to prevent the Linux kernel version from + # appearing as the 'best' version on otherwise + # identifiable distributions. + if name == "Linux": + return {} + props["id"] = name.lower() + props["name"] = name + props["release"] = version + return props + + @staticmethod + def _to_str(bytestring: bytes) -> str: + encoding = sys.getfilesystemencoding() + return bytestring.decode(encoding) + + @cached_property + def _distro_release_info(self) -> Dict[str, str]: + """ + Get the information items from the specified distro release file. + + Returns: + A dictionary containing all information items. + """ + if self.distro_release_file: + # If it was specified, we use it and parse what we can, even if + # its file name or content does not match the expected pattern. + distro_info = self._parse_distro_release_file(self.distro_release_file) + basename = os.path.basename(self.distro_release_file) + # The file name pattern for user-specified distro release files + # is somewhat more tolerant (compared to when searching for the + # file), because we want to use what was specified as best as + # possible. + match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + else: + try: + basenames = [ + basename + for basename in os.listdir(self.etc_dir) + if basename not in _DISTRO_RELEASE_IGNORE_BASENAMES + and os.path.isfile(os.path.join(self.etc_dir, basename)) + ] + # We sort for repeatability in cases where there are multiple + # distro specific files; e.g. CentOS, Oracle, Enterprise all + # containing `redhat-release` on top of their own. + basenames.sort() + except OSError: + # This may occur when /etc is not readable but we can't be + # sure about the *-release files. Check common entries of + # /etc for information. If they turn out to not be there the + # error is handled in `_parse_distro_release_file()`. 
+ basenames = _DISTRO_RELEASE_BASENAMES + for basename in basenames: + match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + if match is None: + continue + filepath = os.path.join(self.etc_dir, basename) + distro_info = self._parse_distro_release_file(filepath) + # The name is always present if the pattern matches. + if "name" not in distro_info: + continue + self.distro_release_file = filepath + break + else: # the loop didn't "break": no candidate. + return {} + + if match is not None: + distro_info["id"] = match.group(1) + + # CloudLinux < 7: manually enrich info with proper id. + if "cloudlinux" in distro_info.get("name", "").lower(): + distro_info["id"] = "cloudlinux" + + return distro_info + + def _parse_distro_release_file(self, filepath: str) -> Dict[str, str]: + """ + Parse a distro release file. + + Parameters: + + * filepath: Path name of the distro release file. + + Returns: + A dictionary containing all information items. + """ + try: + with open(filepath, encoding="utf-8") as fp: + # Only parse the first line. For instance, on SLES there + # are multiple lines. We don't want them... + return self._parse_distro_release_content(fp.readline()) + except OSError: + # Ignore not being able to read a specific, seemingly version + # related file. + # See https://github.com/python-distro/distro/issues/162 + return {} + + @staticmethod + def _parse_distro_release_content(line: str) -> Dict[str, str]: + """ + Parse a line from a distro release file. + + Parameters: + * line: Line from the distro release file. Must be a unicode string + or a UTF-8 encoded byte string. + + Returns: + A dictionary containing all information items. 
+ """ + matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match(line.strip()[::-1]) + distro_info = {} + if matches: + # regexp ensures non-None + distro_info["name"] = matches.group(3)[::-1] + if matches.group(2): + distro_info["version_id"] = matches.group(2)[::-1] + if matches.group(1): + distro_info["codename"] = matches.group(1)[::-1] + elif line: + distro_info["name"] = line.strip() + return distro_info + + +_distro = LinuxDistribution() + + +def main() -> None: + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + parser = argparse.ArgumentParser(description="OS distro info tool") + parser.add_argument( + "--json", "-j", help="Output in machine readable format", action="store_true" + ) + + parser.add_argument( + "--root-dir", + "-r", + type=str, + dest="root_dir", + help="Path to the root filesystem directory (defaults to /)", + ) + + args = parser.parse_args() + + if args.root_dir: + dist = LinuxDistribution( + include_lsb=False, + include_uname=False, + include_oslevel=False, + root_dir=args.root_dir, + ) + else: + dist = _distro + + if args.json: + logger.info(json.dumps(dist.info(), indent=4, sort_keys=True)) + else: + logger.info("Name: %s", dist.name(pretty=True)) + distribution_version = dist.version(pretty=True) + logger.info("Version: %s", distribution_version) + distribution_codename = dist.codename() + logger.info("Codename: %s", distribution_codename) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/distro/py.typed b/.venv/lib/python3.11/site-packages/distro/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 
--- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..76f67efdc6470081b512a8db5bf2b1d4962d9c3c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Georgi Gerganov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..e97309f52e8dab48a7422599ee189c5b7f03888c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA @@ -0,0 +1,114 @@ +Metadata-Version: 2.1 +Name: gguf +Version: 0.10.0 +Summary: Read and write ML models in GGUF for GGML +Home-page: https://ggml.ai +Keywords: ggml,gguf,llama.cpp +Author: GGML +Author-email: ggml@ggml.ai +Requires-Python: >=3.8 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Requires-Dist: numpy (>=1.17) +Requires-Dist: pyyaml (>=5.1) +Requires-Dist: tqdm (>=4.27) +Project-URL: Repository, https://github.com/ggerganov/llama.cpp +Description-Content-Type: text/markdown + +## gguf + +This is a Python package for writing binary files in the [GGUF](https://github.com/ggerganov/ggml/pull/302) +(GGML Universal File) format. + +See [convert_hf_to_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) +as an example for its usage. + +## Installation +```sh +pip install gguf +``` + +## API Examples/Simple Tools + +[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model. + +[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console. 
+ +[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key. + +[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files. + +[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values. + +## Development +Maintainers who participate in development of this package are advised to install it in editable mode: + +```sh +cd /path/to/llama.cpp/gguf-py + +pip install --editable . +``` + +**Note**: This may require to upgrade your Pip installation, with a message saying that editable installation currently requires `setup.py`. +In this case, upgrade Pip to the latest: + +```sh +pip install --upgrade pip +``` + +## Automatic publishing with CI + +There's a GitHub workflow to make a release automatically upon creation of tags in a specified format. + +1. Bump the version in `pyproject.toml`. +2. Create a tag named `gguf-vx.x.x` where `x.x.x` is the semantic version number. + +```sh +git tag -a gguf-v1.0.0 -m "Version 1.0 release" +``` + +3. Push the tags. + +```sh +git push origin --tags +``` + +## Manual publishing +If you want to publish the package manually for any reason, you need to have `twine` and `build` installed: + +```sh +pip install build twine +``` + +Then, follow these steps to release a new version: + +1. Bump the version in `pyproject.toml`. +2. Build the package: + +```sh +python -m build +``` + +3. 
Upload the generated distribution archives: + +```sh +python -m twine upload dist/* +``` + +## Run Unit Tests + +From root of this repository you can run this command to run all the unit tests + +```bash +python -m unittest discover ./gguf-py -v +``` + +## TODO +- [ ] Include conversion scripts as command line entry points in this package. + diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..ccb5732a1710165e6bef70154355b390ecabb120 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD @@ -0,0 +1,45 @@ +../../../bin/gguf-convert-endian,sha256=W53Q9wLM4ktx91uKrLfyuFNGS7UCbZr-mA2U66A_W0E,276 +../../../bin/gguf-dump,sha256=v29_dyj4DF8zullVHEvLoQotNn0dgtrt8f-NjwDigFw,256 +../../../bin/gguf-new-metadata,sha256=UwjDwictRuL-TFKI2mlnkzB8YzGJSNWJ69870J9w64Y,272 +../../../bin/gguf-set-metadata,sha256=iqt1mYpW1z41SzKpVu0r6NLBVYPDc-k06JH7cmo-e9s,272 +gguf-0.10.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +gguf-0.10.0.dist-info/LICENSE,sha256=73jH5mWeNMeYGU8NNE6AfHIt5wy8oTWe9UdyZh4Ryjg,1072 +gguf-0.10.0.dist-info/METADATA,sha256=0jck7V9EOoT24ugiLaorfqFzhmN3eJM7lj-fyq8tvy8,3533 +gguf-0.10.0.dist-info/RECORD,, +gguf-0.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88 +gguf-0.10.0.dist-info/entry_points.txt,sha256=6SBOHW2PZstAG6hvoQ63pyxPF8CIGjGLXZyG-NFFnAc,227 +gguf/__init__.py,sha256=PM_AEEzX6ojGAodDt78_LIm19HRCXeA6IXpgcjINfC8,219 +gguf/__pycache__/__init__.cpython-311.pyc,, +gguf/__pycache__/constants.cpython-311.pyc,, +gguf/__pycache__/gguf.cpython-311.pyc,, +gguf/__pycache__/gguf_reader.cpython-311.pyc,, +gguf/__pycache__/gguf_writer.cpython-311.pyc,, +gguf/__pycache__/lazy.cpython-311.pyc,, +gguf/__pycache__/metadata.cpython-311.pyc,, +gguf/__pycache__/quants.cpython-311.pyc,, +gguf/__pycache__/tensor_mapping.cpython-311.pyc,, 
+gguf/__pycache__/utility.cpython-311.pyc,, +gguf/__pycache__/vocab.cpython-311.pyc,, +gguf/constants.py,sha256=00pCXTqoWP36ZR9OcuxJfhezZfxqjUTKGCNvGMrkLGU,49158 +gguf/gguf.py,sha256=8MDu7a0JEXhLUv_tjhYqDrWubVNc41cFvBYZbkZZenI,478 +gguf/gguf_reader.py,sha256=0Y86qmxYfPNq3u_AiAwSnhU_G-V2SjXK2b7CM3MoPP4,12365 +gguf/gguf_writer.py,sha256=ZWW_XwN6tyDIzBCwl6ARDwwRZ9TewNMV32IQaVYRNP8,35090 +gguf/lazy.py,sha256=QwxFAtj-5nIm_a3mvLBezpShFuo-pjJhoAiXreK9ITQ,8601 +gguf/metadata.py,sha256=nFEqgODwH6jYF3_jcycZkN4FjLnfXOQ7A73cLKZJb0Y,25722 +gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +gguf/quants.py,sha256=_6kmSprMfawH3gcPz9WbxnlXCeZQIYivf_HjfUvUkM0,57336 +gguf/tensor_mapping.py,sha256=8CpqXKwKYFz6v11Fpzhe85hPcH18gnieh_FQsISpcVc,30893 +gguf/utility.py,sha256=jbd1bduLFjjNAQhjghSyrRQfJ_kjiTFhRAZxm0oabZw,2934 +gguf/vocab.py,sha256=N4PNulV8x_m7cdbmbUBF3nqQ0UXGTDi31oEIh3h8hAs,19046 +scripts/__init__.py,sha256=-Az7fR5lhVb9GPB5U_BaDb8Xp706km53vSY7aXQYw9I,297 +scripts/__pycache__/__init__.cpython-311.pyc,, +scripts/__pycache__/gguf_convert_endian.cpython-311.pyc,, +scripts/__pycache__/gguf_dump.cpython-311.pyc,, +scripts/__pycache__/gguf_hash.cpython-311.pyc,, +scripts/__pycache__/gguf_new_metadata.cpython-311.pyc,, +scripts/__pycache__/gguf_set_metadata.cpython-311.pyc,, +scripts/gguf_convert_endian.py,sha256=tcpe1O6xLTSetYD5T8dYcDN7cwiTWS-5sH7iFCP7ga8,5279 +scripts/gguf_dump.py,sha256=Ej3XWrYupFOg-Bz-ff9EAN5hdmp2OCdEkd46D4PtZ58,21934 +scripts/gguf_hash.py,sha256=DtzQ3xSdS8bHxdsp8ELQaBlaZiqqDBXzqtRSEzYTkhM,3711 +scripts/gguf_new_metadata.py,sha256=bGuF8iXkElxIsr4mR952DHEBTosA9ob-UkF-zEqlPqo,10713 +scripts/gguf_set_metadata.py,sha256=9jISgnh2atbwKh-68oN5b0wLxs0mYpXOWILjuqLTt-Q,4131 diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..d73ccaae8e0eea45949b0957a5af034099b36aa4 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: poetry-core 1.9.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..419684c75c1cf14cbbcf3f970d401862b280c91d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt @@ -0,0 +1,6 @@ +[console_scripts] +gguf-convert-endian=scripts:gguf_convert_endian_entrypoint +gguf-dump=scripts:gguf_dump_entrypoint +gguf-new-metadata=scripts:gguf_new_metadata_entrypoint +gguf-set-metadata=scripts:gguf_set_metadata_entrypoint + diff --git a/.venv/lib/python3.11/site-packages/mistral_common/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5197c5f5acdac86c91de46aef8412fbc75ca5656 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/__init__.py @@ -0,0 +1 @@ +__version__ = "1.5.2" diff --git a/.venv/lib/python3.11/site-packages/mistral_common/base.py b/.venv/lib/python3.11/site-packages/mistral_common/base.py new file mode 100644 index 0000000000000000000000000000000000000000..dfa180b2985c8f2445de3cd0469776045624bf8a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/base.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel, ConfigDict + + +class MistralBase(BaseModel): + """ + Base class for all Mistral Pydantic models. 
+ """ + + model_config = ConfigDict(extra="forbid", validate_default=True, use_enum_values=True) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py b/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..236cd9a8b9a7586377c62381986f31d0447eb9d0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py @@ -0,0 +1,67 @@ +from typing import Optional + + +class MistralCommonException(Exception): + message: str = "Internal server error" + + def __init__( + self, + message: Optional[str] = None, + ) -> None: + if message: + self.message = message + + +class TokenizerException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class UnsupportedTokenizerFeatureException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidRequestException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidSystemPromptException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidMessageStructureException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidAssistantMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidToolMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidToolSchemaException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidUserMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidFunctionCallException(MistralCommonException): + def __init__(self, 
message: str) -> None: + super().__init__(message) + + +class InvalidToolException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py b/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py new file mode 100644 index 0000000000000000000000000000000000000000..4b8f6bd9226c7d9f45aa0c5ec5a0cd2d50397f7e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py @@ -0,0 +1,70 @@ +import base64 +import io +from typing import Union + +import requests +from PIL import Image +from pydantic import BeforeValidator, PlainSerializer, SerializationInfo +from typing_extensions import Annotated + +from mistral_common import __version__ + + +def download_image(url: str) -> Image.Image: + headers = {"User-Agent": f"mistral-common/{__version__}"} + try: + # Make a request to download the image + response = requests.get(url, headers=headers) + response.raise_for_status() # Raise an error for bad responses (4xx, 5xx) + + # Convert the image content to a PIL Image + img = Image.open(io.BytesIO(response.content)) + return img + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error downloading the image from {url}: {e}.") + except Exception as e: + raise RuntimeError(f"Error converting to PIL image: {e}") + + +def maybe_load_image_from_str_or_bytes(x: Union[Image.Image, str, bytes]) -> Image.Image: + if isinstance(x, Image.Image): + return x + if isinstance(x, bytes): + try: + return Image.open(io.BytesIO(x)) + except Exception: + raise RuntimeError("Encountered an error when loading image from bytes.") + + try: + image = Image.open(io.BytesIO(base64.b64decode(x.encode("ascii")))) + return image + except Exception as e: + raise RuntimeError( + f"Encountered an error when loading image from bytes starting " + f"with '{x[:20]}'. Expected either a PIL.Image.Image or a base64 " + f"encoded string of bytes." 
+ ) from e + + +def serialize_image_to_byte_str(im: Image.Image, info: SerializationInfo) -> str: + if hasattr(info, "context"): + context = info.context or {} + else: + context = {} + + stream = io.BytesIO() + im_format = im.format or "PNG" + im.save(stream, format=im_format) + im_b64 = base64.b64encode(stream.getvalue()).decode("ascii") + if context and (max_image_b64_len := context.get("max_image_b64_len")): + return im_b64[:max_image_b64_len] + "..." + return im_b64 + + +# A normal PIL image that supports serialization to b64 bytes string +SerializableImage = Annotated[ + Image.Image, + BeforeValidator(maybe_load_image_from_str_or_bytes), + PlainSerializer(serialize_image_to_byte_str), +] diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc21452e80f949a189c8dad291cf91539b4e615b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ba2e235db5f7918b238050c1dd36e8594afacef Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5eb5ebbb65102460384f33c363b69a9e592da463 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py new file mode 100644 index 0000000000000000000000000000000000000000..74d0573e32710698eecbb3d778e56e54a22915ec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py @@ -0,0 +1,18 @@ +from typing import Optional + +from pydantic import Field + +from mistral_common.base import MistralBase + + +class UsageInfo(MistralBase): + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + + +class BaseCompletionRequest(MistralBase): + temperature: float = Field(default=0.7, ge=0.0, le=1.0) + top_p: float = Field(default=1.0, ge=0.0, le=1.0) + max_tokens: Optional[int] = Field(default=None, ge=0) + random_seed: Optional[int] = Field(default=None, ge=0) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75a3723c363f08887869a6636ae1f4eddd64f59e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50558933d83da9d8294c6f200e2e719be3717dcd Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6fb3177b2a664886560f5c7d9cef6aa8e2aff32 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60587e8677e11f6ba085a8560d3a95073c24053f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py new file mode 100644 index 0000000000000000000000000000000000000000..82d6f623c5d71f049768b11a6a6ae2515177ab21 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py @@ -0,0 +1,113 @@ +from enum import Enum +from typing import List, Literal, Optional, TypeVar, Union + +from pydantic import ConfigDict, Field +from typing_extensions import Annotated, 
TypeAlias + +from mistral_common.base import MistralBase +from mistral_common.multimodal import SerializableImage +from mistral_common.protocol.instruct.tool_calls import ToolCall + + +class ChunkTypes(str, Enum): + text = "text" + image = "image" + image_url = "image_url" + + +class BaseContentChunk(MistralBase): + type: Literal[ChunkTypes.text, ChunkTypes.image, ChunkTypes.image_url] + + +class ImageChunk(BaseContentChunk): + type: Literal[ChunkTypes.image] = ChunkTypes.image + image: SerializableImage + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class ImageURL(MistralBase): + url: str + detail: Optional[str] = None + + +class ImageURLChunk(BaseContentChunk): + """ + {"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0 + """ + + type: Literal[ChunkTypes.image_url] = ChunkTypes.image_url + image_url: Union[ImageURL, str] + + model_config = ConfigDict(arbitrary_types_allowed=True) + + def get_url(self) -> str: + if isinstance(self.image_url, ImageURL): + return self.image_url.url + return self.image_url + + +class TextChunk(BaseContentChunk): + type: Literal[ChunkTypes.text] = ChunkTypes.text + text: str + + +ContentChunk = Annotated[Union[TextChunk, ImageChunk, ImageURLChunk], Field(discriminator="type")] + + +class Roles(str, Enum): + system = "system" + user = "user" + assistant = "assistant" + tool = "tool" + + +class BaseMessage(MistralBase): + role: Literal[Roles.system, Roles.user, Roles.assistant, Roles.tool] + + +class UserMessage(BaseMessage): + role: Literal[Roles.user] = Roles.user + content: Union[str, List[ContentChunk]] + + +class SystemMessage(BaseMessage): + role: Literal[Roles.system] = Roles.system + content: Union[str, List[ContentChunk]] + + +class AssistantMessage(BaseMessage): + role: Literal[Roles.assistant] = Roles.assistant + content: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + prefix: bool = False + + +class FinetuningAssistantMessage(AssistantMessage): + weight: Optional[float] 
= None + + +class ToolMessage(BaseMessage): + content: str + role: Literal[Roles.tool] = Roles.tool + tool_call_id: Optional[str] = None + + # Deprecated in V3 tokenization + name: Optional[str] = None + + +ChatMessage = Annotated[Union[SystemMessage, UserMessage, AssistantMessage, ToolMessage], Field(discriminator="role")] + +FinetuningMessage = Annotated[ + Union[SystemMessage, UserMessage, FinetuningAssistantMessage, ToolMessage], + Field(discriminator="role"), +] + +ChatMessageType = TypeVar("ChatMessageType", bound=ChatMessage) + +# Used for type hinting in generic classes where we might override the message types +UserMessageType = TypeVar("UserMessageType", bound=UserMessage) +AssistantMessageType = TypeVar("AssistantMessageType", bound=AssistantMessage) +ToolMessageType = TypeVar("ToolMessageType", bound=ToolMessage) +SystemMessageType = TypeVar("SystemMessageType", bound=SystemMessage) + +UATS: TypeAlias = Union[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType] diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..2852f968e8c54dcb48f07a4a5a3ff020606d8ce0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py @@ -0,0 +1,265 @@ +import json +from typing import Generic, List, Optional, Sequence, Type, Union + +from mistral_common.protocol.instruct.messages import ( + UATS, + AssistantMessage, + AssistantMessageType, + ContentChunk, + FinetuningAssistantMessage, + Roles, + SystemMessage, + SystemMessageType, + TextChunk, + ToolMessage, + ToolMessageType, + UserMessage, + UserMessageType, +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.protocol.instruct.tool_calls import FunctionCall, Tool, ToolCall +from mistral_common.tokens.instruct.request import 
InstructRequest +from mistral_common.tokens.tokenizers.base import InstructRequestType, TokenizerVersion + + +class InstructRequestNormalizer( + Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, InstructRequestType] +): + """ + Takes a ChatCompletionRequest and normalizes it into an InstructRequest. + + The normalization process does several things such as: + - Aggregate consecutive messages of the same role + - Aggregate system prompts + - Normalize json content + - Normalize tool calls + """ + + system_prompt_in_begin: bool = False + + def __init__( + self, + user_message_class: Type[UserMessageType], + assistant_message_class: Type[AssistantMessageType], + tool_message_class: Type[ToolMessageType], + system_message_class: Type[SystemMessageType], + instruct_request_class: Type[InstructRequestType], + ): + self._user_message_class = user_message_class + self._assistant_message_class = assistant_message_class + self._tool_message_class = tool_message_class + self._instruct_request_class = instruct_request_class + # this is unused but makes creation nicer + self._system_message_class = system_message_class + + @staticmethod + def normalizer() -> "InstructRequestNormalizer": + return InstructRequestNormalizer( + UserMessage, + AssistantMessage, + ToolMessage, + SystemMessage, + InstructRequest[UATS, Tool], + ) + + def _normalize_json_content(self, content: Optional[str]) -> str: + if content is None or len(content) == 0: + return "{}" + + try: + parsed_json = json.loads(content) + normalized_content = json.dumps(parsed_json, ensure_ascii=False) + except json.JSONDecodeError: + normalized_content = content + return normalized_content + + def _aggregate_content_chunks(self, content: Union[str, List[TextChunk]], chunk_join_str: str = "\n\n") -> str: + if isinstance(content, list): + return chunk_join_str.join([chunk.text for chunk in content]) + else: + return content + + def _aggregate_system_prompts(self, request: 
ChatCompletionRequest[UATS]) -> Optional[str]: + system_prompt: List[str] = [] + + for message in request.messages: + if message.role == Roles.system and message.content: + system_prompt.append(self._aggregate_content_chunks(message.content)) + + return "\n\n".join(system_prompt) if len(system_prompt) else None + + def _aggregate_tool_messages(self, messages: List[UATS]) -> List[ToolMessageType]: + """ + We currently do not do any aggregation for tool messages, but we normalize the json content + """ + tool_messages: List[ToolMessageType] = [] + for message in messages: + assert isinstance(message, self._tool_message_class), "Expected tool message" + content = self._aggregate_content_chunks(message.content) + normalized_content = self._normalize_json_content(content) + tool_messages.append( + self._tool_message_class( + content=normalized_content, tool_call_id=message.tool_call_id, name=message.name + ) + ) + + return tool_messages + + def _normalize_tool_call(self, tool_call: ToolCall) -> ToolCall: + normalized_function_aruments = self._normalize_json_content(tool_call.function.arguments) + return ToolCall( + function=FunctionCall(name=tool_call.function.name, arguments=normalized_function_aruments), + id=tool_call.id, + ) + + def _aggregate_assistant_messages(self, messages: List[UATS]) -> AssistantMessageType: + aggregated_content: List[str] = [] + tool_calls: List[ToolCall] = [] + prefix: bool = False + weight: Optional[float] = None + for message in messages: + assert isinstance(message, self._assistant_message_class), "Expected assistant message" + if message.tool_calls is not None and len(message.tool_calls) > 0: + for tool_call in message.tool_calls: + normalized_tool_call = self._normalize_tool_call(tool_call) + tool_calls.append(normalized_tool_call) + elif message.content: + aggregated_content.append(self._aggregate_content_chunks(message.content)) + prefix |= message.prefix + if isinstance(message, FinetuningAssistantMessage): + # Only 
FinetuningAssistantMessage can be weighted + if weight is not None: + assert ( + weight == message.weight + ), "Expected weights of aggregated FinetuningAssistantMessage to be equal" + weight = message.weight + + aggregated_message = self._assistant_message_class( + content="\n\n".join(aggregated_content) if len(aggregated_content) else None, + tool_calls=tool_calls or None, + prefix=prefix, + ) + + if weight is not None and hasattr(aggregated_message, "weight"): + aggregated_message.weight = weight + return aggregated_message + + def _aggregate_user_messages(self, messages: List[UATS]) -> UserMessageType: + """ + Just coalesce neighboring blocks of text + """ + all_content: List[ContentChunk] = [] + text_chunks: List[str] = [] + for message in messages: + assert isinstance(message, self._user_message_class), f"Expected user message got {type(message)}" + if isinstance(message.content, str): + text_chunks.append(message.content) + else: # it's a List[ContentChunk] + for chunk in message.content: + if isinstance(chunk, TextChunk): + text_chunks.append(chunk.text) + else: + if text_chunks: + all_content.append(TextChunk(text="\n\n".join(text_chunks))) + text_chunks = [] + all_content.append(chunk) + + text_content = "\n\n".join(text_chunks) if text_chunks else "" + + if not all_content: + # if no ContentChunk was passed, we return content as a str + return self._user_message_class(content=text_content) + + if text_content: + # else we return a List of content chunks + all_content.append(TextChunk(text=text_content)) + + return self._user_message_class(content=all_content) + + def _aggregate_role(self, messages: List[UATS], role: Optional[Roles]) -> Sequence[UATS]: + if role == Roles.tool: + return self._aggregate_tool_messages(messages) + elif role == Roles.assistant: + return [self._aggregate_assistant_messages(messages)] + elif role == Roles.user: + return [self._aggregate_user_messages(messages)] + else: # System messages are ignored + return [] + + def 
_aggregate_messages(self, request: ChatCompletionRequest[UATS]) -> List[UATS]: + aggregated_messages: List[UATS] = [] + messages_to_aggregate: List[UATS] = [] + current_role: Optional[Roles] = None + current_weight: Optional[float] = None + + # Collect consecutive lists of messages with the same role and weight + for message in request.messages: + new_weight = getattr(message, "weight", None) + if current_role != message.role or (new_weight != current_weight): + aggregated_messages.extend(self._aggregate_role(messages_to_aggregate, current_role)) + messages_to_aggregate.clear() + current_weight = new_weight + current_role = message.role + messages_to_aggregate.append(message) + + # Add the last set of messages + aggregated_messages.extend(self._aggregate_role(messages_to_aggregate, current_role)) + + # If the first message is not a user message, or we didnt aggregate + # anything (all system messages) for example, add an empty user message + if len(aggregated_messages) == 0 or ( + not self.system_prompt_in_begin and aggregated_messages[0].role != Roles.user + ): + aggregated_messages.insert(0, self._user_message_class(content="")) + + return aggregated_messages + + def from_chat_completion_request(self, request: ChatCompletionRequest[UATS]) -> InstructRequestType: + system_prompt = self._aggregate_system_prompts(request) + messages = self._aggregate_messages(request) + + return self._instruct_request_class( + messages=messages, system_prompt=system_prompt, available_tools=request.tools + ) + + +class InstructRequestNormalizerV7(InstructRequestNormalizer): + system_prompt_in_begin: bool = True + + @staticmethod + def normalizer() -> "InstructRequestNormalizerV7": + return InstructRequestNormalizerV7( + UserMessage, + AssistantMessage, + ToolMessage, + SystemMessage, + InstructRequest[UATS, Tool], + ) + + def _aggregate_role(self, messages: List[UATS], role: Optional[Roles]) -> Sequence[UATS]: + if role == Roles.tool: + return self._aggregate_tool_messages(messages) 
+ elif role == Roles.assistant: + return [self._aggregate_assistant_messages(messages)] + elif role == Roles.user: + return [self._aggregate_user_messages(messages)] + elif role == Roles.system: + return messages + else: + assert role is None and len(messages) == 0 + return [] + + def _aggregate_system_prompts(self, request: ChatCompletionRequest[UATS]) -> Optional[str]: + raise NotImplementedError("We should not aggregate system prompts") + + def from_chat_completion_request(self, request: ChatCompletionRequest[UATS]) -> InstructRequestType: # type: ignore[type-var] + messages = self._aggregate_messages(request) + return self._instruct_request_class(messages=messages, system_prompt=None, available_tools=request.tools) # type: ignore[no-any-return] + + +def normalizer_for_tokenizer_version(version: TokenizerVersion) -> InstructRequestNormalizer: + if version in {TokenizerVersion.v1, TokenizerVersion.v2, TokenizerVersion.v3}: + return InstructRequestNormalizer.normalizer() + elif version == TokenizerVersion.v7: + return InstructRequestNormalizerV7.normalizer() + raise ValueError(f"Unknown tokenizer version {version}") diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4ee9040a036dfe00bc8faa0581c81473d63a43 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py @@ -0,0 +1,27 @@ +from enum import Enum +from typing import Generic, List, Optional + +from pydantic import Field + +from mistral_common.base import MistralBase +from mistral_common.protocol.base import BaseCompletionRequest +from mistral_common.protocol.instruct.messages import ChatMessageType +from mistral_common.protocol.instruct.tool_calls import Tool, ToolChoice + + +class ResponseFormats(str, Enum): + text: str = "text" + json: str = "json_object" + + +class 
ResponseFormat(MistralBase): + type: ResponseFormats = ResponseFormats.text + + +class ChatCompletionRequest(BaseCompletionRequest, Generic[ChatMessageType]): + model: Optional[str] = None + messages: List[ChatMessageType] + response_format: ResponseFormat = Field(default_factory=ResponseFormat) + tools: Optional[List[Tool]] = None + tool_choice: ToolChoice = ToolChoice.auto + truncate_for_context_length: bool = False diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py new file mode 100644 index 0000000000000000000000000000000000000000..f79aa320665178f78248aaf1c0e3f68599acbb24 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py @@ -0,0 +1,66 @@ +import time +from enum import Enum +from typing import List, Optional + +from pydantic import Field + +from mistral_common.base import MistralBase +from mistral_common.protocol.base import UsageInfo +from mistral_common.protocol.instruct.tool_calls import ToolCall +from mistral_common.protocol.utils import random_uuid + + +class FinishReason(str, Enum): + stop: str = "stop" + length: str = "length" + model_length: str = "model_length" + error: str = "error" + tool_call: str = "tool_calls" + + +class ChatCompletionTokenLogprobs(MistralBase): + token: str + logprob: float + bytes: List[int] + + +class ChatCompletionResponseChoiceLogprobs(MistralBase): + content: List[ChatCompletionTokenLogprobs] + + +class DeltaMessage(MistralBase): + role: Optional[str] = None + content: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + + +class ChatCompletionResponseChoice(MistralBase): + index: int + message: DeltaMessage + finish_reason: Optional[FinishReason] = None + logprobs: Optional[ChatCompletionResponseChoiceLogprobs] = None + + +class ChatCompletionResponse(MistralBase): + id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}") + 
object: str = "chat.completion" + created: int = Field(default_factory=lambda: int(time.time())) + model: str + choices: List[ChatCompletionResponseChoice] + usage: UsageInfo + + +class ChatCompletionResponseStreamChoice(MistralBase): + index: int + delta: DeltaMessage + finish_reason: Optional[FinishReason] = None + logprobs: Optional[ChatCompletionResponseChoiceLogprobs] = None + + +class ChatCompletionStreamResponse(MistralBase): + id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}") + object: str = "chat.completion.chunk" + created: int = Field(default_factory=lambda: int(time.time())) + model: str + choices: List[ChatCompletionResponseStreamChoice] + usage: Optional[UsageInfo] = None diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py new file mode 100644 index 0000000000000000000000000000000000000000..764ba4cb45bb8ee958730b1dbd69a7bc7a51a8d9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py @@ -0,0 +1,51 @@ +import json +from enum import Enum +from typing import Any, Dict, TypeVar, Union + +from pydantic import field_validator + +from mistral_common.base import MistralBase + + +class Function(MistralBase): + name: str + description: str = "" + parameters: Dict[str, Any] + + +class ToolTypes(str, Enum): + function = "function" + + +class ToolChoice(str, Enum): + auto: str = "auto" + none: str = "none" + any: str = "any" + + +class Tool(MistralBase): + type: ToolTypes = ToolTypes.function + function: Function + + +class FunctionCall(MistralBase): + name: str + arguments: str + + @field_validator("arguments", mode="before") + def validate_arguments(cls, v: Union[str, Dict[str, Any]]) -> str: + """ + This is for backward compatibility + """ + if isinstance(v, dict): + return json.dumps(v) + return v + + +class ToolCall(MistralBase): + id: str = "null" # required for V3 
tokenization + type: ToolTypes = ToolTypes.function + function: FunctionCall + + +ToolType = TypeVar("ToolType", bound=Tool) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..202d36353bc91b5cf51c636450c79132b07ab18f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py @@ -0,0 +1,328 @@ +import re +from enum import Enum +from typing import Generic, List + +from jsonschema import Draft7Validator, SchemaError + +from mistral_common.exceptions import ( + InvalidAssistantMessageException, + InvalidFunctionCallException, + InvalidMessageStructureException, + InvalidRequestException, + InvalidSystemPromptException, + InvalidToolException, + InvalidToolMessageException, + InvalidToolSchemaException, +) +from mistral_common.protocol.instruct.messages import ( + UATS, + AssistantMessage, + AssistantMessageType, + FinetuningAssistantMessage, + Roles, + SystemMessageType, + ToolMessageType, + UserMessageType, +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.protocol.instruct.tool_calls import ( + Function, + FunctionCall, + Tool, + ToolCall, +) + + +class ValidationMode(Enum): + serving = "serving" + finetuning = "finetuning" + test = "test" + + +class MistralRequestValidator(Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType]): + def __init__(self, mode: ValidationMode = ValidationMode.test): + self._mode = mode + + def validate_messages(self, messages: List[UATS]) -> None: + """ + Validates the list of messages + """ + self._validate_message_list_structure(messages) + self._validate_message_list_content(messages) + + def validate_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest[UATS]: + """ + Validates the request + """ + + if 
self._mode == ValidationMode.serving: + if request.model is None: + raise InvalidRequestException("Model name parameter is required for serving mode") + + # Validate the messages + self.validate_messages(request.messages) + + # Validate the tools + self._validate_tools(request.tools or []) + + return request + + def _validate_function(self, function: Function) -> None: + """ + Checks: + - That the function schema is valid + """ + try: + Draft7Validator.check_schema(function.parameters) + except SchemaError as e: + raise InvalidToolSchemaException(f"Invalid tool schema: {e.message}") + + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", function.name): + raise InvalidToolException( + f"Function name was {function.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + def _validate_tools(self, tools: List[Tool]) -> None: + """ + Checks: + - That the tool schemas are valid + """ + + for tool in tools: + self._validate_function(tool.function) + + def _validate_user_message(self, message: UserMessageType) -> None: + pass + + def _validate_tool_message(self, message: ToolMessageType) -> None: + """ + Checks: + - The tool name is valid + """ + if message.name is not None: + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", message.name): + raise InvalidToolMessageException( + f"Function name was {message.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." 
+ ) + + def _validate_system_message(self, message: SystemMessageType) -> None: + """ + Checks: + - That the system prompt has content + """ + if message.content is None: + raise InvalidSystemPromptException("System prompt must have content") + + def _validate_function_call(self, function_call: FunctionCall) -> None: + """ + Checks: + - That the function call has a valid name + """ + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", function_call.name): + raise InvalidFunctionCallException( + f"Function name was {function_call.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + def _validate_tool_call(self, tool_call: ToolCall, is_last_message: bool) -> None: + """ + Checks: + - That the tool call has a valid function + """ + + self._validate_function_call(tool_call.function) + + def _validate_assistant_message(self, message: AssistantMessageType, is_last_message: bool = False) -> None: + """ + Checks: + - That the assistant message has either text or tool_calls, but not both + - That the tool calls are valid + """ + + # Validate that the message has either text or tool_calls + # but not both and not neither. + if bool(message.content) == bool(message.tool_calls): + raise InvalidAssistantMessageException( + "Assistant message must have either content or tool_calls, but not both." 
+ ) + + # If we have tool calls, validate them + if message.tool_calls is not None: + # Validate that the tool calls are valid + for tool_call in message.tool_calls: + self._validate_tool_call(tool_call, is_last_message=is_last_message) + + if self._mode == ValidationMode.finetuning and isinstance(message, FinetuningAssistantMessage): + if message.weight is not None and message.weight not in [0, 1]: + raise InvalidAssistantMessageException("Assistant message weight must be either 0 or 1") + + if message.prefix: + if not is_last_message: + raise InvalidAssistantMessageException("Assistant message with prefix True must be last message") + # note : we already validate that assistant messsage has content 3 lines up. + + def _validate_tool_calls_followed_by_tool_messages(self, messages: List[UATS]) -> None: + """ + Checks: + - That the number of tool calls and tool messages are the same + - That the tool calls are followed by tool messages + """ + prev_role = None + expected_tool_messages = 0 + for message in messages: + if prev_role is None: + prev_role = message.role + continue + + if message.role == Roles.tool: + expected_tool_messages -= 1 + elif message.role == Roles.assistant: + # if we have an assistant message and we have not received all the function calls + # we need to raise an exception + if expected_tool_messages != 0: + raise InvalidMessageStructureException("Not the same number of function calls and responses") + + if message.tool_calls is not None: + # Validate that the number of function calls and responses are the same + expected_tool_messages = len(message.tool_calls) + + prev_role = message.role + + if expected_tool_messages != 0 and self._mode == ValidationMode.serving: + raise InvalidMessageStructureException("Not the same number of function calls and responses") + elif expected_tool_messages < 0 and self._mode == ValidationMode.finetuning: + raise InvalidMessageStructureException("More tool responses than tool calls") + + def 
_validate_message_order(self, messages: List[UATS]) -> None: + """ + Validates the order of the messages, for example user -> assistant -> user -> assistant -> ... + """ + previous_role = None + for message in messages: + current_role = message.role + + if previous_role is not None: + if previous_role == Roles.system: + expected_roles = {Roles.user, Roles.assistant, Roles.system} + elif previous_role == Roles.user: + expected_roles = {Roles.assistant, Roles.system, Roles.user} + elif previous_role == Roles.assistant: + expected_roles = {Roles.assistant, Roles.user, Roles.tool} + elif previous_role == Roles.tool: + expected_roles = {Roles.assistant, Roles.tool} + + if current_role not in expected_roles: + raise InvalidMessageStructureException( + f"Unexpected role '{current_role}' after role '{previous_role}'" + ) + + previous_role = current_role + + def _validate_last_message(self, message: UATS) -> None: + # The last message must be a user or tool message in serving mode or an assistant message in finetuning mode + last_message_role = message.role + if self._mode == ValidationMode.finetuning: + if last_message_role != Roles.assistant: + raise InvalidMessageStructureException( + f"Expected last role Assistant for finetuning but got {last_message_role}" + ) + else: + bad_assistant = isinstance(message, AssistantMessage) and not message.prefix + bad_role = message.role not in {Roles.user, Roles.tool} + if bad_assistant and bad_role: + raise InvalidMessageStructureException( + f"Expected last role User or Tool (or Assistant with prefix True) for serving" + f" but got {last_message_role}" + ) + + def _validate_message_list_structure(self, messages: List[UATS]) -> None: + """ + Validates the structure of the list of messages + + For example the messages must be in the correct order of user/assistant/tool + """ + + if len(messages) == 0: + raise InvalidMessageStructureException("Conversation must have at least one message") + + # If we have one message it must be a user 
or a system message + if len(messages) == 1: + if messages[0].role not in {Roles.user, Roles.system}: + raise InvalidMessageStructureException("Conversation must start with a user message or system message") + + # Always check the last message if in fine-tuning mode + if self._mode == ValidationMode.finetuning or len(messages) > 1: + self._validate_last_message(messages[-1]) + + self._validate_message_order(messages) + self._validate_tool_calls_followed_by_tool_messages(messages) + + def _validate_message_list_content(self, messages: List[UATS]) -> None: + """ + Validates the content of the messages + """ + + for idx, message in enumerate(messages): + if message.role == Roles.user: + self._validate_user_message(message) + elif message.role == Roles.assistant: + self._validate_assistant_message(message, is_last_message=idx == len(messages) - 1) + elif message.role == Roles.tool: + self._validate_tool_message(message) + elif message.role == Roles.system: + self._validate_system_message(message) + else: + raise InvalidRequestException(f"Unsupported message type {type(message)}") + + +class MistralRequestValidatorV3(MistralRequestValidator): + def _validate_tool_message(self, message: ToolMessageType) -> None: + """ + Checks: + - The tool name is valid + - Tool call id is valid + """ + if message.name is not None: + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", message.name): + raise InvalidToolMessageException( + f"Function name was {message.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + if message.tool_call_id is None: + raise InvalidRequestException("Tool call id has to be defined.") + + if not re.match(r"^[a-zA-Z0-9]{9}$", message.tool_call_id): + raise InvalidToolMessageException( + f"Tool call id was {message.tool_call_id} but must be a-z, A-Z, 0-9, with a length of 9." 
+ ) + + def _validate_tool_call(self, tool_call: ToolCall, is_last_message: bool) -> None: + """ + Validate that the tool call has a valid ID + """ + if tool_call.id != "null": + if not re.match(r"^[a-zA-Z0-9]{9}$", tool_call.id): + raise InvalidFunctionCallException( + f"Tool call id was {tool_call.id} but must be a-z, A-Z, 0-9, with a length of 9." + ) + if self._mode == ValidationMode.finetuning and not is_last_message and tool_call.id == "null": + err_message = "Tool call id of assistant message that is not last has to be defined in finetuning mode." + raise InvalidFunctionCallException(err_message) + + if self._mode == ValidationMode.serving and tool_call.id == "null": + raise InvalidFunctionCallException("Tool call id has to be defined in serving mode.") + + self._validate_function_call(tool_call.function) + + def _validate_last_message(self, message: UATS) -> None: + super()._validate_last_message(message) + + if self._mode == ValidationMode.finetuning: + # in finetuning mode it has to be an assistant message + # as checked by parent `_validate_last_message` + if message.tool_calls is not None: + for tool_call in message.tool_calls: + self._validate_tool_call(tool_call, is_last_message=True) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..09bfa3dd7c909442ab895faa3b5b7718e32279f5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py @@ -0,0 +1,5 @@ +import uuid + + +def random_uuid() -> str: + return str(uuid.uuid4().hex) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/py.typed b/.venv/lib/python3.11/site-packages/mistral_common/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..611c85ac99d107af2096d9b84c24cf0c07286c49 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..836cf6f35cc7ab4e58a13ab6e3072ed34bc2984c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..c80f9dcf24dd15d0a895731b81af0f3cc47873fe --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA @@ -0,0 +1,238 @@ +Metadata-Version: 2.2 +Name: vllm +Version: 0.7.2 +Summary: A high-throughput and memory-efficient inference and serving engine for LLMs +Home-page: https://github.com/vllm-project/vllm +Author: vLLM Team +License: Apache 2.0 +Project-URL: Homepage, https://github.com/vllm-project/vllm +Project-URL: Documentation, https://vllm.readthedocs.io/en/latest/ +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: Science/Research +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Scientific/Engineering :: Information Analysis +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: psutil +Requires-Dist: sentencepiece +Requires-Dist: numpy<2.0.0 +Requires-Dist: requests>=2.26.0 +Requires-Dist: tqdm +Requires-Dist: blake3 +Requires-Dist: py-cpuinfo +Requires-Dist: transformers>=4.48.2 +Requires-Dist: tokenizers>=0.19.1 +Requires-Dist: protobuf +Requires-Dist: fastapi<0.113.0,>=0.107.0; python_version < "3.9" +Requires-Dist: fastapi!=0.113.*,!=0.114.0,>=0.107.0; python_version >= "3.9" +Requires-Dist: aiohttp +Requires-Dist: openai>=1.52.0 +Requires-Dist: uvicorn[standard] +Requires-Dist: pydantic>=2.9 +Requires-Dist: prometheus_client>=0.18.0 +Requires-Dist: pillow +Requires-Dist: 
prometheus-fastapi-instrumentator>=7.0.0 +Requires-Dist: tiktoken>=0.6.0 +Requires-Dist: lm-format-enforcer<0.11,>=0.10.9 +Requires-Dist: outlines==0.1.11 +Requires-Dist: lark==1.2.2 +Requires-Dist: xgrammar>=0.1.6; platform_machine == "x86_64" +Requires-Dist: typing_extensions>=4.10 +Requires-Dist: filelock>=3.16.1 +Requires-Dist: partial-json-parser +Requires-Dist: pyzmq +Requires-Dist: msgspec +Requires-Dist: gguf==0.10.0 +Requires-Dist: importlib_metadata +Requires-Dist: mistral_common[opencv]>=1.5.0 +Requires-Dist: pyyaml +Requires-Dist: six>=1.16.0; python_version > "3.11" +Requires-Dist: setuptools>=74.1.1; python_version > "3.11" +Requires-Dist: einops +Requires-Dist: compressed-tensors==0.9.1 +Requires-Dist: depyf==0.18.0 +Requires-Dist: cloudpickle +Requires-Dist: ray[default]>=2.9 +Requires-Dist: nvidia-ml-py>=12.560.30 +Requires-Dist: torch==2.5.1 +Requires-Dist: torchaudio==2.5.1 +Requires-Dist: torchvision==0.20.1 +Requires-Dist: xformers==0.0.28.post3; platform_system == "Linux" and platform_machine == "x86_64" +Provides-Extra: tensorizer +Requires-Dist: tensorizer>=2.9.0; extra == "tensorizer" +Provides-Extra: runai +Requires-Dist: runai-model-streamer; extra == "runai" +Requires-Dist: runai-model-streamer-s3; extra == "runai" +Requires-Dist: boto3; extra == "runai" +Provides-Extra: audio +Requires-Dist: librosa; extra == "audio" +Requires-Dist: soundfile; extra == "audio" +Provides-Extra: video +Requires-Dist: decord; extra == "video" +Dynamic: author +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: license +Dynamic: project-url +Dynamic: provides-extra +Dynamic: requires-dist +Dynamic: requires-python +Dynamic: summary + +

+ + + vLLM + +

+ +

+Easy, fast, and cheap LLM serving for everyone +

+ +

+| Documentation | Blog | Paper | Twitter/X | Developer Slack | +

+ +--- + +*Latest News* 🔥 +- [2025/01] We are excited to announce the alpha release of vLLM V1: A major architectural upgrade with 1.7x speedup! Clean code, optimized execution loop, zero-overhead prefix caching, enhanced multimodal support, and more. Please check out our blog post [here](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html). +- [2025/01] We hosted [the eighth vLLM meetup](https://lu.ma/zep56hui) with Google Cloud! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1epVkt4Zu8Jz_S5OhEHPc798emsYh2BwYfRuDDVEF7u4/edit?usp=sharing). +- [2024/12] vLLM joins [pytorch ecosystem](https://pytorch.org/blog/vllm-joins-pytorch)! Easy, Fast, and Cheap LLM Serving for Everyone! +- [2024/11] We hosted [the seventh vLLM meetup](https://lu.ma/h0qvrajz) with Snowflake! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1e3CxQBV3JsfGp30SwyvS3eM_tW-ghOhJ9PAJGK6KR54/edit?usp=sharing), and Snowflake team [here](https://docs.google.com/presentation/d/1qF3RkDAbOULwz9WK5TOltt2fE9t6uIc_hVNLFAaQX6A/edit?usp=sharing). +- [2024/10] We have just created a developer slack ([slack.vllm.ai](https://slack.vllm.ai)) focusing on coordinating contributions and discussing features. Please feel free to join us there! +- [2024/10] Ray Summit 2024 held a special track for vLLM! Please find the opening talk slides from the vLLM team [here](https://docs.google.com/presentation/d/1B_KQxpHBTRa_mDF-tR6i8rWdOU5QoTZNcEg2MKZxEHM/edit?usp=sharing). Learn more from the [talks](https://www.youtube.com/playlist?list=PLzTswPQNepXl6AQwifuwUImLPFRVpksjR) from other vLLM contributors and users! +- [2024/09] We hosted [the sixth vLLM meetup](https://lu.ma/87q3nvnh) with NVIDIA! Please find the meetup slides [here](https://docs.google.com/presentation/d/1wrLGwytQfaOTd5wCGSPNhoaW3nq0E-9wqyP7ny93xRs/edit?usp=sharing). +- [2024/07] We hosted [the fifth vLLM meetup](https://lu.ma/lp0gyjqr) with AWS! 
Please find the meetup slides [here](https://docs.google.com/presentation/d/1RgUD8aCfcHocghoP3zmXzck9vX3RCI9yfUAB2Bbcl4Y/edit?usp=sharing). +- [2024/07] In partnership with Meta, vLLM officially supports Llama 3.1 with FP8 quantization and pipeline parallelism! Please check out our blog post [here](https://blog.vllm.ai/2024/07/23/llama31.html). +- [2024/06] We hosted [the fourth vLLM meetup](https://lu.ma/agivllm) with Cloudflare and BentoML! Please find the meetup slides [here](https://docs.google.com/presentation/d/1iJ8o7V2bQEi0BFEljLTwc5G1S10_Rhv3beed5oB0NJ4/edit?usp=sharing). +- [2024/04] We hosted [the third vLLM meetup](https://robloxandvllmmeetup2024.splashthat.com/) with Roblox! Please find the meetup slides [here](https://docs.google.com/presentation/d/1A--47JAK4BJ39t954HyTkvtfwn0fkqtsL8NGFuslReM/edit?usp=sharing). +- [2024/01] We hosted [the second vLLM meetup](https://lu.ma/ygxbpzhl) with IBM! Please find the meetup slides [here](https://docs.google.com/presentation/d/12mI2sKABnUw5RBWXDYY-HtHth4iMSNcEoQ10jDQbxgA/edit?usp=sharing). +- [2023/10] We hosted [the first vLLM meetup](https://lu.ma/first-vllm-meetup) with a16z! Please find the meetup slides [here](https://docs.google.com/presentation/d/1QL-XPFXiFpDBh86DbEegFXBXFXjix4v032GhShbKf3s/edit?usp=sharing). +- [2023/08] We would like to express our sincere gratitude to [Andreessen Horowitz](https://a16z.com/2023/08/30/supporting-the-open-source-ai-community/) (a16z) for providing a generous grant to support the open-source development and research of vLLM. +- [2023/06] We officially released vLLM! FastChat-vLLM integration has powered [LMSYS Vicuna and Chatbot Arena](https://chat.lmsys.org) since mid-April. Check out our [blog post](https://vllm.ai). + +--- +## About +vLLM is a fast and easy-to-use library for LLM inference and serving. 
+ +Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry. + +vLLM is fast with: + +- State-of-the-art serving throughput +- Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html) +- Continuous batching of incoming requests +- Fast model execution with CUDA/HIP graph +- Quantizations: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8. +- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer. +- Speculative decoding +- Chunked prefill + +**Performance benchmark**: We include a performance benchmark at the end of [our blog post](https://blog.vllm.ai/2024/09/05/perf-update.html). It compares the performance of vLLM against other LLM serving engines ([TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [SGLang](https://github.com/sgl-project/sglang) and [LMDeploy](https://github.com/InternLM/lmdeploy)). The implementation is under [nightly-benchmarks folder](.buildkite/nightly-benchmarks/) and you can [reproduce](https://github.com/vllm-project/vllm/issues/8176) this benchmark using our one-click runnable script. + +vLLM is flexible and easy to use with: + +- Seamless integration with popular Hugging Face models +- High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more +- Tensor parallelism and pipeline parallelism support for distributed inference +- Streaming outputs +- OpenAI-compatible API server +- Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, TPU, and AWS Neuron. 
+- Prefix caching support +- Multi-lora support + +vLLM seamlessly supports most popular open-source models on HuggingFace, including: +- Transformer-like LLMs (e.g., Llama) +- Mixture-of-Expert LLMs (e.g., Mixtral, Deepseek-V2 and V3) +- Embedding Models (e.g. E5-Mistral) +- Multi-modal LLMs (e.g., LLaVA) + +Find the full list of supported models [here](https://docs.vllm.ai/en/latest/models/supported_models.html). + +## Getting Started + +Install vLLM with `pip` or [from source](https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html#build-wheel-from-source): + +```bash +pip install vllm +``` + +Visit our [documentation](https://docs.vllm.ai/en/latest/) to learn more. +- [Installation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) +- [Quickstart](https://docs.vllm.ai/en/latest/getting_started/quickstart.html) +- [List of Supported Models](https://docs.vllm.ai/en/latest/models/supported_models.html) + +## Contributing + +We welcome and value any contributions and collaborations. +Please check out [CONTRIBUTING.md](./CONTRIBUTING.md) for how to get involved. + +## Sponsors + +vLLM is a community project. Our compute resources for development and testing are supported by the following organizations. Thank you for your support! + + + +Cash Donations: +- a16z +- Dropbox +- Sequoia Capital +- Skywork AI +- ZhenFund + +Compute Resources: +- AMD +- Anyscale +- AWS +- Crusoe Cloud +- Databricks +- DeepInfra +- Google Cloud +- Lambda Lab +- Nebius +- Novita AI +- NVIDIA +- Replicate +- Roblox +- RunPod +- Trainy +- UC Berkeley +- UC San Diego + +Slack Sponsor: Anyscale + +We also have an official fundraising venue through [OpenCollective](https://opencollective.com/vllm). We plan to use the fund to support the development, maintenance, and adoption of vLLM. 
+ +## Citation + +If you use vLLM for your research, please cite our [paper](https://arxiv.org/abs/2309.06180): +```bibtex +@inproceedings{kwon2023efficient, + title={Efficient Memory Management for Large Language Model Serving with PagedAttention}, + author={Woosuk Kwon and Zhuohan Li and Siyuan Zhuang and Ying Sheng and Lianmin Zheng and Cody Hao Yu and Joseph E. Gonzalez and Hao Zhang and Ion Stoica}, + booktitle={Proceedings of the ACM SIGOPS 29th Symposium on Operating Systems Principles}, + year={2023} +} +``` + +## Contact Us + +* For technical questions and feature requests, please use Github issues or discussions. +* For discussing with fellow users and coordinating contributions and development, please use Slack. +* For security disclosures, please use Github's security advisory feature. +* For collaborations and partnerships, please contact us at vllm-questions AT lists.berkeley.edu. + +## Media Kit + +* If you wish to use vLLM's logo, please refer to [our media kit repo](https://github.com/vllm-project/media-kit). 
diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..a264252087aef620fe5aff3fa58965e4dba4e099 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD @@ -0,0 +1,1313 @@ +../../../bin/vllm,sha256=8rKxioauw2uk5PWmY3kz-AQil42vTcT8MrUUp53pscQ,229 +vllm-0.7.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +vllm-0.7.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357 +vllm-0.7.2.dist-info/METADATA,sha256=og0UL1frXzLiqZP4Y32h6Z7T8qkyrx0tlhTYCemzSa8,12451 +vllm-0.7.2.dist-info/RECORD,, +vllm-0.7.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm-0.7.2.dist-info/WHEEL,sha256=DNGSEk5U4ugAGvX0_BXBTCo3Pq7hN4686HcjBg8CfTM,102 +vllm-0.7.2.dist-info/entry_points.txt,sha256=3gs2VG8nr7QHZXKTqjfRIabj07IjR9Gj5xm764igbjE,43 +vllm-0.7.2.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5 +vllm/_C.abi3.so,sha256=kUXtJrlp06iQehb0BZYr-c7rfoFuUWTI0TtGw0iQsg4,217472040 +vllm/__init__.py,sha256=4znJi5rYl--TIYaS7ITK9jm-_ctVwgR1VDPKwjWsmkY,2052 +vllm/__pycache__/__init__.cpython-311.pyc,, +vllm/__pycache__/_custom_ops.cpython-311.pyc,, +vllm/__pycache__/_ipex_ops.cpython-311.pyc,, +vllm/__pycache__/_version.cpython-311.pyc,, +vllm/__pycache__/beam_search.cpython-311.pyc,, +vllm/__pycache__/config.cpython-311.pyc,, +vllm/__pycache__/connections.cpython-311.pyc,, +vllm/__pycache__/envs.cpython-311.pyc,, +vllm/__pycache__/forward_context.cpython-311.pyc,, +vllm/__pycache__/logger.cpython-311.pyc,, +vllm/__pycache__/logits_process.cpython-311.pyc,, +vllm/__pycache__/outputs.cpython-311.pyc,, +vllm/__pycache__/pooling_params.cpython-311.pyc,, +vllm/__pycache__/sampling_params.cpython-311.pyc,, +vllm/__pycache__/scalar_type.cpython-311.pyc,, +vllm/__pycache__/scripts.cpython-311.pyc,, 
+vllm/__pycache__/sequence.cpython-311.pyc,, +vllm/__pycache__/tracing.cpython-311.pyc,, +vllm/__pycache__/utils.cpython-311.pyc,, +vllm/__pycache__/version.cpython-311.pyc,, +vllm/_custom_ops.py,sha256=idZui8sfplykQofJkDgxqJkym-vxfba763xNcmufHUU,44100 +vllm/_ipex_ops.py,sha256=fAp8PqSkAMWjm82Lhaohv63S1LYAUJwk0GXJl2PZl50,7839 +vllm/_moe_C.abi3.so,sha256=PPqjsHbDM1FhWuIt5g1lx5Nfo_2AX7RzoxsEWkbzd1k,83764856 +vllm/_version.py,sha256=SwvkPl7GLhGxWFK-GDetxs6qkq8TTqCfnxJUNJqmWtU,411 +vllm/adapter_commons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/layers.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/models.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/request.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/utils.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/worker_manager.cpython-311.pyc,, +vllm/adapter_commons/layers.py,sha256=rdsvBlYTiblidwK2EYkl3UdB4xvopcrd8li3vPFTbwo,406 +vllm/adapter_commons/models.py,sha256=tuuVafwk9Yvfl8uCXSg1Whzm2Wsq8W7JqypRm_XpBzg,2807 +vllm/adapter_commons/request.py,sha256=GoLdKUNCU6x-8plK95CuLOy56QOSYW6IQAg6ZQg76C8,617 +vllm/adapter_commons/utils.py,sha256=ytCCfLdk-FwWCflWMSTazUPg2gNmXjaovEWbpvQ6fe0,3271 +vllm/adapter_commons/worker_manager.py,sha256=qBj7swkk7LJoQi2GpueMQFMbVPjphnuzOKIc36oQ6Ts,928 +vllm/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/assets/__pycache__/__init__.cpython-311.pyc,, +vllm/assets/__pycache__/audio.cpython-311.pyc,, +vllm/assets/__pycache__/base.cpython-311.pyc,, +vllm/assets/__pycache__/image.cpython-311.pyc,, +vllm/assets/__pycache__/video.cpython-311.pyc,, +vllm/assets/audio.py,sha256=ISR8TC_92QOEC7pFr1_ILC0clwyvyMYdq-2ewBO4cG4,892 +vllm/assets/base.py,sha256=IdwWieuPqaAaYKo2ybSfg07dt86k4-NrP1BHPCrFN2s,1196 +vllm/assets/image.py,sha256=Kn7HrcJEIXKUcx7IUUcixkslwKannvgGVXWa77bI19s,922 
+vllm/assets/video.py,sha256=N642PEaHhx7vB5hGCJVAUi3toVFt0CIXuSKFSFzlzc8,2464 +vllm/attention/__init__.py,sha256=YW7x8Ahq9TPxx8GiduskAliKrJUM5i-kDrOjuLep7aA,610 +vllm/attention/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/__pycache__/layer.cpython-311.pyc,, +vllm/attention/__pycache__/selector.cpython-311.pyc,, +vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/backends/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/backends/__pycache__/abstract.cpython-311.pyc,, +vllm/attention/backends/__pycache__/blocksparse_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/flash_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/flashinfer.cpython-311.pyc,, +vllm/attention/backends/__pycache__/hpu_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/ipex_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/openvino.cpython-311.pyc,, +vllm/attention/backends/__pycache__/pallas.cpython-311.pyc,, +vllm/attention/backends/__pycache__/placeholder_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/rocm_flash_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/torch_sdpa.cpython-311.pyc,, +vllm/attention/backends/__pycache__/triton_mla.cpython-311.pyc,, +vllm/attention/backends/__pycache__/utils.cpython-311.pyc,, +vllm/attention/backends/__pycache__/xformers.cpython-311.pyc,, +vllm/attention/backends/abstract.py,sha256=TNYGVUDkx7EQ8FuOqFmQYtMLEAzUPCUxmEdCbIesy1o,9097 +vllm/attention/backends/blocksparse_attn.py,sha256=QbIPsZDM3d0IPEkKBT7E78Ac-o2Hv8Fw-Ug_75XovzI,18003 +vllm/attention/backends/flash_attn.py,sha256=ZhEd0k8B9lqiyLG-LifdCLmUHzqzGtmpIUq6CvS2ALY,41200 +vllm/attention/backends/flashinfer.py,sha256=0_ow7Sy6yrEBuqpDNcl4ao2AVKYS1VdKOtr1QSuFeno,46216 +vllm/attention/backends/hpu_attn.py,sha256=9dRfuKLiTNbnq7X0HKZGGOWXicin5rXBCOJWwpj8GiA,11734 +vllm/attention/backends/ipex_attn.py,sha256=Hpob2Gbmjc-PF0EoURLqbbO2AlttWbw1lOY0x0JLpec,14586 
+vllm/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/backends/mla/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/backends/mla/__pycache__/utils.cpython-311.pyc,, +vllm/attention/backends/mla/utils.py,sha256=RoD0gCzp5Cz7teWxDUUOCWY24JtUBAceEqnV02g75hU,23524 +vllm/attention/backends/openvino.py,sha256=DLSknNrBOmqKbtTV8ah0pOcNr0q2yz1R8nTaRk4Mte4,4799 +vllm/attention/backends/pallas.py,sha256=2vT7FvPWcGYHhQ_6QHhwqIk49Yd4PtqgExtgRoOjVXU,13263 +vllm/attention/backends/placeholder_attn.py,sha256=-NOPWnH3YVd8o1HuxULlYXkWsgZu_PtmYi7aUL4N8yw,16316 +vllm/attention/backends/rocm_flash_attn.py,sha256=zri0w4t1CdK8apXZTik3XKZxnHe1t4nMmnyQ3ECFFoU,38385 +vllm/attention/backends/torch_sdpa.py,sha256=uTf0ayYb23o0jwjIwIJe6KOD778wLCpR0HRzAGnlPDA,26939 +vllm/attention/backends/triton_mla.py,sha256=qMC8DCSuZE3gk1-n8ExQjW0Xoi-1KeqQaQ9SleO9Ntc,31630 +vllm/attention/backends/utils.py,sha256=Vvahg8g0PWOUKBAnOfeVMxUIaEAHM8j0AAn7hWwXZIQ,25129 +vllm/attention/backends/xformers.py,sha256=5JOTUcHT3leG8uNYa73Dzp-i9SD-kr5FCFjER6dsO1M,33442 +vllm/attention/layer.py,sha256=Q9sBH_5c6bc6-G_V5kxOu1Lt_7HZ4Hj_kZAE-cDLK4U,14828 +vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/ops/__pycache__/hpu_paged_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/nki_flash_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/prefix_prefill.cpython-311.pyc,, +vllm/attention/ops/__pycache__/triton_decode_attention.cpython-311.pyc,, +vllm/attention/ops/__pycache__/triton_flash_attention.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/ops/blocksparse_attention/__pycache__/__init__.cpython-311.pyc,, 
+vllm/attention/ops/blocksparse_attention/__pycache__/blocksparse_attention_kernel.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__pycache__/interface.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__pycache__/utils.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py,sha256=u_96xPRC15lmK8hK5npn3-G8DyY-I1k3spa5sfzPi3U,11535 +vllm/attention/ops/blocksparse_attention/interface.py,sha256=QfWOzV2KjpJu7goLLOFegFbkIOA484Io3T6E-T8KXwo,9326 +vllm/attention/ops/blocksparse_attention/utils.py,sha256=vitFMc2NXAqjiyWKt23ELVxzKT_1BE10TvA3wYPjooY,8085 +vllm/attention/ops/hpu_paged_attn.py,sha256=JIJMptp237a-zCl0-656j4Di4ru4RJfeBFMxpdLTq9w,3460 +vllm/attention/ops/ipex_attn.py,sha256=6FWxaOW-Oz9kuoC0UXd_T1hrTW81iXuqUdAAwrw0DOw,5521 +vllm/attention/ops/nki_flash_attn.py,sha256=arELBFAzm80fyr8ejm2y7GFdABPJ_TMuVS8T7VUjqfc,24491 +vllm/attention/ops/paged_attn.py,sha256=nzV0-MUHyX0J94-W0R-xfHX_SoQdbfCFsQqrDle_IBw,8334 +vllm/attention/ops/prefix_prefill.py,sha256=Uf4oBhidx__t5NWrR4gG3tvyIwa29Dhug519dh5xez0,32113 +vllm/attention/ops/triton_decode_attention.py,sha256=n_Klveqthy-pVj5NBHHOlTtfHMw5umnXttqhO-ANaLU,18915 +vllm/attention/ops/triton_flash_attention.py,sha256=CBK7O8xBFKAGPNqjUDY19QMYjvNGrxDE7Hm3qNw3H5U,27534 +vllm/attention/selector.py,sha256=PuQqeyrkyTtUcP2FWDJhXk0_qHX-Y4ekePSI7MZdo6s,5865 +vllm/beam_search.py,sha256=OloyO9Rvh75DE1uTVW3tX3XWhNsyQqcFOSBzFhNTvzI,2183 +vllm/compilation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/compilation/__pycache__/__init__.cpython-311.pyc,, +vllm/compilation/__pycache__/backends.cpython-311.pyc,, +vllm/compilation/__pycache__/counter.cpython-311.pyc,, +vllm/compilation/__pycache__/decorators.cpython-311.pyc,, +vllm/compilation/__pycache__/fix_functionalization.cpython-311.pyc,, +vllm/compilation/__pycache__/fusion.cpython-311.pyc,, +vllm/compilation/__pycache__/fx_utils.cpython-311.pyc,, +vllm/compilation/__pycache__/inductor_pass.cpython-311.pyc,, 
+vllm/compilation/__pycache__/monitor.cpython-311.pyc,, +vllm/compilation/__pycache__/multi_output_match.cpython-311.pyc,, +vllm/compilation/__pycache__/pass_manager.cpython-311.pyc,, +vllm/compilation/__pycache__/reshapes.cpython-311.pyc,, +vllm/compilation/__pycache__/vllm_inductor_pass.cpython-311.pyc,, +vllm/compilation/__pycache__/wrapper.cpython-311.pyc,, +vllm/compilation/backends.py,sha256=CR8uEtc9fKZYISDIbgbn5FRZGc0eJVG_v96--Z3r9C8,35424 +vllm/compilation/counter.py,sha256=TzXY5bpM2Z3y55tg9Zssz-qb-p7TW-db__4L3DcV9hY,938 +vllm/compilation/decorators.py,sha256=u0kOMbxoEOtxTPAChFijXWyw3nU3QwmlkX26J5uQYsc,10246 +vllm/compilation/fix_functionalization.py,sha256=z8l6h_C6-UkbOR5uxo55R1ETOIRfCTj6dawujHzap-o,7953 +vllm/compilation/fusion.py,sha256=PiC6D8vwtRCqGOQ-Eyw1w2nzvfbwf4aOSAHjtUmQSi0,24556 +vllm/compilation/fx_utils.py,sha256=3N-l_AWzwE0-hDJXtsKlfY9XaKyRJy-mx7G9FmtEZDc,1478 +vllm/compilation/inductor_pass.py,sha256=KcFr08N5xkyFfWV3NuGgNS_egfl3hx2padUOqcnoShE,2570 +vllm/compilation/monitor.py,sha256=p4LQ1roNhKQE8M8zhlYwgdnSFBFi6EkDUazr6X2usc0,1346 +vllm/compilation/multi_output_match.py,sha256=FKsWQk73_olG_sIh7veaHzfBecoKURWD6SRH5_Ozw20,3839 +vllm/compilation/pass_manager.py,sha256=4A91kDHMRNidei644TXaMC89Kh-CAOFVjGQfNiO0oSo,2792 +vllm/compilation/reshapes.py,sha256=K7j5y9bMqeFv6EC3ggqjgVSuFBpmn3Imw3FGm0lwbhM,3055 +vllm/compilation/vllm_inductor_pass.py,sha256=QVhnVvw0GzSR-h80GNa2JkcfaLIy-jAecdLPQigKcUI,1835 +vllm/compilation/wrapper.py,sha256=pCzXlNs-_49mKiQJFE3Kg02lEkHf_CfbVk6dcPlhnok,5631 +vllm/config.py,sha256=ruDhJ_KG-4wUkNlzDa6OdUT__xbV3mVolVlW35lbOBU,151021 +vllm/connections.py,sha256=5Mscwp5D4mOFEA7nT-r7Zs73H9ghaYQynex1fLyxQrU,4962 +vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/core/__pycache__/__init__.cpython-311.pyc,, +vllm/core/__pycache__/block_manager.cpython-311.pyc,, +vllm/core/__pycache__/evictor.cpython-311.pyc,, +vllm/core/__pycache__/interfaces.cpython-311.pyc,, 
+vllm/core/__pycache__/placeholder_block_space_manager.cpython-311.pyc,, +vllm/core/__pycache__/scheduler.cpython-311.pyc,, +vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/core/block/__pycache__/__init__.cpython-311.pyc,, +vllm/core/block/__pycache__/block_table.cpython-311.pyc,, +vllm/core/block/__pycache__/common.cpython-311.pyc,, +vllm/core/block/__pycache__/cpu_gpu_block_allocator.cpython-311.pyc,, +vllm/core/block/__pycache__/interfaces.cpython-311.pyc,, +vllm/core/block/__pycache__/naive_block.cpython-311.pyc,, +vllm/core/block/__pycache__/prefix_caching_block.cpython-311.pyc,, +vllm/core/block/__pycache__/utils.cpython-311.pyc,, +vllm/core/block/block_table.py,sha256=HMwMwVY8pHLjlje6gfVsrHvyvLupcd3SMAvgcsUcnxM,16022 +vllm/core/block/common.py,sha256=cfDse1iNYLehOXrSfUypTmakGAdSSXrX0YmodFPpJjI,13200 +vllm/core/block/cpu_gpu_block_allocator.py,sha256=gr6P5-ahDwFR4i8x3bxTkWukTUlHV4_BciigAJA8MwM,16817 +vllm/core/block/interfaces.py,sha256=FSmVJr-d2RVt_kX2ckWCYf7N1Cq5XNeJqk-TebbOj2k,8111 +vllm/core/block/naive_block.py,sha256=EgYRm94K88DyFM3Xjfa1A8hWuGZStL0nIqiZqbVxQMI,16355 +vllm/core/block/prefix_caching_block.py,sha256=tBjZ58xTQmmzx2s24BcneVyCf5F-aaOqgJz07bxbfoo,44182 +vllm/core/block/utils.py,sha256=osLxVwSUYjOsLeal8RzpmGT72F4aU3qbTGuYMdWIsHY,928 +vllm/core/block_manager.py,sha256=xJRkXw2xn1vAIsH0IZTBHeoCZ1fd6mIEtL-TBV7K5qQ,22165 +vllm/core/evictor.py,sha256=Jy-eZwgdA4Q2F0buFgDNv2fPeiIxJAUEFtyKYz1VL40,5446 +vllm/core/interfaces.py,sha256=86jc5i5_EbVMSLNDZ5W5MUBl-DgiisRsIeTwfCLKjj0,3534 +vllm/core/placeholder_block_space_manager.py,sha256=WWT-kWCOKZ_HqMqcu_YP26lvE7T9TSGCG6TGdqPYGaM,2928 +vllm/core/scheduler.py,sha256=PjmfVKjDq0zD7O2O4gQ2S2L2EiN08oU1gMb8of3TX2c,81122 +vllm/cumem_allocator.abi3.so,sha256=0BF47IBh0qAI7WAU351txtj3DInTfvzQ9Mb9xDd0oSY,27824 +vllm/device_allocator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/device_allocator/__pycache__/__init__.cpython-311.pyc,, 
+vllm/device_allocator/__pycache__/cumem.cpython-311.pyc,, +vllm/device_allocator/cumem.py,sha256=7IJr44wYldBlwFk6O_2uK0C1hrUtUYPLaiMI94QxnXE,9814 +vllm/distributed/__init__.py,sha256=Rk8k7bXtcPNaihFk5qOn__toXjElImWbszyRJBzeYHA,122 +vllm/distributed/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/__pycache__/communication_op.cpython-311.pyc,, +vllm/distributed/__pycache__/parallel_state.cpython-311.pyc,, +vllm/distributed/__pycache__/utils.cpython-311.pyc,, +vllm/distributed/communication_op.py,sha256=PHbMVvmq2WRcpTf5peCbHgP8FCC3ZMTzJPu38ydBBaM,1236 +vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/device_communicators/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/cuda_wrapper.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/custom_all_reduce.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/custom_all_reduce_utils.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/hpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/pynccl.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/pynccl_wrapper.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/shm_broadcast.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/tpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/xpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/cuda_wrapper.py,sha256=Wa68jPiG67iSwqg3Lb5zEDzX0L1qnD6TAn_iUU-Oo7E,6901 +vllm/distributed/device_communicators/custom_all_reduce.py,sha256=QnH2wct_sdz_-ZJx7JjR0vej13qg2Ar8dSKppp0O6sc,12492 +vllm/distributed/device_communicators/custom_all_reduce_utils.py,sha256=g-GEhCjE6dGzRDYHKYDP5FqFW9snR-YMLcIQOYt0Zz8,10474 +vllm/distributed/device_communicators/hpu_communicator.py,sha256=wgpKr7gNzvj9BpmjIn51cd0V4Q2YiReargkr5U-Mrz0,1848 
+vllm/distributed/device_communicators/pynccl.py,sha256=Z4QXdGf_qzz2J3PfKtU7bEG29oxbXy4dEorGB9WNB3k,9142 +vllm/distributed/device_communicators/pynccl_wrapper.py,sha256=FUfvxJPoxuYRXWBNbbFkFTKHncIYq23SmFhHCcvCirA,13705 +vllm/distributed/device_communicators/shm_broadcast.py,sha256=Nu57UdDywE3vhodVWkaIQfLEWLwrjq94tXmb14vwGp4,22780 +vllm/distributed/device_communicators/tpu_communicator.py,sha256=j_3BxCUAULwnKWb9cNXeM6FjzGcXhl28y6PBeGHBQT4,2438 +vllm/distributed/device_communicators/xpu_communicator.py,sha256=9k17FFfhUShuOypGMflbWTY02WcMbX5f0-C9P-qW8fs,1790 +vllm/distributed/kv_transfer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/__pycache__/kv_transfer_agent.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_connector/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/factory.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/simple_connector.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/base.py,sha256=eqvKBJoU2iJwfPvtyJmdWEPqVIJLIIOieatOTMkxcMA,4291 +vllm/distributed/kv_transfer/kv_connector/factory.py,sha256=-5QZebKjYZQPv-lmLYwRnsFsTqgN-1c1hBOpZfFpBzk,1736 +vllm/distributed/kv_transfer/kv_connector/simple_connector.py,sha256=svH3MECdr72_1BKcswRZbTiRQh20HHBwg_1_2I6QL3A,12941 +vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/simple_buffer.cpython-311.pyc,, 
+vllm/distributed/kv_transfer/kv_lookup_buffer/base.py,sha256=PQnicbzMqrZvwSdrUrNLqJLmrQXzY-CwcqiNmQm28MI,4162 +vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py,sha256=c8gZgks_s2k9TH1e6u2GjtPhBSM5spVQh5ybrChdQ8w,9033 +vllm/distributed/kv_transfer/kv_pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_pipe/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/mooncake_pipe.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/pynccl_pipe.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/base.py,sha256=U4hivz-zJkjhTGgNdtcuupc_ArsoUPFuWEv_AXJ9rqs,2087 +vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py,sha256=PI0bXHb1bUfng7_u40Y-TJ6SQ-ssfUW_xKFLpES-Bw8,11687 +vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py,sha256=p9U4-Se_2brXUNWSv_U_wB4RYXFr_dcV9iTjjucZzFA,9588 +vllm/distributed/kv_transfer/kv_transfer_agent.py,sha256=-F7qEvxSGJ3FAzw_km73LoRAWmZ8WEiMFTHddPDarRU,2430 +vllm/distributed/parallel_state.py,sha256=6q3_gx5FUr2eMboKlmM0Getg3UH9XkelGfM0rnEfKm4,49922 +vllm/distributed/utils.py,sha256=3ESo_gKzI1FtAsYsd--7K2xjLFrKebLd9s9Ui38EFLQ,8553 +vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/engine/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/__pycache__/arg_utils.cpython-311.pyc,, +vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc,, +vllm/engine/__pycache__/async_timeout.cpython-311.pyc,, +vllm/engine/__pycache__/llm_engine.cpython-311.pyc,, +vllm/engine/__pycache__/metrics.cpython-311.pyc,, +vllm/engine/__pycache__/metrics_types.cpython-311.pyc,, +vllm/engine/__pycache__/protocol.cpython-311.pyc,, +vllm/engine/arg_utils.py,sha256=hrJsNd9b2EHP1lWw8bbzHoBJF3fr16HGqiubLcafrJw,64060 +vllm/engine/async_llm_engine.py,sha256=lraQRRsWl_zzSIEJGGI7KtlPV7yH5nZbtm6Iiz-r8CY,48514 
+vllm/engine/async_timeout.py,sha256=JxUaRVK_M5P5wRVkKHQ-QkDMnGxKMTt9S9OhQeQzP-s,7092 +vllm/engine/llm_engine.py,sha256=99SOL62knx8EZfgFmcRj-G6_mvXpOnxPufZcOjPrPEc,88668 +vllm/engine/metrics.py,sha256=DGrpynyGS5tnzo5kksJ4Z2403QrAEt75xrUaR37tkNc,29606 +vllm/engine/metrics_types.py,sha256=QgPkVHKyokSL_lYCG9tezbQTQ-ccp294vLUkkmsJj3A,3402 +vllm/engine/multiprocessing/__init__.py,sha256=7oSrA3Cfp7mO8-6ekiElbHXXEYCfNzj1glJ4pk4nu8g,4415 +vllm/engine/multiprocessing/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/multiprocessing/__pycache__/client.cpython-311.pyc,, +vllm/engine/multiprocessing/__pycache__/engine.cpython-311.pyc,, +vllm/engine/multiprocessing/client.py,sha256=Vh1tEPt4CCeOFJnKti5RFbjOyDZ4V646Pirhu_NVzGI,28650 +vllm/engine/multiprocessing/engine.py,sha256=bZYXPxJrH9glM7i55xqUK1RBzldYEm_3L47Ey0MC-J4,15396 +vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/engine/output_processor/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/interfaces.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/multi_step.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/single_step.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/stop_checker.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/util.cpython-311.pyc,, +vllm/engine/output_processor/interfaces.py,sha256=99zPnCsA0H9k8d7uXfv8yGva69mAS1m3apR9sdWIUfY,2994 +vllm/engine/output_processor/multi_step.py,sha256=5lihog6cmxNmXPJxeKecZ8UhPrpiJXH_r0uPvcD8vDI,9000 +vllm/engine/output_processor/single_step.py,sha256=ToiNI09zyU86zsB8Vbw9-eLcbNk_gsCHc2sKRmBfcF0,6015 +vllm/engine/output_processor/stop_checker.py,sha256=YVSkufbbX3km41aU7bXRJjhpP24Qcun7y6V3H_cQ65U,5064 +vllm/engine/output_processor/util.py,sha256=IoNFmy8vKrK5pn3nGS26Ey5irhKr8mzNOGP30SsT1qA,1056 +vllm/engine/protocol.py,sha256=aPDHm0tM4qycT3mkLcGeyn_MuFe5ehKXDTyULXdbfQA,10104 
+vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/entrypoints/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/__pycache__/api_server.cpython-311.pyc,, +vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc,, +vllm/entrypoints/__pycache__/launcher.cpython-311.pyc,, +vllm/entrypoints/__pycache__/llm.cpython-311.pyc,, +vllm/entrypoints/__pycache__/logger.cpython-311.pyc,, +vllm/entrypoints/__pycache__/utils.cpython-311.pyc,, +vllm/entrypoints/api_server.py,sha256=QUmsbFCoQ3f74HXyg4RhqztQ8GKNZXM_mul9B-gpj6E,5417 +vllm/entrypoints/chat_utils.py,sha256=LynDqRGDJCyB1CTixN2LpTdRpWIEkpMiAJwP1MBuIQo,35458 +vllm/entrypoints/launcher.py,sha256=3RfCwmGWoGqgASiRP5kt86sOKIjyH1lYWZha7W2w-7U,3929 +vllm/entrypoints/llm.py,sha256=EE4MjNgbx6pSZg6POsTtqGrKe5HJqwvXi-rmj5tlT4Q,61938 +vllm/entrypoints/logger.py,sha256=0a5GEgmC1d210eqgdYwUuSIcZRcxeIozUIzrZyvD5T4,1449 +vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/entrypoints/openai/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/api_server.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/cli_args.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/logits_processors.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/protocol.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/run_batch.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_chat.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_completion.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_embedding.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_engine.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_models.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_pooling.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_rerank.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_score.cpython-311.pyc,, 
+vllm/entrypoints/openai/__pycache__/serving_tokenization.cpython-311.pyc,, +vllm/entrypoints/openai/api_server.py,sha256=3FNtzrhzEN7mZgMzBsYnTkyLffHKBdLqytlmCxVmkis,34388 +vllm/entrypoints/openai/cli_args.py,sha256=py20pxtraM9eheEc2jzWXUiE3dX6ysnGNSkrrBgrutc,12174 +vllm/entrypoints/openai/logits_processors.py,sha256=XIh2SPLpyOlF002J8K_85qgFG8qiIDCK4VnVk_tYB0U,3157 +vllm/entrypoints/openai/protocol.py,sha256=IIslNH6vEKljEXAG99B6HJCs6wjPgW1I175MY1_PcWo,53132 +vllm/entrypoints/openai/reasoning_parsers/__init__.py,sha256=q2Ai9Mb9TCEAFoag8rxvcduJpf1SK6KD9KzcyaBIb20,274 +vllm/entrypoints/openai/reasoning_parsers/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/__pycache__/abs_reasoning_parsers.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/__pycache__/deepseek_r1_reasoning_parser.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/abs_reasoning_parsers.py,sha256=Q05zVyaESXOibqsH0MvbagFAX4WWusj2wlreBwpchtM,5945 +vllm/entrypoints/openai/reasoning_parsers/deepseek_r1_reasoning_parser.py,sha256=tnjGZ-d66uPh2uWI37YpSoTBIfW-2_c6vyoyOeAk_l4,5987 +vllm/entrypoints/openai/run_batch.py,sha256=xsRrylkwjd8kuzJBtAo0JZvNIxIiui9qhwW5g4EAy2E,12567 +vllm/entrypoints/openai/serving_chat.py,sha256=DQTZPOf2tbDkqLyPAA_4Tc6KNf7wdx6XvmcFukUYyDo,43959 +vllm/entrypoints/openai/serving_completion.py,sha256=MO00SXETExwnSbcJG8Skm13TdLI7FbxceYHD6Q7egEo,22786 +vllm/entrypoints/openai/serving_embedding.py,sha256=acNsb2R3lcOJhgrlzJ7so0HCzXPjPIzvlZmBRHrEn78,9106 +vllm/entrypoints/openai/serving_engine.py,sha256=bIfvo5q63wVa9O_XalGT22I-q6pbZQCOBcI_GsjHCyw,20607 +vllm/entrypoints/openai/serving_models.py,sha256=rflL6qv1a_bupedo8RVYXe9pruJgvfJu0iyuf_COCC4,9676 +vllm/entrypoints/openai/serving_pooling.py,sha256=ib8oc4gJFaBWk27t58RDnYfjHqxEhmY0SxeywbwOfyM,8784 +vllm/entrypoints/openai/serving_rerank.py,sha256=Jvi3rHKeRUp6XuFxpvFRDa8VwiYtKy0bhwXkWpn_NdU,8430 
+vllm/entrypoints/openai/serving_score.py,sha256=05-WnpkF0jkdhR3fYfqqLaAHEi98k9v_sSKbcjYcJcg,8833 +vllm/entrypoints/openai/serving_tokenization.py,sha256=L4Tl5X2QgWLYuKgAv_UwD-QLh4UkMzc5Zeco68jhics,5474 +vllm/entrypoints/openai/tool_parsers/__init__.py,sha256=MCAp6mTORZyGThFRQXengbxammJdKmSr7kuQa-_Wjs8,773 +vllm/entrypoints/openai/tool_parsers/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/abstract_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/granite_20b_fc_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/granite_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/hermes_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/internlm2_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/jamba_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/llama_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/mistral_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/pythonic_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/utils.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py,sha256=Jkyeyt0PdTxWSwwqDIpIOkkUVbG4Am-LjFLRJQGOv4U,6017 +vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py,sha256=mjnp2uh4ZgJsZzTUU1_lkn15Tk5rFs84P7oAsPwt5W8,11114 +vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py,sha256=SejcNYn9O18Nt-YZgtRyJ1sGl-yLY2WtFgJkNZjh23A,10344 +vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py,sha256=5rF_22DrZFyGsj-SZG1d6IS9nuaWTtzvoaSUXhITnMU,16784 +vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py,sha256=GnxKcV02EMSB-R4AXETCrwQjUOfyUnt89VXgkKC2oIY,9114 +vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py,sha256=F9aSXFBrGmQGqXeVBC6kWSbqzug_CmkEM3LgoDOY-Oc,13538 
+vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py,sha256=KyUb6v6vlCw3m3NRZemMlRTuPgvvYCPVm0vqKbQB9Dk,11894 +vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py,sha256=N-D8XAETGsMhlNgDg45Y0huPMbVY73OI1ZD4O7yxLhA,14664 +vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py,sha256=9ZXksKP2X0i--AlkLtGEQ9IEq8Y5PtAsKkkqx1yqpnw,11939 +vllm/entrypoints/openai/tool_parsers/utils.py,sha256=YMUBQVTiG7HuE9QB60_5CXG_Ed4_A87jq543B2Q4psQ,3819 +vllm/entrypoints/utils.py,sha256=1AhOs4JUzXO6eVKcQyO_zoHYFuNHPpY8WAMbf1Tlcks,2294 +vllm/envs.py,sha256=N7D6rputSdxivRZny---DOUsGadYU8nn1GSZfe4nU_4,24374 +vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/executor/__pycache__/__init__.cpython-311.pyc,, +vllm/executor/__pycache__/executor_base.cpython-311.pyc,, +vllm/executor/__pycache__/mp_distributed_executor.cpython-311.pyc,, +vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc,, +vllm/executor/__pycache__/multiproc_worker_utils.cpython-311.pyc,, +vllm/executor/__pycache__/ray_distributed_executor.cpython-311.pyc,, +vllm/executor/__pycache__/ray_utils.cpython-311.pyc,, +vllm/executor/__pycache__/uniproc_executor.cpython-311.pyc,, +vllm/executor/executor_base.py,sha256=p-RWke1Eu1DBjRLN2U0bwDp97tlBMXGZdqGCwJzpzTU,14560 +vllm/executor/mp_distributed_executor.py,sha256=6mrv5wsBUw6ng08_qyn7LtmYsaGgGgTHoZSdP-hmPtY,9854 +vllm/executor/msgspec_utils.py,sha256=FG5Qh6ghtLjyI6BHiTGmJQ3sGop2Lpm4LoADP_0Hs9o,909 +vllm/executor/multiproc_worker_utils.py,sha256=X6jx-OFT-kW-O-2njzoZOG60A3GUTQQr5M5u_8i_fGc,10590 +vllm/executor/ray_distributed_executor.py,sha256=wujrkeqP6VwA81ox9hrhS14lcHWi7D8B6hqfDKFjXoA,27588 +vllm/executor/ray_utils.py,sha256=gSu9CiXPnOjrmPZbLNj4rms2zokeetupaiVMVtij6KU,15646 +vllm/executor/uniproc_executor.py,sha256=MlWdrHScDoFZbmhgT6QNONnCL05aawKGvmRhUz6n3lU,5408 +vllm/forward_context.py,sha256=Pm5oWtNDYmDmORo4vyAvEH4l5ujcbQwSylfQYif5Ysk,3939 +vllm/inputs/__init__.py,sha256=28WIgSktiZ0KR0ndxtnEwZjhS406uO-jPjyWovkw7L4,1228 
+vllm/inputs/__pycache__/__init__.cpython-311.pyc,, +vllm/inputs/__pycache__/data.cpython-311.pyc,, +vllm/inputs/__pycache__/parse.cpython-311.pyc,, +vllm/inputs/__pycache__/preprocess.cpython-311.pyc,, +vllm/inputs/__pycache__/registry.cpython-311.pyc,, +vllm/inputs/data.py,sha256=geBLaVXi6auxBSHvSkqViX-UR_fxc1L4cwnCKDmlgH8,13247 +vllm/inputs/parse.py,sha256=QIv86GTYr1TuOr0YpGvqxh_NWYbyIUJSomXcXnjbqSk,3472 +vllm/inputs/preprocess.py,sha256=SIvINfVZw8iTTg2cpybsuhYMFdDbPS2HBVMxLiHXadM,25481 +vllm/inputs/registry.py,sha256=VIQGXrJnB_UY319TwVR3lxjiSI69OpOIyszTKjzgBWo,16877 +vllm/logger.py,sha256=Zhf5TX7_kMXIYG5hOAjT1ufmNiPvMzHwrP7SV5tOBTc,7338 +vllm/logging_utils/__init__.py,sha256=t2aDazCRc19hTrOxiW3eY-d702nQFBOJz_QIfSip9ok,136 +vllm/logging_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/logging_utils/__pycache__/formatter.cpython-311.pyc,, +vllm/logging_utils/formatter.py,sha256=AAWbFV4wgQZn_Ek0MKA-TBJwCQiO3ejIuIV1rQm8ADQ,525 +vllm/logits_process.py,sha256=Rq55GebQWGNj5u_RpcE6ABbHkjHps5vhK-3It5UX-9A,4664 +vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/lora/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc,, +vllm/lora/__pycache__/layers.cpython-311.pyc,, +vllm/lora/__pycache__/lora.cpython-311.pyc,, +vllm/lora/__pycache__/models.cpython-311.pyc,, +vllm/lora/__pycache__/peft_helper.cpython-311.pyc,, +vllm/lora/__pycache__/request.cpython-311.pyc,, +vllm/lora/__pycache__/utils.cpython-311.pyc,, +vllm/lora/__pycache__/worker_manager.cpython-311.pyc,, +vllm/lora/fully_sharded_layers.py,sha256=GsESume-K5mteX1EH1Vr8fQmY0nUjyTIKtY-1HVFNV8,12229 +vllm/lora/layers.py,sha256=zgnvqQwBCB3pbnsDyhBjvYvWVrpmQNGWWXHgaXUVJew,44795 +vllm/lora/lora.py,sha256=XfOb94aCsORrhvTnHDy-gF6iGo6nULtpsRcR2wpfYBU,6222 +vllm/lora/models.py,sha256=FsxAE7ugEpEiPwnZIN0_W0jQyqbl5CqI8FQtcz83Nj0,33292 +vllm/lora/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 
+vllm/lora/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/torch_ops/__init__.py,sha256=z03eb5aCSj_Z-_RPa3huUKuXRBvhxj_M8lK7izkQJHE,466 +vllm/lora/ops/torch_ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/torch_ops/__pycache__/lora_ops.cpython-311.pyc,, +vllm/lora/ops/torch_ops/lora_ops.py,sha256=ilxQObKw2wEz3BJJ8X87xWPHGOz0jSII3b13wpj66es,4300 +vllm/lora/ops/triton_ops/__init__.py,sha256=pDFzcbjtDVrinPai8pFqq1G0hIjI-fLpxg5vXesFGgc,486 +vllm/lora/ops/triton_ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_expand.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_expand_slice.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_shrink.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/sgmv_expand.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/sgmv_shrink.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/utils.cpython-311.pyc,, +vllm/lora/ops/triton_ops/bgmv_expand.py,sha256=VeuUfI1mp37B-kYBQdzF9RA6cyWY49ZoNnmU2qqsqj0,5533 +vllm/lora/ops/triton_ops/bgmv_expand_slice.py,sha256=NO-IhMKXVu7g-8giHS0F1xyqA4fM03oSnNi6cUyKNxg,6074 +vllm/lora/ops/triton_ops/bgmv_shrink.py,sha256=7Oxgn2g2vilJQJUfPx7hhk2yz_Wd6H-6FQRfhE9BzYo,4643 +vllm/lora/ops/triton_ops/sgmv_expand.py,sha256=KidzmgiQaxGUgkpgvswTK8D0tVePqwwVXoyeTc7B-OA,9085 +vllm/lora/ops/triton_ops/sgmv_shrink.py,sha256=x2HlCSoiNVtS4s_P-8Uu93kbfXCTFHrQG2TMJRhpJWw,7674 +vllm/lora/ops/triton_ops/utils.py,sha256=nrcZGbN48oga_9gcLMX1FePDKN182DyFjfFrKFuLq7w,6365 +vllm/lora/peft_helper.py,sha256=iOUNDIf9nw2TSwuZ5cjpDCsb-stn1qhkupZVvYPcvrc,4399 +vllm/lora/punica_wrapper/__init__.py,sha256=RAbrZogtmoPZNIMImJFX1REM0cydwz5C-ATIp7_qHFA,244 +vllm/lora/punica_wrapper/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_base.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_cpu.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_gpu.cpython-311.pyc,, 
+vllm/lora/punica_wrapper/__pycache__/punica_hpu.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_selector.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/utils.cpython-311.pyc,, +vllm/lora/punica_wrapper/punica_base.py,sha256=Sd6bBfAUwWVM82zzOrl5hIkINzthhNGv857VdGwAk6w,18228 +vllm/lora/punica_wrapper/punica_cpu.py,sha256=1hzv1SchgOGiUo4kYsAN5BcTDX5XNQopMqbbXvxLqlg,12465 +vllm/lora/punica_wrapper/punica_gpu.py,sha256=AfVozZYi7kwdwVj3CLjxAx7vHDFGQpoORzo1wCDf5t8,11427 +vllm/lora/punica_wrapper/punica_hpu.py,sha256=84vAp9BLWkf9u8CdnT_enX3IUd2_QI_qvOU2vKiLSb8,3253 +vllm/lora/punica_wrapper/punica_selector.py,sha256=WP5XsmWE8YJG8fmak0jNPrgYxF4_lxaJt-mcNaMfsRY,755 +vllm/lora/punica_wrapper/utils.py,sha256=CFoSN8wz2TQhQhYdiZFjN_QU4AwQ8wPfa_o7wL9Ufuk,6846 +vllm/lora/request.py,sha256=w_fGpOlDlJpOS-7iw-dKXPkr0zvDmgdB4mr3kOszin8,3059 +vllm/lora/utils.py,sha256=6FD4StnjuC_MporT8Ahd6TC_t7VQZz22gzekjQljmrg,8122 +vllm/lora/worker_manager.py,sha256=-rm7b4BXK2ezu8tW41sEe1mzCmG49ksWfQaZkHvW8SU,10590 +vllm/model_executor/__init__.py,sha256=cRhmybV9ftoNVy7E91WIczp4wLL4E6y77KQ9vrhWqL0,505 +vllm/model_executor/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/__pycache__/custom_op.cpython-311.pyc,, +vllm/model_executor/__pycache__/parameter.cpython-311.pyc,, +vllm/model_executor/__pycache__/pooling_metadata.cpython-311.pyc,, +vllm/model_executor/__pycache__/sampling_metadata.cpython-311.pyc,, +vllm/model_executor/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/custom_op.py,sha256=SY4dP4JUcvagu1OOsZwjM6Q-kKPG0N597JeROytng04,5331 +vllm/model_executor/guided_decoding/__init__.py,sha256=qbfQVkvAs7NgQaLMB9_wGhDGYwkOlTS_QQJm763nN4s,6883 +vllm/model_executor/guided_decoding/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/guided_fields.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/lm_format_enforcer_decoding.cpython-311.pyc,, 
+vllm/model_executor/guided_decoding/__pycache__/outlines_decoding.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/outlines_logits_processors.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/xgrammar_decoding.cpython-311.pyc,, +vllm/model_executor/guided_decoding/guided_fields.py,sha256=buAymWm9-1u6oIscVajQ0qQvwHVVYbvVDY6quQQlPyk,1450 +vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=uUzfxJuP1NDjgLzPdEHJqGCj17J2JInP_LksjCP2utQ,2678 +vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=K9rDRJr1NAUmaboTH4nZCwGta38uOOK4vHVLRgDA9s4,5188 +vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=HlKi6Oy4VF85_BFTHYS3QYQ6JQPVkq5kZFf9W1wKKIw,8683 +vllm/model_executor/guided_decoding/utils.py,sha256=3Of64yPGTZ4Ohu5crzc8JqaJRo_47-i5-iVN4sgSPx4,7693 +vllm/model_executor/guided_decoding/xgrammar_decoding.py,sha256=RrTIdjAUhAEJZO1K0TKfoG6KsI_M4WtGbDnwAcp6YEo,13113 +vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/activation.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/layernorm.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/linear.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/logits_processor.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/pooler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/rejection_sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/resampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/rotary_embedding.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/typical_acceptance_sampler.cpython-311.pyc,, 
+vllm/model_executor/layers/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-311.pyc,, +vllm/model_executor/layers/activation.py,sha256=EHZZLcttE3vxTJDu47rEaYklObw4HK4xX6tdcDcnRas,12197 +vllm/model_executor/layers/fused_moe/__init__.py,sha256=8NLBBk3J3kbdmy2lOmYqoowsqTXEBgM6wy2rdFAtxvw,1168 +vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/moe_pallas.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/moe_torch_iterative.cpython-311.pyc,, +"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=hH5rRN9Wtyv35azxMzyUMHWtiKgOHev5tNjIG8j6dsE,2751 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=qPumkNxaHMvVBnEjPe_Xiuz9ICb6Hqc-9I1DAR8s3gA,4130 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=s47lb8VLnyxMgWlqcIR4BdPBsjKWL4olXF49uZvygzQ,4140 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=gzfjrYDcS0vsACq7ONGVkNA3FqVjr3e89q9fO9kokkg,4133 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Np7yRX9Z7Y7Z5Nutbl02wpKdZRltbt4WqlPlleiYs2E,4146 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=XsNfNXY8v0eatazkLCDiDclI0FnTudUGLYO01e1_4aA,4149 
+"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=H0K4_O1CMbNLi-srcycT3lSl4JaBl3EGF89GY5Rj9MU,4130 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=arPqstZMzZjz8BNpY3alKT4vGCJyUj5I2hEeK02aq98,4152 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=wjnQ4v-dflJMR3iFDHBuZI_1R0xXjsNoWc2kHu6C8JI,4135 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=7WHPz_0fxeI3Ed0D9VIpZVoeN9RtJVVARvptfcmQu40,4146 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ypuAxMQ7JESPXLBltt68wly2wTrJzlnobhUMip6xAmc,2751 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=tUptlureu5QgyAEedtx5sm7CFudXAE6fIXepOb9gfas,2745 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=h57svdmDlZC_D8w9XWjPRS8ciYVkJiPEYfhrD2NRVVY,4127 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JmXhUnhX6YOy8RsmT0zFLGyNCpRBPV2q2Db9Y9ctZeE,4144 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254 
+"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=bWR6XBZ4nJ_ROg8rEgrQGc04I3BDbwILDHMZxATO-H4,2740 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Gu1wROuky-xS0dsFgbXS2QD_hOVV8yol9a5iqiYyq3s,2749 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=_9HO7SaR6aQeh6vqCDpo3kjHnGJ9BVKLiMwYYgd3SmQ,2913 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=2ONiQSa9odzdPe1dIgBpP24l5z-5wB1eos06xOj0V_Q,2738 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=Twkm9DVNxijpowfvioJ_4cKwIIlAWdyNWO9TA3gxAHs,4149 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=THQWP1o2bWhnJh0rq3ZIVvs_sagIJgoK4x3pJbiFbHk,2910 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=o1pR3rNpO1eW4BHOKpPIQLjviw4P2X5Fr4HQBcdHA-I,2747 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=iySqae0zI_PRBLqV-vfSCwDS4Jxcl5QjWa2NnhndL0U,2752 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Uhq0SrWiCrldkWbb0ZZZhWaCZ0SsvpiNL4z30KZUN5g,2747 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ydsFUdXdVE_ZSScVhUxvxOFwKG-nkTraNeN69wqzxIM,2903 
+"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262 +"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=ozS2ECxk-Dsd4Y9DgCGGwDwJlCf5T20ANf5gnTUMuSc,3252 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=WegYsHl39QVlHu_4EZJSrgA4LQ5fYxSVNWFhoL6W2Rc,3251 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=X8FVPE7rLblDs_Dw_Iu-KDw9H7PaC417EHyVclYjfv8,3733 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=Ezk-gq1ET8Kan5LQvk4XbwDcgsbuIBoaa3HlDwI-364,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=LgHbxG1kQV36zZPkJcnurHYzwAjMh04lvEHEsfzS1t0,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json",sha256=_fcdkmWvdMqHiH8ZAGke-zXhH7qVPQx5CmKELW5hRCA,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=yTf2R9cngSf4OafucAYlDDn4-bftaMFKaY7qhaBZPqQ,3739 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=_1eVE7ok935L2V43-3D3bVNWSVaoViia19sh0VrXmXM,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=6jRC0oOpVpq5c1xePFKNRy-Xtmb038i4LE9N2zao2W4,3730 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json",sha256=cFWeyNJtEbs-Bfohgzclxo1rcYGU863oV0BzJyQ4T0w,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ro3drDpWAdeXH7IjMvx8wYGhIuDPOl0bpbJaIB5Msns,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=qvGJh6BN6FJ9T-kR_ttpu9D9csWmDUfGYCAmo39kQsY,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=qX5_yErBEwDRzhv2FvxrS3pEMa8zn0GHzLp5TUMX90g,3872 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ysRCWmxV20K2BYD9XEUtxwREFGtA3QHI191vHRA0k_Q,3733 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json",sha256=L8VA1sfygHoyLJ-Ybfs8DP5c0YWFmMkwxHT8yJ9PEFM,4732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=QsR-Xr9vyuiArMTSo-dX-1DFgATfqwIGOzFuQJAuE_Y,3734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=halUboffSrtXJ09gUM6BTDySLpO_pSvxnYHqSFleyoY,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=kF4Fx0yHUmiMSLFNXT6xqAEA4AgCaHOoy_3irv4dNss,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json",sha256=uOlVzTdJl_4VrRK4wmxIb8JKfveFZRjO9syjw_oEeL0,4732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265 +vllm/model_executor/layers/fused_moe/fused_marlin_moe.py,sha256=dzv596s3Dz0IvNrzQRY2MOVtCiHB9V3V9BnZ2VBR0K0,12629 +vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=FB7G9i3NwUdQ2AXCqn2p8nsT1N-1Q-XfVfzyM2xWCE4,53611 +vllm/model_executor/layers/fused_moe/layer.py,sha256=XB93XjY8-muIxoC77lN5ErToiG6ZWEY-FPEX3XybEkM,27237 +vllm/model_executor/layers/fused_moe/moe_pallas.py,sha256=20nVD5HOmg1D9Jol0k4IS4PF_k6QIUIZBGIUxPNlrIE,2338 +vllm/model_executor/layers/fused_moe/moe_torch_iterative.py,sha256=EifnhcLw46jB1DJLbXh06LKg3U54Tt-k3RH7DZRwo08,1884 +vllm/model_executor/layers/layernorm.py,sha256=iQ5B9JX2r-Ir56o0LMmsqwEipOJNaBPCaVFM4JiRXes,6879 +vllm/model_executor/layers/linear.py,sha256=Ago4DWmcYggy92QA4__7b-cnaddfqG0rHHxaDX1wRcA,51875 +vllm/model_executor/layers/logits_processor.py,sha256=EVOsKTlu7lSgUCCLQYCinqV4pA1c74NThDalG1kchDg,7685 +vllm/model_executor/layers/mamba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/mamba/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/mamba/__pycache__/mamba_mixer.cpython-311.pyc,, 
+vllm/model_executor/layers/mamba/mamba_mixer.py,sha256=qWLliA7YxXm5ZzZxLKsYW8hEZMyXyd6pzSkHvMBEMb0,10120 +vllm/model_executor/layers/mamba/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/mamba/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/__pycache__/causal_conv1d.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/__pycache__/mamba_ssm.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/causal_conv1d.py,sha256=_ZiWUKMLApKDWDH8iB_8Zw_GGGAFHDJRxbcMWQlMYac,4470 +vllm/model_executor/layers/mamba/ops/mamba_ssm.py,sha256=HUDI4IAh17QrAYYLWU6oRVZixym8yFuTscmtifQaza4,14110 +vllm/model_executor/layers/pooler.py,sha256=1HUO5hcGpxoFE966sotGRF2SrLYGn_MOj4OpHLVrm1w,10904 +vllm/model_executor/layers/quantization/__init__.py,sha256=Zd8kVWjyyspQMRPcTVJ7ZAiW-I477qZ5KyIsvFlXtNw,4727 +vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq_triton.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/hqq_marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/ipex_quant.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/moe_wna16.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/schema.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/aqlm.py,sha256=1cox6rKyx40kaL_arjH74cv-MXzEmY1V9HnKsN4pdxw,13605 +vllm/model_executor/layers/quantization/awq.py,sha256=VqBBjy3nSkJTDTxuxNZ4d_Vp9iivJuvjRdqSRfIDZXQ,7050 +vllm/model_executor/layers/quantization/awq_marlin.py,sha256=oZBIvc5HDVqqyt-iFUYsRydryhqUQmemK-iRl2a_ESw,19337 +vllm/model_executor/layers/quantization/awq_triton.py,sha256=SNr8Xro-iH8IrPJBvGdI6R_gsV0QmNnEzWOZhx2p014,12415 +vllm/model_executor/layers/quantization/base_config.py,sha256=nPAlJhsuJvfSIZmNQJDsCaYsWqpfc_zBMDJbrlM_KGk,4915 +vllm/model_executor/layers/quantization/bitsandbytes.py,sha256=cUbub--j-HGzlvlyJ5k5PN2EHYdwYmNhIniF2S2cdEI,14200 +vllm/model_executor/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/triton_scaled_mm.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=mBpPmIJ400gFYIgWDIaRQkE133j5PPlY4lvdzHI00m0,26758 +vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=DAP3-1sKqdchMfhxaE6-3v_f-X129odEVObJPNMvKLQ,24809 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=BwKgT5i-Gu1d45Z72x5PUP3Yect8hO37yW9rfUeIoug,931 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py,sha256=l0iiT1vCWYwqAvw5RhDGRQS-mJsWGSdMWHOxLIr1hF0,13882 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=Jxb5WOHR4m98DRO4y3XKJMDpT_NIp3EwkAJe8HYqwqE,1527 
+vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py,sha256=v0u6xXo4zJ530YWbnnJXbJFoMrjUVveF6z4HVKhCqrk,6207 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py,sha256=GmDa7Kwrqj7Ja5-VZ7DRY8MTcmPChOKOK7ku3gguLT8,5409 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=5XQvPJ73jkBemtF6JXyazqzX9xOtGAX_Aizy0cHqmuw,6284 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=yns8-JTL4TM6boMggXk8E-yOalJgQ9O1neAs57oMh3g,4872 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py,sha256=N1GHQwN38JcHGDNC0oll59pJePaW1rqb6aZL8QS9LhY,7002 +vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py,sha256=gMLaK9uSOVqYw1wv8klnGLc1qass2K-DnpWTSm0rvuM,7731 +vllm/model_executor/layers/quantization/compressed_tensors/utils.py,sha256=YSJeIjPjkJ_dZIzKEnGGz419L7rC3HJnvhhohySbntE,7707 +vllm/model_executor/layers/quantization/deepspeedfp.py,sha256=4uu33p5aLq8QixKMuZSEd0sM3NnXKkD5iiYKpi7zQQc,7114 +vllm/model_executor/layers/quantization/experts_int8.py,sha256=6Xh4yzbPX6JHcznITDM-YoHn1E20YgnZe3smH5lMlr4,7286 +vllm/model_executor/layers/quantization/fbgemm_fp8.py,sha256=FUdGx9LV6nQU40_CBDgft1e7AC_3TZwcWjeKN8as5Vw,6499 +vllm/model_executor/layers/quantization/fp8.py,sha256=Q50CyRsgMuB6juYhikCqrLh1BIK56Xeu9QPTGogTgkU,32180 +vllm/model_executor/layers/quantization/gguf.py,sha256=ekv5leaAakl_6WTCuX9QaQTupRMHr7U-cp8ihT4b95Q,8290 +vllm/model_executor/layers/quantization/gptq.py,sha256=fj3aQlLROaRMEAf4vy1oOQXNhlT0Y8hDj95eQUGoXOw,9221 +vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=_mOx7VYh814UD4WDYyF3DhzvGY9RqArPHe2yaQodXjI,22372 +vllm/model_executor/layers/quantization/gptq_marlin_24.py,sha256=zo-mfwHb1ZQbCov9hMs4VNC1hOT5J6UxYxXW7eVptmY,10854 
+vllm/model_executor/layers/quantization/hqq_marlin.py,sha256=3sy-hO8XoTRocSn0y85OZYK-fd9w9dZy5uQbU63oYyg,12695 +vllm/model_executor/layers/quantization/ipex_quant.py,sha256=DcwEYr0Xp4pVq6TKCbF3Hcc8fBNz2uQKfL94AITEr_g,9734 +vllm/model_executor/layers/quantization/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py,sha256=Qx5iCPjsnW69RjhPY6qRRsTWddK4gf4sbrgh6czvtYE,2879 +vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py,sha256=uvXWMKZhuGumUmlRUDGY7l-f79hXVDk7LQ2CifWsVNA,2846 +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/MPLinearKernel.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/exllama.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/machete.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py,sha256=pqVO7rcPUT28KaVAoYJ1e96JSF18scX--uz_WprnUKI,6151 +vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py,sha256=JBJ8VeQ7zR8rGRAJO3IzZGiwItnCCqMup1eWOFMD9os,5206 +vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py,sha256=GZuxk-7xiEVT5uasGIvPPYgvKtaHnoETGdokTLHZTrk,5893 +vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py,sha256=EKNHGvrBoiElOpU-LOa4V0pui-MkLDcwxE9AScv9Sdc,2046 +vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py,sha256=1_Q-qSl5JYKiQ48_nvuNar-LoCgp4zYmk3G2N4cyzsM,3321 +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/ScaledMMLinearKernel.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/cutlass.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/triton.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/xla.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py,sha256=-xzsBNkRLZ9WnMq4Iq2YvXjuSDMtmNdP2oEa4QGJIcE,5989 +vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py,sha256=3ssC8LJcQ-iPtlxEdoAZfPngiAuCaAy3-RWuBGz3ldE,1283 +vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py,sha256=DVQP94zjroZjb_ZQLS6lXXLtm50OOAMbFz0y-imjwSo,4199 +vllm/model_executor/layers/quantization/kv_cache.py,sha256=V4TdygQR6dm3Om95Z_KctJi71jK5GMghvUIc_o4SlhU,4077 +vllm/model_executor/layers/quantization/marlin.py,sha256=2kmoUfynMW0rZtCwXFwUPktdD5gB4-yerAI67S1LUCY,9590 +vllm/model_executor/layers/quantization/modelopt.py,sha256=1PgzYMw6TFEF7tlyZpTo80YsciEOFRJwq-_46SRsih4,6526 +vllm/model_executor/layers/quantization/moe_wna16.py,sha256=SKJtvXddNPokNAQcCKJ5BcoihdJkr6SOdNjPpCN2Lbs,18735 +vllm/model_executor/layers/quantization/neuron_quant.py,sha256=BUvTpDhFVLByd_mr8yRrenVfjfrMRvlCXj-MyOyIdUc,2392 +vllm/model_executor/layers/quantization/qqq.py,sha256=NewuQ2IZRI-SGWPTYktbt_ue7D8n3F8JkZnAj1RFgWo,9911 +vllm/model_executor/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/quark/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/quark.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/quark_moe.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/quark.py,sha256=soCfkI9IkQL217riDEHPmtXH0f-QHKcXmRXMgx2bUKw,16724 
+vllm/model_executor/layers/quantization/quark/quark_moe.py,sha256=eHbLgLdjKSiD7hn1JS75Uy_15jEd_tJ4NjHnwBCSXAo,10660 +vllm/model_executor/layers/quantization/quark/schemes/__init__.py,sha256=uEyTk05b5tNt-oEmSSkySwnWJ87o-oVphnSJoTf3MAM,221 +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_scheme.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_w8a8_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_w8a8_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py,sha256=f_5qRmZ3SZvIgBWiMlNjfu-WN1uVwSvfLGFe9QS25R4,1491 +vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py,sha256=KjxRZaD35GI3zckZVZUDib2ij7G8y1erbMWrJG7LmIQ,5935 +vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py,sha256=KKth7fai8d27MgtYbMCwF14ULY57Rp72JuBme4UWH3E,4677 +vllm/model_executor/layers/quantization/quark/utils.py,sha256=9VCOhQ_qNa31Se-LBuV11m1_HXNV4Fp8UJU9q0pRljM,3558 +vllm/model_executor/layers/quantization/schema.py,sha256=vb8XZXdDPgY437o96uNlQOUDhPoGeowrlwO1b4QdugE,3686 +vllm/model_executor/layers/quantization/tpu_int8.py,sha256=waQV-CegANwNFpD3ipO4mibocgLjZAVFubvOi1c4Omk,4416 +vllm/model_executor/layers/quantization/utils/__init__.py,sha256=VbdLnvlGCFpa2o9SRnEMflRyJ3NOXd6j6d1fPN_xm5w,166 +vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/fp8_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_qqq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-311.pyc,, +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241 +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242 +"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 
128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261 +"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266 
+"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263 +"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262 +"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260 +"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251 
+"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250 
+"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264 
+"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249 +vllm/model_executor/layers/quantization/utils/fp8_utils.py,sha256=u49_10Un3mKV9NICpkoXbMFGDu_tlZJ542_BG1WMAX8,18523 +vllm/model_executor/layers/quantization/utils/layer_utils.py,sha256=HmjtrTYHbc5u6kKfE8cnu5O2Nqg9ZyRy9BAGHYgenDY,1562 +vllm/model_executor/layers/quantization/utils/machete_utils.py,sha256=OeMLMQDbifbxz-HRYmjrC4RnMOutos17544P-h_dNE8,1074 +vllm/model_executor/layers/quantization/utils/marlin_utils.py,sha256=nTnBnKDkzatlScXWVfGATpg9YMkttp4e-m7Y63wDHHc,13678 +vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py,sha256=bASNhLOrC-7AaC-xGUIQiDqUx0HRWPX1liD4iqzjas8,3672 +vllm/model_executor/layers/quantization/utils/marlin_utils_test.py,sha256=HkXMXXwpP4mfxpWI8p6BRjoq2ipVGpWLe1eWYSYxi-c,5311 +vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py,sha256=zk_DLYsG7n7Y9mvtEkY89fXSH9kPEsuPLfbX0W38DEA,17558 +vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py,sha256=ybGP1JEn-Mti_vvEs8Ch4cl1NfRK4TMDxdYWEYZSvxI,4101 +vllm/model_executor/layers/quantization/utils/quant_utils.py,sha256=XOrnnp5RJdyfggUTrMsjR4OBPaNhfTd55HvoZw5plt0,19458 +vllm/model_executor/layers/quantization/utils/w8a8_utils.py,sha256=WQKrSvdkHem-mVpAs19APMpyTRVadmA90DTKHVF8rS8,9582 
+vllm/model_executor/layers/rejection_sampler.py,sha256=4Rp-lgHMcZ4JFZs2ujBG2Rmm2FmPoGbq-ZbxQW_cVeU,16469 +vllm/model_executor/layers/resampler.py,sha256=fA3oc51Ku2jW2orqWVX0voeMc1zSooV6EyGteRIvGAs,10444 +vllm/model_executor/layers/rotary_embedding.py,sha256=lfv76Vwhfh5LvqzK6NBMV-R9Cot3mRYmcJhnqiAHV44,43994 +vllm/model_executor/layers/sampler.py,sha256=xsB75r9N0NPJa74kofiqXJ13koDs-Mkd_BP9F_jvSys,53762 +vllm/model_executor/layers/spec_decode_base_sampler.py,sha256=J_RiMvR0LACXjsTufxD3w4s4wVZtZ9waeDd6XEFqRkE,10119 +vllm/model_executor/layers/typical_acceptance_sampler.py,sha256=uzlrDmPtV19Mv17iJIceBU4Lj9IiJ1M3kXTxZ_y0mek,7047 +vllm/model_executor/layers/utils.py,sha256=PyCRuK3cnHx31HgQ2kiOt-Fx3Stm0O3q6ACOfVia8Qc,2674 +vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=cgtcGzxKQ-JdiV3-z_22LdCHqm6SFaElmdZWTII2u2Q,22621 +vllm/model_executor/model_loader/__init__.py,sha256=SCcBIQqgBnp-uob0RHRC2La8mElcfAkXaJ_jwPLbxWI,646 +vllm/model_executor/model_loader/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/loader.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/neuron.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/openvino.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/tensorizer.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/weight_utils.cpython-311.pyc,, +vllm/model_executor/model_loader/loader.py,sha256=h8V8Tw1q8s_EL-1mXxvZn1mk_riop7d3Kmo1JFr2mkc,62460 +vllm/model_executor/model_loader/neuron.py,sha256=vfxSDDhrVRrBw5KsJaO0ICGXnf5ZlnEECd0E8Me8ETc,8328 +vllm/model_executor/model_loader/openvino.py,sha256=SnzygL5Em3WQ410wHLF099T8cngQ8K9kpwBP9CtvG_c,7716 +vllm/model_executor/model_loader/tensorizer.py,sha256=IrDTxwTz43NIJxiWJ-Ravu3iw20O3Opv-xKntAW4oUs,20260 +vllm/model_executor/model_loader/utils.py,sha256=ULvJOHzFoGMZlVPYU7iIilttSWPYknFgaFRGqZnc1LI,6513 
+vllm/model_executor/model_loader/weight_utils.py,sha256=xlqPpwJ69zA9xPuL-ZtObfqZ2XJZh3a6EBpbGAyCChs,26237 +vllm/model_executor/models/__init__.py,sha256=VA7jLZHGmibl55M5pQLfaW-FwlTmlEBOUzlQ1PWwqmQ,758 +vllm/model_executor/models/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/adapters.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/arctic.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/aria.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/baichuan.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bart.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bert.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/blip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/blip2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bloom.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/chameleon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/chatglm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/clip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/commandr.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/dbrx.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/decilm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek_v2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek_vl2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/eagle.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/exaone.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/fairseq2_llama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/falcon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/florence2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/fuyu.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gemma.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gemma2.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/glm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/glm4_vision_encoder.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_j.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_neox.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/granite.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/granitemoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gritlm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/h2ovl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/idefics3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/interfaces.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/interfaces_base.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/intern_vit.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internlm2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internlm2_ve.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internvl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/jais.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/jamba.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_next.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_next_video.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_onevision.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mamba.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mamba_cache.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/medusa.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpm.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/minicpm3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpmv.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mixtral.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mixtral_quant.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mllama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mlp_speculator.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/module_mapping.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/molmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mpt.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/nemotron.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/nvlm_d.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmo2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/opt.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/orion.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/paligemma.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/persimmon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3_small.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3v.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phimoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/pixtral.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_5_vl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_audio.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_moe.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/qwen2_rm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_vl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/registry.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/roberta.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/siglip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/solar.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/stablelm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/starcoder2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/telechat2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/transformers.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/ultravox.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/vision.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/whisper.cpython-311.pyc,, +vllm/model_executor/models/adapters.py,sha256=1G3hLCo2DSoGAbjE-Os-RQM5DIuw6OmHzZIE7FSYoUU,8341 +vllm/model_executor/models/arctic.py,sha256=UzPduVU4GRJlHUbBasCAcBwjFKibdJY5WCSa5UJjLQ8,25325 +vllm/model_executor/models/aria.py,sha256=ude6Rto5mTho80WtnQmzUXaJwKgs3Y5ivl85ZpCiD5Y,25496 +vllm/model_executor/models/baichuan.py,sha256=_H8q99d7T36sQRcI9egEoLapJdUvxZLlIXtdplWFqso,19452 +vllm/model_executor/models/bart.py,sha256=F0VurjNMn17VER5qobIqGFMmRCif7KHsB3VRNJkZpu8,36581 +vllm/model_executor/models/bert.py,sha256=i5G_BURorjAhXaqmPbFJSw-mAgFc7Wr09n0vdVON96I,21005 +vllm/model_executor/models/blip.py,sha256=E0ty49zD_wOmBZmCJwCunXurCfXFD7CbROUVs9e37Zc,12190 +vllm/model_executor/models/blip2.py,sha256=pNMbpa1FBFWjGlC0sgaicKYDnJ4FshYRkioUOv2G_cE,26212 +vllm/model_executor/models/bloom.py,sha256=enNsQf5c6iEqMRzmtNzuq4nhx883ieWoThGwRNXP5-s,14907 +vllm/model_executor/models/chameleon.py,sha256=a5k-u03agHgE723AKuYA9l9dmzwbHXTzJOyzNetqBeI,46254 +vllm/model_executor/models/chatglm.py,sha256=pMkV4vvjMlGAcVxLaVG5wiS21tPo3l5BWt24IRR64do,29943 
+vllm/model_executor/models/clip.py,sha256=6YSE95hvt9R2QpnR1DxILvDJemIEzOjxiYIqLUbrCmI,19805 +vllm/model_executor/models/commandr.py,sha256=UlSbMeZfsna8oz2QwZEdp6jo65h3LAzB4owlLj8jw8M,19966 +vllm/model_executor/models/dbrx.py,sha256=VJqlpmeRaRK2fCCWo0VwkCzCRQ8tf3M5NmySNrtwE78,19242 +vllm/model_executor/models/decilm.py,sha256=eMmIM3LUlTkZh9EnNksdBJySO9cjP39LE8AAT4aGM_o,5380 +vllm/model_executor/models/deepseek.py,sha256=_Xe3Xr5tY3ek0KAPrTFC56ZF7veNwiVvMhG5K3nW8y8,20583 +vllm/model_executor/models/deepseek_v2.py,sha256=1PyjHQ-GQK1yTX3u68uQEOUklUe0BIxHC8XNJA4Dpfw,34275 +vllm/model_executor/models/deepseek_vl2.py,sha256=P7_ZwvsNS_HZC8bfcd0xqD2NMhOaIsvVQc6YMzTuHLM,25033 +vllm/model_executor/models/eagle.py,sha256=KjspfLcYIcSMFahuyoRyzn_-ebSrowojbwmlqdMvRQM,9319 +vllm/model_executor/models/exaone.py,sha256=rwdJICbgSB0GBMuVBr03r0BKQ_zXn_0aMpr25PjGIdU,22018 +vllm/model_executor/models/fairseq2_llama.py,sha256=J983ipTj1VNjEtr8AO5zse78kpdRqnL2WzUIC-gRZSU,6489 +vllm/model_executor/models/falcon.py,sha256=44i1htfX_jyLqApu2_irj278swH1hP-e0PTf7aDEXrg,22073 +vllm/model_executor/models/florence2.py,sha256=WK2yDQvFT4LbNi5TUu48y0f1EirMsXqzlnno70iRcWQ,10131 +vllm/model_executor/models/fuyu.py,sha256=TzFjXm8_vz0VFk8JB7aHIve66h6aA-ryUAN8ofEsJDM,14696 +vllm/model_executor/models/gemma.py,sha256=Y0BP-sz0wExJp4sZwB9PUGvrACC9obzkwAU4EYA1CM0,17496 +vllm/model_executor/models/gemma2.py,sha256=-kLNtndPqtOoeOENch4iK9oVRPZ2bYMQ2vThWtLIeZI,18886 +vllm/model_executor/models/glm.py,sha256=cXlSy28_9DLF7_n9QGsXSOmA449pnA7d0TeyHW457DM,979 +vllm/model_executor/models/glm4_vision_encoder.py,sha256=tqh9uA0P2f5s87QzsV6dID3hL01ErA5-1TUBsOO1KfU,10456 +vllm/model_executor/models/gpt2.py,sha256=FaR_kRUZx3FhTmNcVDFh49gKQTXgsmqSsd3p0gtbfog,13367 +vllm/model_executor/models/gpt_bigcode.py,sha256=VZwjWG4xhlARF-gXT-lCKysSlggI_VYQCTxEUXIWtQw,13954 +vllm/model_executor/models/gpt_j.py,sha256=azmOuWE5n6sriGJ3FZbuDzbnVSLaREOhH1LC_eVGIcY,13875 
+vllm/model_executor/models/gpt_neox.py,sha256=feEujPn2yPNXN0wSkBtyVfHfc8RJr-Iw2f_0ngnEIxk,14051 +vllm/model_executor/models/granite.py,sha256=VXloy_FH-B2Ro4BXHDDprxo_k3t8PvV1Uaopm9_8nW0,21158 +vllm/model_executor/models/granitemoe.py,sha256=RPMq8An4Ym-XuiZ9fGuCxp3FKgFUvJft1VO1UGL_d4U,18817 +vllm/model_executor/models/gritlm.py,sha256=axmHQRxBCU6gLLpLu3Gs9c7hWqIAxC1UrL8RLM9kzZg,9152 +vllm/model_executor/models/h2ovl.py,sha256=N7cFONfGq7pL8124r6KhKK8lKagYu6iuJhJg8AeQhVs,18250 +vllm/model_executor/models/idefics2_vision_model.py,sha256=ayLc5O8hjSwBhmZRdnvx8dtTU8x3ejghzu3ftrOcGiI,13601 +vllm/model_executor/models/idefics3.py,sha256=SkShAmm1WSqpYEqHIFu2LMpKW1xBBaHCOhk7d_opnwU,27232 +vllm/model_executor/models/interfaces.py,sha256=FF6TgcN9qQcPKelRc8vjPj2lrycWEW6kGT2vgupFQ3c,12651 +vllm/model_executor/models/interfaces_base.py,sha256=wErHsx42tbVyXxcHajAWN85hoAsl4HwHGPg2Qo67LjA,4777 +vllm/model_executor/models/intern_vit.py,sha256=jcUq9Q6xp0BHdWs7Zgo-iDoUAiO3bcIrBsYrrGr6xsc,17332 +vllm/model_executor/models/internlm2.py,sha256=2rACIQ02NurhpuPAYKNp2x6iU81S12CBfhS0ro68uYc,18587 +vllm/model_executor/models/internlm2_ve.py,sha256=qRdTU06JySIA1DBtmASrnY3IGXUD_nhZsQERfodKtOA,6123 +vllm/model_executor/models/internvl.py,sha256=5kpoVq2Nq7UeevP0J2sTG6taN3m6LP4QPjPhmjE4Zis,33616 +vllm/model_executor/models/jais.py,sha256=lnvZ9BFwJG5pzv_HkfVB7aBeG1IrDo0RszCLhu1jxoU,15533 +vllm/model_executor/models/jamba.py,sha256=PooOuQRo7waQ8OJ4NDkiD2g_cgvPHL3Mbi-uRvxDHYg,26068 +vllm/model_executor/models/llama.py,sha256=LuFw4GLHl_b_N7n1JD_-4kNcILIH03nBatsrMU-Q1W4,24225 +vllm/model_executor/models/llava.py,sha256=iy4I7DY1LTVIM41Q4sCaZ3cbxiPguHt1X5gjrA8kzfA,31182 +vllm/model_executor/models/llava_next.py,sha256=K9P9ToMBvZnsvdJCD2ZjlsWxec1NTYbTetCGn7rCNQM,24222 +vllm/model_executor/models/llava_next_video.py,sha256=OvM5Gk-08YOvg-WAu8UTPnZhOIZBv7Nv7reXWP1s-Bg,18645 +vllm/model_executor/models/llava_onevision.py,sha256=FEfwfLoS-Ohy2hr0JmP3itHVsUj_88K60CJHh3_E64c,37227 
+vllm/model_executor/models/mamba.py,sha256=-Hm6slyN380xsS1mdqLEdvjl7HqR6hlR7XfG022aIHM,12925 +vllm/model_executor/models/mamba_cache.py,sha256=lIXqrv7ge48MJp0LTe7QWeVHvlniEXfxeQMJrgGM5og,7080 +vllm/model_executor/models/medusa.py,sha256=c0jYTHJXo8FfueT0-y_yYjA7e91TIFz4PYRu7G5Kqlo,8558 +vllm/model_executor/models/minicpm.py,sha256=JOVHd0YE3olndaK2d4hXeXvELQj8VFADj-XfPiGaD78,25057 +vllm/model_executor/models/minicpm3.py,sha256=bnK4HEJbD-OEyUZVRxudTvfFmTPQJ4QtyZspa09Eim8,9853 +vllm/model_executor/models/minicpmo.py,sha256=izsU-3mwpklxbHpmMmF4xRGvgGH39_c4Lu-IxfcYZeo,33228 +vllm/model_executor/models/minicpmv.py,sha256=xIYhr2dOHRu-AU9rHDp1MLS1fECJaW0McsT3bZpEots,60364 +vllm/model_executor/models/mixtral.py,sha256=PoCLCjry9uZBZhWSdOnrWebmRkbVF7qb-85EXAyzfcA,20749 +vllm/model_executor/models/mixtral_quant.py,sha256=ewTrriEZlR14NWLDmzEVgSPPjXl572XHAlRQHUlA-W0,18851 +vllm/model_executor/models/mllama.py,sha256=6xRJVUjLE2lytRFsyDOdvVDv0TMTvNwve-fgyZVoIPU,63312 +vllm/model_executor/models/mlp_speculator.py,sha256=hv7XRnxENUo4R07K_Ox_9RVTZyKEDS96fYNECVKjz0A,7905 +vllm/model_executor/models/module_mapping.py,sha256=d_72RpeQ7wVO9LXl2eeQDIRyurGtEMsN33ey5IdDG_8,1781 +vllm/model_executor/models/molmo.py,sha256=YA9QK_PBx14zfDrsU7jQQw2N_gDSlFponFM8CpMJyzM,51182 +vllm/model_executor/models/mpt.py,sha256=o-tCgyyRdaGCNjCvhToYU26dEiRLbLoLe0Hh8PSqSzM,13421 +vllm/model_executor/models/nemotron.py,sha256=HZEkqoQWNU0cRLNl_Amdcj6Wp4JyjOiWts1YcxLqRrs,21787 +vllm/model_executor/models/nvlm_d.py,sha256=SoSuZbsa3R-eJMY1XjEo-EO0FCTCynenqPrbXzXFCOM,8968 +vllm/model_executor/models/olmo.py,sha256=Xnfapv4Ryt-JxOJkh6cqaMYXebUQ4LxpwVKp6tI-EFI,16063 +vllm/model_executor/models/olmo2.py,sha256=J71jBvD9UoLhsh97dpMPtJbtfONYZH2eIjMiTb-WkUk,16840 +vllm/model_executor/models/olmoe.py,sha256=UY08on5PeihDweaWnEg6iebINBujyQ2-4HHkfsmswjQ,19161 +vllm/model_executor/models/opt.py,sha256=v2WFYCJvjUJuJMfMIWERt8IoGatyX9f1Qj81bZnLG58,17405 
+vllm/model_executor/models/orion.py,sha256=Cy2YMih8IJq4bbhW-ZXfxGFu1UaLdZ815OppeAlB8P8,14901 +vllm/model_executor/models/paligemma.py,sha256=u5MpXY-Du9cGNYoG10q6stmaYqw_-bc0IZPD5mvzn6w,12870 +vllm/model_executor/models/persimmon.py,sha256=cpKyIDlmlJSwAcvrBZw1FG2gaPkrrhX0bh5h19UzA7I,15245 +vllm/model_executor/models/phi.py,sha256=9U-TpDl7pfPv7GwciAVOM_hRaKZD2aKzAjiXz3UGBIA,15032 +vllm/model_executor/models/phi3.py,sha256=dfxzNvOZ2XPpQIy5mdRFsxFZObV1_h5vlC-ak2hw3HI,388 +vllm/model_executor/models/phi3_small.py,sha256=dWf_AH0npQN-fHhmUEiR74cCCgxbRNPJfznPw2rP3KE,18879 +vllm/model_executor/models/phi3v.py,sha256=c8mGnjXVdwHpjgiyEyjUUH1Y9lnH68rvtGdgOJEq4as,28114 +vllm/model_executor/models/phimoe.py,sha256=IdAAb0jEnayLnDY_D76QS2TwShrmzM6X_5k9i04yluw,25472 +vllm/model_executor/models/pixtral.py,sha256=ABWdfLqWhCr28H0A9vDr7HYkLHGg7ipPlPoiPc0Z5uA,41529 +vllm/model_executor/models/qwen.py,sha256=7_1VoLjiSGdr2pr9VnRfZutY9HH82IiIc9K7WYXNx20,41502 +vllm/model_executor/models/qwen2.py,sha256=a6RdZmM2ecaxM3vP8lG0Ogm3_Phg9cQRgzeZwCYkLco,23047 +vllm/model_executor/models/qwen2_5_vl.py,sha256=EDFReNttsBng86dUewcWYazXxtGI5ch14viXgsL26Yo,46131 +vllm/model_executor/models/qwen2_audio.py,sha256=tY9YFLh_H1PNUwMyiBNrjlWgHbN2qSPihly8JNDUPjg,17617 +vllm/model_executor/models/qwen2_moe.py,sha256=em1sY2Z2TPNqjTpxoZ-HeVdljoEGMh0D1ovhtpAZ_t4,23132 +vllm/model_executor/models/qwen2_rm.py,sha256=aqgmbfEvQ999rhjskXQxhNyNvBkT9K_fYJjJ7fA7QYI,4884 +vllm/model_executor/models/qwen2_vl.py,sha256=yNtb6CsQWIDoPo8GtiIyO2e9CdxpgIiRWgEiEdKxvE8,52595 +vllm/model_executor/models/registry.py,sha256=SSdZR3zT3la_3QnGb9dTie_SRv3b3kudYBCDJ5FwkP4,20528 +vllm/model_executor/models/roberta.py,sha256=gvbrPs3nYsBqRmagCPygSYAPIixQCXHXiblEYZudkgg,10719 +vllm/model_executor/models/siglip.py,sha256=dFgb6-AAp4I-EbSb3Lbpgv5NzfhFo4yX5G9wZEbnrNg,23509 +vllm/model_executor/models/solar.py,sha256=LKrf1z-QSVbUX-JtUMEeh94MhoaQnWuPMyc6NKRsgD4,21015 
+vllm/model_executor/models/stablelm.py,sha256=8gTWWqAQeYbukECa6Nz5g7-ftMq6Q1aL0BPYCNQf5GU,15989 +vllm/model_executor/models/starcoder2.py,sha256=bGwDgB5s0syaAMn2Rc-dSrZQIca3VKY3PFgi5xVZFBE,15298 +vllm/model_executor/models/telechat2.py,sha256=wtbgsiw2c21C3H4Db5LYi_DIqRJXpZhO9thMUWoR1zw,5829 +vllm/model_executor/models/transformers.py,sha256=-N6Cg6F6PbxwXCoZ2Jqh_9h5_tuNFwPLED-q2iZteyQ,10459 +vllm/model_executor/models/ultravox.py,sha256=XK7w7IGLIpPKHvHenHrriM4xFdJufVgk92NFc1M5anA,22630 +vllm/model_executor/models/utils.py,sha256=z9SuSqehkN2wDmTRHkzY3S71MYWTTIgdx9EQN7wViGk,21546 +vllm/model_executor/models/vision.py,sha256=LkaAo0Re1iMZI41JxAsUshVeTZU6OKv5t2BcPJrEFi0,5554 +vllm/model_executor/models/whisper.py,sha256=zdqLtrWZOGsqLI09gh2WqCqrdyYRY79jNYVnG3jwJf4,28179 +vllm/model_executor/parameter.py,sha256=SAO196rVPbQycr1CixtAROIC2SHFXuki2Q0ykAL5uLo,15753 +vllm/model_executor/pooling_metadata.py,sha256=FeDxnEg8W8-ZOtg17JE1z26RfrKaU2_ZaWvwkochqKA,2077 +vllm/model_executor/sampling_metadata.py,sha256=bnNL-A_4DtG3alnqi38kAtOXywlDEAqdm_GPF86s_7E,22967 +vllm/model_executor/utils.py,sha256=o1nKePmbzfAwPqVqXJbOGwDUXy_fLUGaFZlsPEbMhpI,1915 +vllm/multimodal/__init__.py,sha256=3iq4bjqOr0U4LC7kLvqIH-xdlIeCzn8lmmMLU_ecfO8,967 +vllm/multimodal/__pycache__/__init__.cpython-311.pyc,, +vllm/multimodal/__pycache__/audio.cpython-311.pyc,, +vllm/multimodal/__pycache__/base.cpython-311.pyc,, +vllm/multimodal/__pycache__/hasher.cpython-311.pyc,, +vllm/multimodal/__pycache__/image.cpython-311.pyc,, +vllm/multimodal/__pycache__/inputs.cpython-311.pyc,, +vllm/multimodal/__pycache__/parse.cpython-311.pyc,, +vllm/multimodal/__pycache__/processing.cpython-311.pyc,, +vllm/multimodal/__pycache__/profiling.cpython-311.pyc,, +vllm/multimodal/__pycache__/registry.cpython-311.pyc,, +vllm/multimodal/__pycache__/utils.cpython-311.pyc,, +vllm/multimodal/__pycache__/video.cpython-311.pyc,, +vllm/multimodal/audio.py,sha256=Dt6wQhAOnB0Di_4jMTU8n20GtYq99LwBBM1Tgz94trg,2092 
+vllm/multimodal/base.py,sha256=yuCE4JDh76aWVrKlz7ZY_eNAkuACq6FAZ8teabukYyM,15964 +vllm/multimodal/hasher.py,sha256=qUzT2Oz-4wTcTWuA8fHz4kGDk1k7HRunBZvhHIaOYRg,2838 +vllm/multimodal/image.py,sha256=NGxSAXwQnE5AqlZGrkaIFvWIePvzaJPZ4YquVfxpyzc,4529 +vllm/multimodal/inputs.py,sha256=6DpfaJ9KTV5PjaHUjHBo4jCdwePUuBWovwUu8YdMKt4,23667 +vllm/multimodal/parse.py,sha256=_-kJEvSQsHWOERLTWQ4e51frKBfnkA1FmH30pB6G4Fw,11359 +vllm/multimodal/processing.py,sha256=NrPx-WNjFYM-D9OzK1xxPwIucTvo12nxvp9K4pUvd18,43358 +vllm/multimodal/profiling.py,sha256=SkEw8ObL3FpXZ2BcybQOqYJMxTMJg58PcWTvo_Vj5bY,6857 +vllm/multimodal/registry.py,sha256=827zcATMEtSyPTaSMkPq7152OpwHglXlxfw7FOMjMOQ,16266 +vllm/multimodal/utils.py,sha256=vEgQmhIcR5hJQy9u0h0ilJ_zWTMyj_Q0jAalC369cRE,17118 +vllm/multimodal/video.py,sha256=33mntwtUdLkfn5780H5oRHBBA1rK3cMPrpBZ99oj_U8,6104 +vllm/outputs.py,sha256=VhYOTyo7-ZUK7k808oOycsq8wUq046hR45U3ZwhDgTE,19881 +vllm/platforms/__init__.py,sha256=ffMBLIbsJ4QgJ9mfglnDtXoJXWu1Y7CqIo0wNmUqoJk,7332 +vllm/platforms/__pycache__/__init__.cpython-311.pyc,, +vllm/platforms/__pycache__/cpu.cpython-311.pyc,, +vllm/platforms/__pycache__/cuda.cpython-311.pyc,, +vllm/platforms/__pycache__/hpu.cpython-311.pyc,, +vllm/platforms/__pycache__/interface.cpython-311.pyc,, +vllm/platforms/__pycache__/neuron.cpython-311.pyc,, +vllm/platforms/__pycache__/openvino.cpython-311.pyc,, +vllm/platforms/__pycache__/rocm.cpython-311.pyc,, +vllm/platforms/__pycache__/tpu.cpython-311.pyc,, +vllm/platforms/__pycache__/xpu.cpython-311.pyc,, +vllm/platforms/cpu.py,sha256=32LYr4_vfIafXtkjLLUx63y8s35hdqdgzWPgwMGxA3k,5572 +vllm/platforms/cuda.py,sha256=3Dvm9hivXW8ePq7Q7bsip3Fo1x2hWFROfLN4PnrxwMA,14593 +vllm/platforms/hpu.py,sha256=1SIy0lqR-Sb7z6WXRMwQwAZ4HzHzqU0WfkzAUsUk4zg,3351 +vllm/platforms/interface.py,sha256=cwJcvbKOEgoB2rGj_O6a7k45sdYkieLcsCUMys_3g14,9705 +vllm/platforms/neuron.py,sha256=AAJ_0_xoC_v5zo3YrHtSDJAxgyrT3mribpNZVurND90,1817 
+vllm/platforms/openvino.py,sha256=ZYlSScMIf-pED-2AYCc5-8w5Z86F36A4au6ZbpcPlCo,5885 +vllm/platforms/rocm.py,sha256=JYjqIyPuhh5SUagDdyrm69EFvlxI17fIgKD-YpAEelU,6617 +vllm/platforms/tpu.py,sha256=lhRE9sYuvSjplOH3H1XnBdT_LHqxTt42bXSK-RwEiU0,3472 +vllm/platforms/xpu.py,sha256=c2Ym0nVkfZtdF6ehzA7w7k-jVxsQBZdiKr0W5eHGsT4,5352 +vllm/plugins/__init__.py,sha256=eZT4FNCwDJhjMADo5KgKBaGLd_7YR9LzhFgnLuwhAnc,2962 +vllm/plugins/__pycache__/__init__.cpython-311.pyc,, +vllm/pooling_params.py,sha256=zotcVMzPjm46lxg9fAcCFf2goFqbf6cySIAEg1fnVdo,751 +vllm/profiler/__init__.py,sha256=GWIaNWjYFXuIGl4qCtZyhz-Z88ysZH-mz4G7pYKOe78,128 +vllm/profiler/__pycache__/__init__.cpython-311.pyc,, +vllm/profiler/__pycache__/layerwise_profile.cpython-311.pyc,, +vllm/profiler/__pycache__/utils.cpython-311.pyc,, +vllm/profiler/layerwise_profile.py,sha256=WIh-o0z6si4ua2sCvqRjs4OekSY5PvP7XPgBLud9DTI,13837 +vllm/profiler/utils.py,sha256=itpGYyhLXCQ07nT_dL7seAPJyDlPv9f2Dg6Grh5lpAE,4663 +vllm/prompt_adapter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/models.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/request.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/worker_manager.cpython-311.pyc,, +vllm/prompt_adapter/layers.py,sha256=aoeVO5L_abEwB92Groio_5lB9al37OiEJac_GkSuCko,2726 +vllm/prompt_adapter/models.py,sha256=D1hawhlVLFoa0a6FFIRBsWLYVgEbtaT2WCkSTgB4PLc,13721 +vllm/prompt_adapter/request.py,sha256=2_iVKLKdWE3kbt7ja_wPHhCerMqVXlg6ztDw344Yd6I,838 +vllm/prompt_adapter/utils.py,sha256=fMMwK-mzuhT9Wu9araO0rSdtNkAmTNvsCAfQXfOkWQk,3668 +vllm/prompt_adapter/worker_manager.py,sha256=qMEPVkdg2_L4bYSIBg_XPEM5As8UgrPrgudoiG4kEAE,7536 +vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65 +vllm/sampling_params.py,sha256=JWzBhdW6anpuRdlWY7BXLLHsTKje1EhVGI3ov744qao,22364 
+vllm/scalar_type.py,sha256=hW0VB2_F5_EFO7tDLDkdN4LrENiynZsa3D3zSndp9kc,11806 +vllm/scripts.py,sha256=6EJGP0l5nYAkS9kOFBfNuhZ8BCjCPPCPi-WvYmymqqc,7249 +vllm/sequence.py,sha256=DfBfbyQbGYiuQbcV6b9NS_XUa9U8VcKWkgCX1aCtlEM,57947 +vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/spec_decode/__pycache__/__init__.cpython-311.pyc,, +vllm/spec_decode/__pycache__/batch_expansion.cpython-311.pyc,, +vllm/spec_decode/__pycache__/draft_model_runner.cpython-311.pyc,, +vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc,, +vllm/spec_decode/__pycache__/medusa_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/metrics.cpython-311.pyc,, +vllm/spec_decode/__pycache__/mlp_speculator_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/mqa_scorer.cpython-311.pyc,, +vllm/spec_decode/__pycache__/multi_step_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/ngram_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/proposer_worker_base.cpython-311.pyc,, +vllm/spec_decode/__pycache__/smaller_tp_proposer_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/spec_decode_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/target_model_runner.cpython-311.pyc,, +vllm/spec_decode/__pycache__/top1_proposer.cpython-311.pyc,, +vllm/spec_decode/__pycache__/util.cpython-311.pyc,, +vllm/spec_decode/batch_expansion.py,sha256=_n2h_IcPtL1XhAEVtv0pOPNPvf9GDqfojdZhCAneUjk,22740 +vllm/spec_decode/draft_model_runner.py,sha256=LDc_sNHBkkxTkocuFBgeA4Tdi62oAciZOH4tGG02Dxw,13973 +vllm/spec_decode/interfaces.py,sha256=_hnecfFBgi-_wKMIxPobP-DRaefD769WqethORvcofA,3086 +vllm/spec_decode/medusa_worker.py,sha256=liPn4iIMqJ8owL4cXgrDTVb3bjpUKBlqeNDlFlwEvjc,4900 +vllm/spec_decode/metrics.py,sha256=zCNetl5Ajwk4e1Yl6cig45GQU-lIwwz3FYBtzTGimws,8062 +vllm/spec_decode/mlp_speculator_worker.py,sha256=NuOIAQtZxCWPs7YXWRJW3KwFCT4i3ETCobsA0gK-qVs,3737 +vllm/spec_decode/mqa_scorer.py,sha256=GdhVN2Ef3L_LlnoftqivY0rRKgPF7EV81XUu3PyMouo,7508 
+vllm/spec_decode/multi_step_worker.py,sha256=GtoiHk_XleDafQCIeE-JlfYJOFEU320rn7gI7Z3-qRQ,18013 +vllm/spec_decode/ngram_worker.py,sha256=rljyKSx-odVLsGVG6U6H53P4T8U6fblPyAM3xJOaNaI,7754 +vllm/spec_decode/proposer_worker_base.py,sha256=Iv_xg7F5o3hCCRFNj84BR_mpWy9K4xAUKrLLcK-tzLo,2089 +vllm/spec_decode/smaller_tp_proposer_worker.py,sha256=WY29xqghYzyX7mFS-s4zBxBjqRguxUqCWZEK7fUXbUw,6175 +vllm/spec_decode/spec_decode_worker.py,sha256=5DL4UUwMukkQ-YHdZwH1y3y_QHjgoZ7-1DOjvG9xh8w,60601 +vllm/spec_decode/target_model_runner.py,sha256=sRW1sqPhEdEwL5MvHCRRmd-lmc-G8LjJgG7R22R6JM0,2073 +vllm/spec_decode/top1_proposer.py,sha256=sEvZMEoj_s9-aXzkX6faUZuffoPtxATuMbEmRnZKecM,12354 +vllm/spec_decode/util.py,sha256=Btt4tyWuZUeOdIScKIDwD5xUJSeTNZwjbFiQtkF5oVU,9684 +vllm/tracing.py,sha256=fR1aDoun27YzEPBuhmb3Namm4DNvXsunqQ8X9v_2YOw,4749 +vllm/transformers_utils/__init__.py,sha256=RZEL-BvlcJuQJFMPZLM-LjViwZxSsV2o8Tnx7strgDU,617 +vllm/transformers_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/config.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/detokenizer.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/detokenizer_utils.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/processor.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/s3_utils.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/tokenizer.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/utils.cpython-311.pyc,, +vllm/transformers_utils/config.py,sha256=iVoBL_qKY4syyfb0UyelFZsO6O9bD5b6fmAVxK5daQ4,22470 +vllm/transformers_utils/configs/__init__.py,sha256=Jmr0QgIrZCL_NtbUY5e-seSzJs4CpDwpak2NFhb2OrI,1993 +vllm/transformers_utils/configs/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/arctic.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/chatglm.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/cohere2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/dbrx.cpython-311.pyc,, 
+vllm/transformers_utils/configs/__pycache__/deepseek_vl2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/eagle.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/exaone.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/falcon.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/h2ovl.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/internvl.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/jais.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/medusa.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mllama.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mlp_speculator.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mpt.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/nemotron.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/nvlm_d.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/olmo2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/solar.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/telechat2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/ultravox.cpython-311.pyc,, +vllm/transformers_utils/configs/arctic.py,sha256=_c6snrc6ypgzlZmylCZD8bNNhqFSuppKDlDTeWLzdLE,8990 +vllm/transformers_utils/configs/chatglm.py,sha256=6H5Hv6Z_yziBZG9q4N_0Obj6eHsGL9DrxQeBhkLsZ9Y,2870 +vllm/transformers_utils/configs/cohere2.py,sha256=yeLdn79NO2kyuYH9IqRAO0WGbMJCh5ntd1jabtZaWJs,10353 +vllm/transformers_utils/configs/dbrx.py,sha256=d0xE5WH999Pxqp7v1MXJzmk10cwp0UMOOBADD8bO5rU,10957 +vllm/transformers_utils/configs/deepseek_vl2.py,sha256=QICTzlaoLil0UUXc0fwgl-CImEPpjOnTy1dAUweKLDw,7252 +vllm/transformers_utils/configs/eagle.py,sha256=er_MAjFQmUxBh3qeCcGIUWlqel9PoRQbsO1VFQ4boLI,1593 +vllm/transformers_utils/configs/exaone.py,sha256=o82MCDMCtD8cHvrPzJroEpJV5e4xuVK3yzlHBq4IjvE,8883 
+vllm/transformers_utils/configs/falcon.py,sha256=1w9gXJJPzvup7Hd05O1xYzp_IDXDdtxByt06U92uy7c,2917 +vllm/transformers_utils/configs/h2ovl.py,sha256=Tsyex8PgWS_WEuzgTZ9vGcgn7Pv1m0yJDs64Q2meT_Q,489 +vllm/transformers_utils/configs/internvl.py,sha256=hqm1INrEucyhhPKQhwRiwEZ6Ulw1gvnFIw1EISnE5QI,1867 +vllm/transformers_utils/configs/jais.py,sha256=ZSi5YaOVeramO6DOZ5nj7nQwsE20Sxgl6ZxhAFPAdys,10359 +vllm/transformers_utils/configs/medusa.py,sha256=2gSa-OtMNHi2eL_AJDgbslqb9ntcg9fRfhhgRPGoxr0,1943 +vllm/transformers_utils/configs/mllama.py,sha256=lIkiJ83huJQq7kLAV2hfLWAcUxWVT9aa1YAcSRUzz1Y,805 +vllm/transformers_utils/configs/mlp_speculator.py,sha256=MgeWpPARW5jwb8Nw1hnZaqJbdDdBOc_a_ESTeRy3O8g,2437 +vllm/transformers_utils/configs/mpt.py,sha256=zLtFoXM4PKJK67mQoeBOwLQrT4wR-zdEMYAsGrHV108,7589 +vllm/transformers_utils/configs/nemotron.py,sha256=xw--8lmM5VzLM6Nfyha4vaeRvVYh5v3bjrAP_Z010nk,8974 +vllm/transformers_utils/configs/nvlm_d.py,sha256=2Mr9ZAI6VG0DbLDw0BnFEIgeiZd7ip3bSoVsfcEeNqQ,458 +vllm/transformers_utils/configs/olmo2.py,sha256=d_YFuCwGdWR6-P7oQA2LS5XR-vD6TEUByo7qnn0Ez3Q,8075 +vllm/transformers_utils/configs/solar.py,sha256=y5b9R4mQXdgi-fUv2ZqMIFouW7P57lT5nppn54aCOuo,10841 +vllm/transformers_utils/configs/telechat2.py,sha256=JsOuzKHPQHqtJBZNi27dtwc-FWelsQ9GlmORN2AubPE,2200 +vllm/transformers_utils/configs/ultravox.py,sha256=14glDZ9IVoGMt-DMp_O2NeUDhKge_xHDRKamvuoqSS8,4119 +vllm/transformers_utils/detokenizer.py,sha256=tjD1mplMeTvXrFA25A8tYJr3pDezESoaM3m88N7D8HY,7250 +vllm/transformers_utils/detokenizer_utils.py,sha256=qrDj9iKcbjtMYOE12wnFkWccFmVTRB9W8W6BkqzBItg,6769 +vllm/transformers_utils/processor.py,sha256=0Qh4moGATpwqlDrGExHi8c7HoLDAIJq_MT5bQPtGwIw,3618 +vllm/transformers_utils/processors/__init__.py,sha256=WloJ524I5uG04zlyJVWoPtDGVzlRvWpVsuwcczjOM3o,165 +vllm/transformers_utils/processors/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/processors/__pycache__/deepseek_vl2.cpython-311.pyc,, 
+vllm/transformers_utils/processors/deepseek_vl2.py,sha256=BI8GbCxTl1mkkQ-f3NINzEa-fJ4o78_JlIGn5Ba90So,14598 +vllm/transformers_utils/s3_utils.py,sha256=KZEHwQjQcn1ph8kxWbd0Wzb2lnqYGzeiQsLssFRy9-E,4733 +vllm/transformers_utils/tokenizer.py,sha256=6BxkoQv9lxjOsqbo3pK3U0kN81cFgTT9qLdNogb-RX4,8905 +vllm/transformers_utils/tokenizer_group/__init__.py,sha256=LKgVU0nyJxinK3lJqSmcrGSV0WRsh208y-C0nnZFhLE,2455 +vllm/transformers_utils/tokenizer_group/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/base_tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/ray_tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py,sha256=PMkdfJWPsuhf6pHuLZ_vNWbIV5DZE6WxKXU1Qh_WaYQ,2055 +vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py,sha256=KnFwojkzriWCVXdauEYhpeqratpl_JuLadhxUnceFT8,9807 +vllm/transformers_utils/tokenizer_group/tokenizer_group.py,sha256=BEDWo8fgUtTqrwPFThK2h7uHP49ea9wUmLZeHCbVTYk,4569 +vllm/transformers_utils/tokenizers/__init__.py,sha256=c92zhoHIgS-rSpv2_mR9Rm5AotVZuDaDDUPDjXrUfNA,167 +vllm/transformers_utils/tokenizers/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/tokenizers/__pycache__/mistral.cpython-311.pyc,, +vllm/transformers_utils/tokenizers/mistral.py,sha256=8sDRsMV421m92VU15svs_PaK3O5ltmpQRMthBwV0K6Y,15297 +vllm/transformers_utils/utils.py,sha256=dNXEHooc1Fs62-goF5Zri6u4Qa6fmQrHwvtknVdlcPc,529 +vllm/triton_utils/__init__.py,sha256=wDNYu8rRvsNfoosE-FXjcEtbXRuteh6X_nxdIMVe5xk,281 +vllm/triton_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/triton_utils/__pycache__/custom_cache_manager.cpython-311.pyc,, +vllm/triton_utils/__pycache__/importing.cpython-311.pyc,, +vllm/triton_utils/custom_cache_manager.py,sha256=M6Ee0uBVqN9HXlp_qKdZ7Wqat14sSqnkErO8wrYeUsA,2265 
+vllm/triton_utils/importing.py,sha256=_ADlYGmmMMVaUX6K97C94FjaKKywGPTgZAgGzSDpTKw,450 +vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/usage/__pycache__/__init__.cpython-311.pyc,, +vllm/usage/__pycache__/usage_lib.cpython-311.pyc,, +vllm/usage/usage_lib.py,sha256=g-hS5j3g56lK7lAp3hqEFuMzwo1YLzsyu6aoSDPuolU,8421 +vllm/utils.py,sha256=6rx8FC8FRs878Ubl-qlykSahb7JHWHEyXc5nXiZOQjA,75830 +vllm/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/__pycache__/kv_cache_interface.cpython-311.pyc,, +vllm/v1/__pycache__/outputs.cpython-311.pyc,, +vllm/v1/__pycache__/request.cpython-311.pyc,, +vllm/v1/__pycache__/serial_utils.cpython-311.pyc,, +vllm/v1/__pycache__/utils.cpython-311.pyc,, +vllm/v1/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/attention/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/attention/backends/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/attention/backends/__pycache__/flash_attn.cpython-311.pyc,, +vllm/v1/attention/backends/flash_attn.py,sha256=GUSuwNWJF3SVlvAysNXca_qAWId-OWHjn2_CJKUsEeg,17304 +vllm/v1/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/core/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/core/__pycache__/encoder_cache_manager.cpython-311.pyc,, +vllm/v1/core/__pycache__/kv_cache_manager.cpython-311.pyc,, +vllm/v1/core/__pycache__/kv_cache_utils.cpython-311.pyc,, +vllm/v1/core/__pycache__/scheduler.cpython-311.pyc,, +vllm/v1/core/encoder_cache_manager.py,sha256=OFTBwqzbGCUFFj6A4zptePajUAW0EGnIGLl3R7PD64U,4624 +vllm/v1/core/kv_cache_manager.py,sha256=o7c8wg3S5FlNx3LMWnng24dXmiZ6KZYu44x_sGwQCkA,21371 +vllm/v1/core/kv_cache_utils.py,sha256=hNS-7Uk13dFjH6U6T3_qca3TwxFO7o0oQL7SM25qnNQ,17248 +vllm/v1/core/scheduler.py,sha256=8HHIGBkanR7HLqwut7_VdJH796e8U1SSLfiHaqpty8U,27167 
+vllm/v1/engine/__init__.py,sha256=eIUx6-vw8oJHeWN94t5Bb0aAFGcuJjCHVDYq75hOrSE,3024 +vllm/v1/engine/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/engine/__pycache__/async_llm.cpython-311.pyc,, +vllm/v1/engine/__pycache__/core.cpython-311.pyc,, +vllm/v1/engine/__pycache__/core_client.cpython-311.pyc,, +vllm/v1/engine/__pycache__/detokenizer.cpython-311.pyc,, +vllm/v1/engine/__pycache__/llm_engine.cpython-311.pyc,, +vllm/v1/engine/__pycache__/mm_input_mapper.cpython-311.pyc,, +vllm/v1/engine/__pycache__/output_processor.cpython-311.pyc,, +vllm/v1/engine/__pycache__/processor.cpython-311.pyc,, +vllm/v1/engine/async_llm.py,sha256=UyBKtkjUome0Vr-bu4nDPQ5zF3r7JdP3OgssAtX98sM,14239 +vllm/v1/engine/core.py,sha256=rFvfX7yzOkxdiy07iIx8GFM8OjyEyBTT8znzikVHvQg,11527 +vllm/v1/engine/core_client.py,sha256=uksWk6uoT0pC0E2QucXHfq7h0ZIL5DRXzFCS1kXTT8s,10829 +vllm/v1/engine/detokenizer.py,sha256=w5d3ipzDdg8OoWySfbNRUiT56UCW74hX7wZr0CNIylU,6500 +vllm/v1/engine/llm_engine.py,sha256=-vAz89iFujkAnj83SPjE1W4U4eAPVBazDLqrbJRIdtU,7092 +vllm/v1/engine/mm_input_mapper.py,sha256=7Oc89E9scGXKlqa92bXT8sf9kqgjMSzDCQo2y97M4Ok,5128 +vllm/v1/engine/output_processor.py,sha256=Xn6Nx554mFBUrzk2EE7eFsriSdAoYv8-nREnKFZ099Q,7767 +vllm/v1/engine/processor.py,sha256=rGfj0-zy_OvepX8WzEpni-tQKiR1GvFahatqhVm5Ja0,9801 +vllm/v1/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/executor/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/executor/__pycache__/abstract.cpython-311.pyc,, +vllm/v1/executor/__pycache__/multiproc_executor.cpython-311.pyc,, +vllm/v1/executor/abstract.py,sha256=PMxiTgz9aFSMXhg5RvJ3dJI3NYXz4LyrhA1sChoL5L0,3624 +vllm/v1/executor/multiproc_executor.py,sha256=tVOolD_D3i5i9rg6uoZreu766WAopVghvTQ_aQqunlo,14125 +vllm/v1/kv_cache_interface.py,sha256=0-n-04vufyNiDB6RSHAR3QYUbB8UlBWT0Yln1TS9Xco,3412 +vllm/v1/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/metrics/__pycache__/__init__.cpython-311.pyc,, 
+vllm/v1/metrics/__pycache__/loggers.cpython-311.pyc,, +vllm/v1/metrics/__pycache__/stats.cpython-311.pyc,, +vllm/v1/metrics/loggers.py,sha256=AdcLH_19T-rMWE3Nzy9gvuosoy2tJtF7Qxkvl8cc7Pg,8451 +vllm/v1/metrics/stats.py,sha256=OFkm1fypbjqfjp7QrP66wnb8rxwf36HqWBJsxxoGexE,2847 +vllm/v1/outputs.py,sha256=GdsZ06OLJdYeZma5ojv8suUoIibGKDsqPKIgjWVBtIA,1007 +vllm/v1/request.py,sha256=yUzy_DzOFybElnpNdTc7GLSJd9Dch2n-bEZtTx21tWM,6009 +vllm/v1/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/sample/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/sample/__pycache__/metadata.cpython-311.pyc,, +vllm/v1/sample/__pycache__/sampler.cpython-311.pyc,, +vllm/v1/sample/metadata.py,sha256=e76Oop4JBSEn1P7MQ7UrQRkn2uaNvjo6SeG1iRJT8Zs,683 +vllm/v1/sample/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/sample/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/sample/ops/__pycache__/penalties.cpython-311.pyc,, +vllm/v1/sample/ops/__pycache__/topk_topp_sampler.cpython-311.pyc,, +vllm/v1/sample/ops/penalties.py,sha256=dFk7rV5xKo9Aq0vhKa41FXOn0EYT8aLlqwR2gMgWWzs,2261 +vllm/v1/sample/ops/topk_topp_sampler.py,sha256=WYuJV4RL-FJfqHzPG4B7tT2fMZRiA1YHeRRrbpInySk,7692 +vllm/v1/sample/sampler.py,sha256=lZnzSzCINYH0BuP-9ZjE5UD2b26JjZCiIqO9CVQTgjY,5069 +vllm/v1/serial_utils.py,sha256=Kzrt_ObsfIbLJC-0jzlvijPtD1sCkMvPerJUXGVH8JA,200 +vllm/v1/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/stats/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/stats/__pycache__/common.cpython-311.pyc,, +vllm/v1/stats/common.py,sha256=4IZaANJVZZtNAngonteN9erA4wP4WirBLj0D8CKY1Aw,17243 +vllm/v1/utils.py,sha256=jSi9kVGceXsqPMyTiHhWwmSP0K6qoivzC0rIUvks_bA,5939 +vllm/v1/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/worker/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/worker/__pycache__/block_table.cpython-311.pyc,, +vllm/v1/worker/__pycache__/gpu_input_batch.cpython-311.pyc,, 
+vllm/v1/worker/__pycache__/gpu_model_runner.cpython-311.pyc,, +vllm/v1/worker/__pycache__/gpu_worker.cpython-311.pyc,, +vllm/v1/worker/block_table.py,sha256=VkT1mhAkgWH-lzsRYqrXR3FCDM2p2xDtCnt-iHqjzQk,2492 +vllm/v1/worker/gpu_input_batch.py,sha256=fWlovFiHVduCfnfUaqyOHHYrndJmEHzM0L8vefEvsEQ,18285 +vllm/v1/worker/gpu_model_runner.py,sha256=xAEnV87iFAWRn7nOWFUBwRXLRpbB57XK4ZChQ3kA2Do,52931 +vllm/v1/worker/gpu_worker.py,sha256=QSMmIANmkc7YwPBpgsh6L_99eyT6fhWasbp6GDS2O5Y,12029 +vllm/version.py,sha256=FezMCgn-HcEBLdlhPHeLde8CpiHzYM-h0vERNcjoo4g,335 +vllm/vllm_flash_attn/__init__.py,sha256=TmGCpoLn69L1Yvx5uvJ2CEEQdwgPsoHT5kUa1tI4Qs4,309 +vllm/vllm_flash_attn/__pycache__/__init__.cpython-311.pyc,, +vllm/vllm_flash_attn/__pycache__/flash_attn_interface.cpython-311.pyc,, +vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so,sha256=HQRmcexu55HoazXbE7Dh8OYp_uzC1JvoLu2TgX8paSE,220721128 +vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so,sha256=YyUAc9Mt0xrz3YLKaCWq269FYsnEzZM2rbhvkfPU4UM,275990736 +vllm/vllm_flash_attn/flash_attn_interface.py,sha256=3H1Ret-yAcom5BAUF2fRbqEcwMA3O3Jto51p7fSzBOA,24016 +vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/worker/__pycache__/__init__.cpython-311.pyc,, +vllm/worker/__pycache__/cache_engine.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_enc_dec_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_pooling_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/enc_dec_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/hpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/hpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/model_runner_base.cpython-311.pyc,, +vllm/worker/__pycache__/multi_step_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/multi_step_tpu_worker.cpython-311.pyc,, 
+vllm/worker/__pycache__/multi_step_worker.cpython-311.pyc,, +vllm/worker/__pycache__/neuron_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/neuron_worker.cpython-311.pyc,, +vllm/worker/__pycache__/openvino_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/openvino_worker.cpython-311.pyc,, +vllm/worker/__pycache__/pooling_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/tpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/utils.cpython-311.pyc,, +vllm/worker/__pycache__/worker.cpython-311.pyc,, +vllm/worker/__pycache__/worker_base.cpython-311.pyc,, +vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/xpu_worker.cpython-311.pyc,, +vllm/worker/cache_engine.py,sha256=cBpnZouy9kTiGWzg_WWLGaa9KMchC8UwyXkmvomUrc8,7050 +vllm/worker/cpu_enc_dec_model_runner.py,sha256=2OYtwp1Y3SZo13KZvSP7V7PjYn40i2ChR9tlx7-mW54,13135 +vllm/worker/cpu_model_runner.py,sha256=pwnqGJQgbAh8VEDXWI50PvuspTgtTbsWaDblkkp1BfY,28815 +vllm/worker/cpu_pooling_model_runner.py,sha256=yNxunKCqCIsbhHk-OSz3HxiBI85lE5vFFEkQ7BeTynM,5363 +vllm/worker/cpu_worker.py,sha256=acb-D-uEBwnwVfUTkHotXAs7EhdgV8Sd4b-dL6PO9NM,15547 +vllm/worker/enc_dec_model_runner.py,sha256=02qM0PJeZfMetmWJy8orlSX4TO4CFZHRRZFBIP9SkUU,22836 +vllm/worker/hpu_model_runner.py,sha256=eS8UYJK_J4_yR7U8YVuyEOexvwVGOAxdvTE_ivN9Z0Y,89277 +vllm/worker/hpu_worker.py,sha256=zi5AvpShrl7_rnHBwDTFqpkLhn6fszze-bDU3ScKa5s,21449 +vllm/worker/model_runner.py,sha256=x9KNwneFLCZo7wSrquMSSnG85WmInJKoJXtmRzPx3b4,89960 +vllm/worker/model_runner_base.py,sha256=FbU_tMPxO6u2AEgdupiE2q99FcxMpVYffDyuN0GHwzE,8558 +vllm/worker/multi_step_model_runner.py,sha256=F1XIpv7RcD_ZpF0jY0VvIpE_HUsl1dlMofEity8UjeA,39221 +vllm/worker/multi_step_tpu_worker.py,sha256=c6A1A6Aagis6VyskYP6jP35Mt5paZwDV2jn750Sn5LA,4450 +vllm/worker/multi_step_worker.py,sha256=90a_O9w3bmGHjs5rGVsNEbYzMMhNVZSqLLyOVAHCxk0,9416 
+vllm/worker/neuron_model_runner.py,sha256=aY3Ch_9QlkdG564MmahfGr2HlQgSLWC7XHGQnpAsdVY,14883 +vllm/worker/neuron_worker.py,sha256=ecWBUuUjWcd8iLI-zW_pqJMqNhsYlaReZ3zdllUkfFQ,4983 +vllm/worker/openvino_model_runner.py,sha256=_rTyHFWZCIyhbMfBavoqfnjhBR_5si8JCNWFqPr1nEs,14612 +vllm/worker/openvino_worker.py,sha256=qziTiMKhiawK4-hiHBl9u3oFnlGZhWuJdj42pPxyzW0,24760 +vllm/worker/pooling_model_runner.py,sha256=HKIJcRjY-kkju4CQdT_j4Tc_-l4WjGr1O4zNwEF0-Qo,8739 +vllm/worker/tpu_model_runner.py,sha256=fT9HI5s-VPxT36zbUevyELbN4Ypqdj6-lb85-0cE-FY,40930 +vllm/worker/tpu_worker.py,sha256=w5a8-dHwdFScDGYxPR-etJPVyhatQSe_N5LJhmjS-m8,13206 +vllm/worker/utils.py,sha256=qDJF2qtK6Pyfa6JV_ZTpIUDYl-rwn3imoshuS7yXr_A,1918 +vllm/worker/worker.py,sha256=TkWZKnwV1ERL3iVgvSCYFu75BpphrTwmwnDI06BXCzk,24401 +vllm/worker/worker_base.py,sha256=XQE4XtlD5yQHuhJ6_GrXCJA7Ouq1THz76K9MRoXP_5s,23399 +vllm/worker/xpu_model_runner.py,sha256=znuq7NtPXsdwHNbhu0FmM0qAsezdKvp8ahaea5eRRO4,25580 +vllm/worker/xpu_worker.py,sha256=ocSjanfjalPQy91LN2hX_SDbjQKxVxTr-PH8mJqIn6M,7929 diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/REQUESTED b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..da2ac557c89f110fa0e5a2f5ad6c3795e352a695 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.8.0) +Root-Is-Purelib: false +Tag: cp38-abi3-linux_x86_64 + diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt new file mode 100644 index 
0000000000000000000000000000000000000000..6e42a03cf3892aa1d87aa4bf82d1b6a951aaa232 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +vllm = vllm.scripts:main diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7a6c7781dce0db526824db3fe81e3675526d398 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt @@ -0,0 +1 @@ +vllm diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f77e36f074136980ff68bff42f2f30201f2788ec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/__init__.py @@ -0,0 +1,13 @@ +from . import testing +from .compiler import CompiledGrammar, GrammarCompiler +from .contrib import hf +from .grammar import Grammar, StructuralTagItem +from .matcher import ( + GrammarMatcher, + allocate_token_bitmask, + apply_token_bitmask_inplace, + bitmask_dtype, + get_bitmask_shape, + reset_token_bitmask, +) +from .tokenizer_info import TokenizerInfo, VocabType diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3792c1041f77e23d183839bcdf80ec54ff5c10bf Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05ad733b20d4a43db9edb2c0849d0a9a6a17b479 
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff930e72fa605a3445a64567f1a92be5beda1de3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b082a1e30bc37420cba3424991796407607b7e16 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c5ebfe36508055b5652512a8f03d42c1e4d57a5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84536f71f943f2656f277fb89a9edaaf2fa60f4e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..dc2baf6ae1ab2215951a5ef63a003fbb2d9a1142 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b922a08382e2ec077337018dabbf44fe2b40ecd7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/base.py b/.venv/lib/python3.11/site-packages/xgrammar/base.py new file mode 100644 index 0000000000000000000000000000000000000000..13a824c9791110cc4b77c46824628b594adcec17 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/base.py @@ -0,0 +1,82 @@ +"""This module provides classes to handle C++ objects from pybind11.""" + +import os + +if os.environ.get("XGRAMMAR_BUILD_DOCS") != "1": + from . import xgrammar_bindings as _core +else: + _core = "dummy namespace" + + +class XGRObject: + """The base class for all objects in XGrammar. This class provides methods to handle the + C++ handle from pybind11. + + In subclasses, the handle should be initialized via the the _create_from_handle, or via + the _init_handle method called within the __init__ method, and should not be modified + afterwards. Subclasses should use the _handle property to access the handle. When comparing + two objects, the equality is checked by comparing the C++ handles. + + For performance considerations, objects in XGrammar should be lightweight and only maintain + a handle to the C++ objects. Heavy operations should be performed on the C++ side. + """ + + @classmethod + def _create_from_handle(cls, handle) -> "XGRObject": + """Construct an object of the class from a C++ handle. 
+ + Parameters + ---------- + cls + The class of the object. + + handle + The C++ handle. + + Returns + ------- + obj : XGRObject + An object of type cls. + """ + obj = cls.__new__(cls) + obj.__handle = handle + return obj + + def _init_handle(self, handle): + """Initialize an object with a handle. This method should be called in the __init__ + method of the subclasses of XGRObject to initialize the C++ handle. + + Parameters + ---------- + handle + The C++ handle. + """ + self.__handle = handle + + @property + def _handle(self): + """Get the C++ handle of the object. + + Returns + ------- + handle + The C++ handle. + """ + return self.__handle + + def __eq__(self, other: object) -> bool: + """Compare two XGrammar objects by comparing their C++ handles. + + Parameters + ---------- + other : object + The other object to compare with. + + Returns + ------- + equal : bool + Whether the two objects have the same C++ handle. + """ + if not isinstance(other, XGRObject): + return NotImplemented + return self._handle == other._handle diff --git a/.venv/lib/python3.11/site-packages/xgrammar/compiler.py b/.venv/lib/python3.11/site-packages/xgrammar/compiler.py new file mode 100644 index 0000000000000000000000000000000000000000..9fac5bb0e193529b1742a934524b5516ca53d779 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/compiler.py @@ -0,0 +1,185 @@ +"""Compiling grammar for efficient token mask generation.""" + +import json +from typing import List, Optional, Tuple, Type, Union, overload + +from pydantic import BaseModel + +from .base import XGRObject, _core +from .grammar import Grammar, StructuralTagItem, _handle_pydantic_schema +from .tokenizer_info import TokenizerInfo + + +class CompiledGrammar(XGRObject): + """This is the primary object to store compiled grammar. + + A CompiledGrammar can be used to construct GrammarMatcher + to generate token masks efficiently. 
+ + Note + ---- + Do not construct this class directly, instead + use :class:`GrammarCompiler` to construct the object. + """ + + @property + def grammar(self) -> Grammar: + """The original grammar.""" + return Grammar._create_from_handle(self._handle.grammar) + + @property + def tokenizer_info(self) -> TokenizerInfo: + """The tokenizer info associated with the compiled grammar.""" + return TokenizerInfo._create_from_handle(self._handle.tokenizer_info) + + +class GrammarCompiler(XGRObject): + """The compiler for grammars. It is associated with a certain tokenizer info, and compiles + grammars into CompiledGrammar with the tokenizer info. It allows parallel compilation with + multiple threads, and has a cache to store the compilation result, avoiding compiling the + same grammar multiple times. + + Parameters + ---------- + tokenizer_info : TokenizerInfo + The tokenizer info. + + max_threads : int, default: 8 + The maximum number of threads used to compile the grammar. + + cache_enabled : bool, default: True + Whether to enable the cache. + """ + + def __init__( + self, + tokenizer_info: TokenizerInfo, + *, + max_threads: int = 8, + cache_enabled: bool = True, + ): + if not isinstance(tokenizer_info, TokenizerInfo): + raise ValueError( + "Please convert the tokenizer to TokenizerInfo before passing it " + "to GrammarCompiler." + ) + + self._init_handle(_core.GrammarCompiler(tokenizer_info._handle, max_threads, cache_enabled)) + + def compile_json_schema( + self, + schema: Union[str, Type[BaseModel]], + *, + any_whitespace: bool = True, + indent: Optional[int] = None, + separators: Optional[Tuple[str, str]] = None, + strict_mode: bool = True, + ) -> CompiledGrammar: + """Get CompiledGrammar from the specified JSON schema and format. The indent + and separators parameters follow the same convention as in json.dumps(). + + Parameters + ---------- + schema : Union[str, Type[BaseModel]] + The schema string or Pydantic model. 
+ + indent : Optional[int], default: None + The number of spaces for indentation. If None, the output will be in one line. + + separators : Optional[Tuple[str, str]], default: None + Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": "). + If None, the default separators will be used: (",", ": ") when the indent is not None, + and (", ", ": ") otherwise. + + strict_mode : bool, default: True + Whether to use strict mode. In strict mode, the generated grammar will not allow + properties and items that is not specified in the schema. This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + schema_str = _handle_pydantic_schema(schema) + return CompiledGrammar._create_from_handle( + self._handle.compile_json_schema( + schema_str, any_whitespace, indent, separators, strict_mode + ) + ) + + def compile_builtin_json_grammar(self) -> CompiledGrammar: + """Get CompiledGrammar from the standard JSON. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + return CompiledGrammar._create_from_handle(self._handle.compile_builtin_json_grammar()) + + def compile_structural_tag( + self, tags: List[StructuralTagItem], triggers: List[str] + ) -> CompiledGrammar: + """Compile a grammar from structural tags. See Grammar.from_structural_tag() for more + details. + + Parameters + ---------- + tags : List[StructuralTagItem] + The structural tags. + + triggers : List[str] + The triggers. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. 
+ """ + tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + return CompiledGrammar._create_from_handle( + self._handle.compile_structural_tag(tags_tuple, triggers) + ) + + @overload + def compile_grammar(self, ebnf_string: str, *, root_rule_name: str = "root") -> CompiledGrammar: + """Compile a grammar from EBNF string. The EBNF string should follow the format + in https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + Parameters + ---------- + ebnf_string : str + The grammar string in EBNF format. + + root_rule_name : str, default: "root" + The name of the root rule in the grammar. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + ... + + @overload + def compile_grammar(self, grammar: Grammar) -> CompiledGrammar: + """Compile a grammar object. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + ... + + def compile_grammar( + self, grammar: Union[str, Grammar], *, root_rule_name: str = "root" + ) -> CompiledGrammar: + if isinstance(grammar, str): + grammar = Grammar.from_ebnf(grammar, root_rule_name=root_rule_name) + return CompiledGrammar._create_from_handle(self._handle.compile_grammar(grammar._handle)) + + def clear_cache(self) -> None: + """Clear all cached compiled grammars.""" + self._handle.clear_cache() diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfd860c8bd4caa90e7a0e746dcf27e5d6ba19388 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd832142f367546c47d7ec6e5385c91baa8f8261 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py b/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py new file mode 100644 index 0000000000000000000000000000000000000000..75c546012a9bafaaeecccff6401f65a6ef0cf999 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py @@ -0,0 +1,104 @@ +""" +This file helps integrate xgrammar in HF transformers package by extending +transformers.LogitsProcessor, which is to be fed to `model.generate()`. +""" + +from typing import List + +import torch +import transformers + +import xgrammar as xgr + + +class LogitsProcessor(transformers.LogitsProcessor): + """ + LogitsProcessor for processing logits in transformers' generate() method. + + Example usage + ------------- + .. code:: python + + model_name = "Qwen/Qwen2.5-0.5B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name) + config = AutoConfig.from_pretrained(model_name) + # This can be larger than tokenizer.vocab_size due to paddings + full_vocab_size = config.vocab_size + tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=full_vocab_size) + + grammar_compiler = xgr.GrammarCompiler(tokenizer_info) + compiled_grammar = grammar_compiler.compile_builtin_json_grammar() + xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar) + model.generate(prompt, logits_processor=[xgr_logits_processor]) + + For an end-to-end example, see folder `examples/hf_transformers/`. 
+ + Notes + ----- + - Note that this LogitsProcessor can only be used once. For each `generate()` call, + instantiate a new one. + - Note that this implementation may contain extra overhead. + """ + + def __init__(self, compiled_grammar: xgr.CompiledGrammar): + """Initialize the LogitsProcessor. + + Parameters + ---------- + compiled_grammar : xgr.CompiledGrammar + A grammar compiled according to the given grammar and the model's tokenizer_info. + """ + self.matchers: List[xgr.GrammarMatcher] = [] + self.compiled_grammar = compiled_grammar + self.full_vocab_size = self.compiled_grammar.tokenizer_info.vocab_size + self.token_bitmask = None + self.prefilled = False + self.batch_size = 0 + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + """ + Accept token sampled in the last iteration, fill in bitmask, and apply bitmask to logits. + + Returns: + scores: Logits modified with bitmask. + """ + # Lazily initialize GrammarMatchers and bitmask + if len(self.matchers) == 0: + self.batch_size = input_ids.shape[0] + self.matchers = [ + xgr.GrammarMatcher(self.compiled_grammar) for _ in range(self.batch_size) + ] + self.token_bitmask = xgr.allocate_token_bitmask(self.batch_size, self.full_vocab_size) + + if input_ids.shape[0] != self.batch_size: + raise RuntimeError( + "Expect input_ids.shape[0] to be LogitsProcessor.batch_size." + + f"Got {input_ids.shape[0]} for the former, and {self.batch_size} for the latter." 
+ ) + + if not self.prefilled: + # Have not sampled a token yet + self.prefilled = True + else: + for i in range(self.batch_size): + if not self.matchers[i].is_terminated(): + sampled_token = input_ids[i][-1] + assert self.matchers[i].accept_token(sampled_token) + + for i in range(self.batch_size): + if not self.matchers[i].is_terminated(): + self.matchers[i].fill_next_token_bitmask(self.token_bitmask, i) + + # We only support masking logits on CUDA or CPU + device_type = scores.device.type + if device_type != "cuda": + scores = scores.to("cpu") + xgr.apply_token_bitmask_inplace(scores, self.token_bitmask.to(scores.device)) + if device_type != "cuda": + scores = scores.to(device_type) + + # NOTE: Cannot reset here because __call__ is not invoked when stop token + # is sampled. This is why each `generate()` call needs to instantiate an + # LogitsProcessor + + return scores diff --git a/.venv/lib/python3.11/site-packages/xgrammar/grammar.py b/.venv/lib/python3.11/site-packages/xgrammar/grammar.py new file mode 100644 index 0000000000000000000000000000000000000000..1c1fbb17a338fae787f99bd6dc55737f8799fce0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/grammar.py @@ -0,0 +1,271 @@ +"""This module provides classes representing grammars.""" + +import json +from typing import List, Optional, Tuple, Type, Union + +from pydantic import BaseModel, Field + +from .base import XGRObject, _core + + +class StructuralTagItem(BaseModel): + """A structural tag item. See Grammar.from_structural_tag() for more details. + + Attributes + ---------- + start : str + The start tag. + + schema_ : Union[str, Type[BaseModel]] + The schema. + + end : str + The end tag. 
+ """ + + start: str + schema_: Union[str, Type[BaseModel]] = Field(alias="schema") + end: str + + +def _handle_pydantic_schema(schema: Union[str, Type[BaseModel]]) -> str: + if isinstance(schema, type) and issubclass(schema, BaseModel): + if hasattr(schema, "model_json_schema"): + # pydantic 2.x + return json.dumps(schema.model_json_schema()) + elif hasattr(schema, "schema_json"): + # pydantic 1.x + return json.dumps(schema.schema_json()) + else: + raise ValueError("The schema should have a model_json_schema or json_schema method.") + elif isinstance(schema, str): + return schema + else: + raise ValueError("The schema should be a string or a Pydantic model.") + + +class Grammar(XGRObject): + """This class represents a grammar object in XGrammar, and can be used later in the + grammar-guided generation. + + The Grammar object supports context-free grammar (CFG). EBNF (extended Backus-Naur Form) is + used as the format of the grammar. There are many specifications for EBNF in the literature, + and we follow the specification of GBNF (GGML BNF) in + https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + When printed, the grammar will be converted to GBNF format. + """ + + def __str__(self) -> str: + """Print the BNF grammar to a string, in EBNF format. + + Returns + ------- + grammar_string : str + The BNF grammar string. + """ + return self._handle.to_string() + + @staticmethod + def from_ebnf(ebnf_string: str, *, root_rule_name: str = "root") -> "Grammar": + """Construct a grammar from EBNF string. The EBNF string should follow the format + in https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + Parameters + ---------- + ebnf_string : str + The grammar string in EBNF format. + + root_rule_name : str, default: "root" + The name of the root rule in the grammar. + + Raises + ------ + RuntimeError + When converting the regex pattern fails, with details about the parsing error. 
+ """ + return Grammar._create_from_handle(_core.Grammar.from_ebnf(ebnf_string, root_rule_name)) + + @staticmethod + def from_json_schema( + schema: Union[str, Type[BaseModel]], + *, + any_whitespace: bool = True, + indent: Optional[int] = None, + separators: Optional[Tuple[str, str]] = None, + strict_mode: bool = True, + ) -> "Grammar": + """Construct a grammar from JSON schema. Pydantic model or JSON schema string can be + used to specify the schema. + + It allows any whitespace by default. If user want to specify the format of the JSON, + set `any_whitespace` to False and use the `indent` and `separators` parameters. The + meaning and the default values of the parameters follows the convention in json.dumps(). + + It internally converts the JSON schema to a EBNF grammar. + + Parameters + ---------- + schema : Union[str, Type[BaseModel]] + The schema string or Pydantic model. + + any_whitespace : bool, default: True + Whether to use any whitespace. If True, the generated grammar will ignore the + indent and separators parameters, and allow any whitespace. + + indent : Optional[int], default: None + The number of spaces for indentation. If None, the output will be in one line. + + Note that specifying the indentation means forcing the LLM to generate JSON strings + strictly formatted. However, some models may tend to generate JSON strings that + are not strictly formatted. In this case, forcing the LLM to generate strictly + formatted JSON strings may degrade the generation quality. See + for more + details. + + separators : Optional[Tuple[str, str]], default: None + Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": "). + If None, the default separators will be used: (",", ": ") when the indent is not None, + and (", ", ": ") otherwise. + + strict_mode : bool, default: True + Whether to use strict mode. In strict mode, the generated grammar will not allow + properties and items that is not specified in the schema. 
This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. It also disallows empty + JSON objects and arrays. + + This helps LLM to generate accurate output in the grammar-guided generation with JSON + schema. + + Returns + ------- + grammar : Grammar + The constructed grammar. + + Raises + ------ + RuntimeError + When converting the json schema fails, with details about the parsing error. + """ + schema_str = _handle_pydantic_schema(schema) + return Grammar._create_from_handle( + _core.Grammar.from_json_schema( + schema_str, any_whitespace, indent, separators, strict_mode + ), + ) + + @staticmethod + def from_regex(regex_string: str) -> "Grammar": + """Create a grammar from a regular expression string. + + Parameters + ---------- + regex_string : str + The regular expression pattern to create the grammar from. + + Returns + ------- + grammar : Grammar + The constructed grammar from the regex pattern. + + Raises + ------ + RuntimeError + When parsing the regex pattern fails, with details about the parsing error. + """ + return Grammar._create_from_handle(_core.Grammar.from_regex(regex_string)) + + @staticmethod + def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> "Grammar": + """Create a grammar from structural tags. The structural tag handles the dispatching + of different grammars based on the tags and triggers: it initially allows any output, + until a trigger is encountered, then dispatch to the corresponding tag; when the end tag + is encountered, the grammar will allow any following output, until the next trigger is + encountered. + + The tags parameter is used to specify the output pattern. It is especially useful for LLM + function calling, where the pattern is: + {"arg1": ..., "arg2": ...}. + This pattern consists of three parts: a start tag (), a parameter list + according to some schema ({"arg1": ..., "arg2": ...}), and an end tag (). 
This
+    pattern can be described in a StructuralTagItem with a start tag, a schema, and an end tag.
+    The structural tag is able to handle multiple such patterns by passing them into multiple
+    tags.
+
+    The triggers parameter is used to trigger the dispatching of different grammars. The trigger
+    should be a prefix of a provided start tag. When the trigger is encountered, the
+    corresponding tag should be used to constrain the following output. There can be multiple
+    tags matching the same trigger. Then if the trigger is encountered, the following output
+    should match one of the tags. For example, in function calling, the triggers can be
+    [").
+
+    The correspondence of tags and triggers is automatically determined: all tags with the
+    same trigger will be grouped together. User should make sure any trigger is not a prefix
+    of another trigger: then the correspondence of tags and triggers will be ambiguous.
+
+    To use this grammar in grammar-guided generation, the GrammarMatcher constructed from
+    structural tag will generate a mask for each token. When the trigger is not encountered,
+    the mask will likely be all-1 and not have to be used (fill_next_token_bitmask returns
+    False, meaning no token is masked). When a trigger is encountered, the mask should be
+    enforced (fill_next_token_bitmask will return True, meaning some token is masked) to the
+    output logits.
+
+    The benefit of this method is the token boundary between tags and triggers is automatically
+    handled. The user does not need to worry about the token boundary.
+
+    Parameters
+    ----------
+    tags : List[StructuralTagItem]
+        The structural tags.
+
+    triggers : List[str]
+        The triggers.
+ + Examples + -------- + >>> class Schema1(BaseModel): + >>> arg1: str + >>> arg2: int + >>> class Schema2(BaseModel): + >>> arg3: float + >>> arg4: List[str] + >>> tags = [ + >>> StructuralTagItem(start="", schema=Schema1, end=""), + >>> StructuralTagItem(start="", schema=Schema2, end=""), + >>> ] + >>> triggers = [">> grammar = Grammar.from_structural_tag(tags, triggers) + """ + tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + return Grammar._create_from_handle(_core.Grammar.from_structural_tag(tags_tuple, triggers)) + + @staticmethod + def builtin_json_grammar() -> "Grammar": + """Get the grammar of standard JSON. This is compatible with the official JSON grammar + specification in https://www.json.org/json-en.html. + + Returns + ------- + grammar : Grammar + The JSON grammar. + """ + return Grammar._create_from_handle(_core.Grammar.builtin_json_grammar()) + + @staticmethod + def concat(*grammars: "Grammar") -> "Grammar": + """Create a grammar that matches the concatenation of the grammars in the list. That is + equivalent to using the `+` operator to concatenate the grammars in the list. + + Parameters + ---------- + grammars : List[Grammar] + The grammars to create the concatenation of. + + Returns + ------- + grammar : Grammar + The concatenation of the grammars. 
+ """ + grammar_handles = [grammar._handle for grammar in grammars] + return Grammar._create_from_handle(_core.Grammar.concat(grammar_handles)) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f99dfba4ad1870a8470602deee76ce94199c5b86 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py @@ -0,0 +1,4 @@ +"""The kernels for XGrammar.""" + +from .apply_token_bitmask_inplace_cpu import apply_token_bitmask_inplace_cpu +from .apply_token_bitmask_inplace_triton import apply_token_bitmask_inplace_triton diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f670a1aaf362af0f2ff6261b4630cf97f32ab4b9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c64165ca6a1b79022df590af0cfb9ff8fd0362b8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..387e6a72adab3d4cf931b5a9ea29b961b8c2a8ce Binary files /dev/null 
and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py new file mode 100644 index 0000000000000000000000000000000000000000..dab190af36f33ab9b785d92670dc8b0305c2305d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py @@ -0,0 +1,41 @@ +"""CPU implementation for in-place applying token mask.""" + +import time +from typing import List, Optional, Union + +import torch + +from ..base import _core + + +def apply_token_bitmask_inplace_cpu( + logits: torch.Tensor, + bitmask: torch.Tensor, + indices: Optional[Union[List[int], torch.Tensor]] = None, +) -> None: + """Apply token bitmask in-place on CPU.""" + if logits.device.type != "cpu": + raise ValueError("logits must be on CPU") + if bitmask.device.type != "cpu": + raise ValueError("bitmask must be on CPU") + if logits.dtype != torch.float32: + raise ValueError("logits must be of type float32") + if bitmask.dtype != torch.int32: + raise ValueError("bitmask must be of type int32") + if logits.dim() != 1 and logits.dim() != 2: + raise ValueError("logits should be 1D or 2D, but got {}D".format(logits.dim())) + if bitmask.dim() != 1 and bitmask.dim() != 2: + raise ValueError("bitmask should be 1D or 2D, but got {}D".format(bitmask.dim())) + + logits_shape = (1, logits.shape[0]) if logits.dim() == 1 else (logits.shape[0], logits.shape[1]) + bitmask_shape = ( + (1, bitmask.shape[0]) if bitmask.dim() == 1 else (bitmask.shape[0], bitmask.shape[1]) + ) + + _core.kernels.apply_token_bitmask_inplace_cpu( + logits.data_ptr(), + logits_shape, + bitmask.data_ptr(), + bitmask_shape, + indices, + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py 
b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py new file mode 100644 index 0000000000000000000000000000000000000000..f91956da8de352ef3035d684d1db63aff723f893 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py @@ -0,0 +1,75 @@ +from typing import List, Optional, Union + +import torch +import triton +import triton.language as tl + + +@triton.jit +def apply_token_bitmask_inplace_kernel( + logits_ptr, + bitmask_ptr, + indices_ptr, + num_rows, + vocab_size, + bitmask_size, + NUM_SMS: tl.constexpr, + BLOCK_SIZE: tl.constexpr, +): + pid = tl.program_id(0) + num_blocks = tl.cdiv(vocab_size, BLOCK_SIZE) + for work_id in tl.range(pid, num_rows * num_blocks, NUM_SMS): + block_offset = (work_id % num_blocks) * BLOCK_SIZE + row_id = work_id // num_blocks + batch_id = tl.load(indices_ptr + row_id) + offsets = block_offset + tl.arange(0, BLOCK_SIZE) + bitmask_offsets = block_offset // 32 + tl.arange(0, BLOCK_SIZE // 32) + vocab_mask = offsets < vocab_size + packed_bitmask_mask = bitmask_offsets < bitmask_size + packed_bitmask = tl.load( + bitmask_ptr + batch_id * bitmask_size + bitmask_offsets, packed_bitmask_mask + ) + bitmask = ((packed_bitmask[:, None] >> (tl.arange(0, 32)[None, :])) & 1) == 0 + bitmask = bitmask.reshape(BLOCK_SIZE) + + tl.store(logits_ptr + batch_id * vocab_size + offsets, -float("inf"), vocab_mask & bitmask) + + +def apply_token_bitmask_inplace_triton( + logits: torch.Tensor, + bitmask: torch.Tensor, + indices: Optional[Union[List[int], torch.Tensor]] = None, +): + def ceil_div(a, b): + return (a + b - 1) // b + + NUM_SMS = torch.cuda.get_device_properties("cuda").multi_processor_count + BLOCK_SIZE = 4096 + # Check input tensor shapes. 
+ if logits.ndim == 2: + batch_size, vocab_size = logits.shape + elif logits.ndim == 1: + batch_size = 1 + (vocab_size,) = logits.shape + else: + raise ValueError(f"Invalid logits tensor shape {logits.shape}") + + if indices is None: + indices = torch.arange(batch_size, dtype=torch.int32, device=logits.device) + elif isinstance(indices, list): + indices = torch.tensor(indices, dtype=torch.int32, device=logits.device) + + grid = lambda meta: (NUM_SMS,) + + apply_token_bitmask_inplace_kernel[grid]( + logits, + bitmask, + indices, + indices.shape[0], + vocab_size, + ceil_div(vocab_size, 32), + NUM_SMS, + BLOCK_SIZE, + num_warps=BLOCK_SIZE // 32 // (16 // logits.element_size()), + num_stages=3, + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/matcher.py b/.venv/lib/python3.11/site-packages/xgrammar/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..64a52895c00eebfa1a512f613ff1ee6da3687c14 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/matcher.py @@ -0,0 +1,325 @@ +"""Match the output of the LLM to the specified grammar, then generate the mask for the next +token.""" + +import math +from typing import List, Optional, Tuple, Union + +import torch + +from .base import XGRObject, _core +from .compiler import CompiledGrammar +from .kernels import apply_token_bitmask_inplace_cpu, apply_token_bitmask_inplace_triton + +"""The dtype of the bitmask: int32.""" +bitmask_dtype = torch.int32 + + +_is_cuda_available = torch.cuda.is_available() + + +def get_bitmask_shape(batch_size: int, vocab_size: int) -> Tuple[int, int]: + """Return the shape of the bitmask: (batch_size, ceil(vocab_size / 32))""" + return (batch_size, math.ceil(vocab_size / 32)) + + +_FULL_MASK = torch.tensor(-1, dtype=bitmask_dtype) + + +def allocate_token_bitmask(batch_size: int, vocab_size: int) -> torch.Tensor: + """Allocate the bitmask for the next token prediction. The bitmask is an int32 tensor on + CPU with shape (batch_size, ceil(vocab_size / 32)). 
Users who have their own needs to + manage CUDA memory can construct the tensor with get_bitmask_shape and bitmask_dtype + themselves. + + The reason why we use int32 instead of uint32 is that old versions of PyTorch do not support + uint32. + + Parameters + ---------- + batch_size : int + The batch size of the bitmask. + + vocab_size : int + The size of the vocabulary. + + Returns + ------- + bitmask : torch.Tensor + The shape of the bitmask. + """ + # In CUDA, use pinned memory to speed up data transfer from CPU to GPU + return torch.full( + get_bitmask_shape(batch_size, vocab_size), + _FULL_MASK, + dtype=bitmask_dtype, + pin_memory=_is_cuda_available, + ) + + +def reset_token_bitmask(bitmask: torch.Tensor) -> None: + """Reset the bitmask to the full mask.""" + bitmask.fill_(_FULL_MASK) + + +def apply_token_bitmask_inplace( + logits: torch.Tensor, + bitmask: torch.Tensor, + *, + indices: Optional[List[int]] = None, +) -> None: + """Apply the bitmask to the logits in-place. The bitmask is a 01 bitwise compressed tensor, + where 0 means the token is masked and 1 means the token is not masked. It can be generated by + allocate_token_bitmask and filled by fill_next_token_bitmask. After applying the bitmask, the + masked logits will be set to -inf. + + The shape of logits and bitmask should be (batch_size, vocab_size) and + (batch_size, bitmask_size) respectively. bitmask_size = ceil(vocab_size / 32). The operation is: + + .. code:: python + + for i in range(batch_size): + for j in range(vocab_size): + if get_bitmask_value(bitmask, i, j) == 0: + logits[i, j] = -inf + + get_bitmask_value(bitmask, i, j) gets the j-th bit of the i-th row of the bitmask. + + Indices can be used to specify which logits in the batch to apply the bitmask to. It is + especially useful when there are structured requests and unstructured requests mixed in the + same batch by skipping masking the logits in the unstructured requests. When specified, the + operation will be + + .. 
code:: python + + for batch_id in indices: + for j in range(vocab_size): + if get_bitmask_value(bitmask, batch_id, j) == 0: + logits[batch_id, j] = -inf + + The logits and bitmask should be on the same device. If both them are on CUDA, we launch a CUDA + kernel to apply bitmask. If both them are on CPU, we use a CPU implementation. The CUDA kernel + is optimized and should be preferred. + + In practice, the bitmask is allocated on CPU, and the logits is usually on GPU, so users should + manually copy the bitmask to GPU before calling this function. + + Parameters + ---------- + logits : torch.Tensor + The tensor to apply the bitmask to. + + bitmask : torch.Tensor + The bitmask to apply. + + indices : Optional[List[int]], default: None + A list of indices to specify which logits in the batch to apply the bitmask to. If None, + apply the bitmask to all logits in the batch. + """ + if bitmask.device != logits.device: + raise ValueError( + "logits and bitmask should be on the same device. " + + f"But got logits.device: {logits.device}, bitmask.device: {bitmask.device}" + ) + + if logits.device.type == "cuda": + apply_token_bitmask_inplace_triton(logits, bitmask, indices) + elif logits.device.type == "cpu": + apply_token_bitmask_inplace_cpu(logits, bitmask, indices) + else: + raise ValueError("Currently, logit masking is only supported on CUDA or CPU.") + + +class GrammarMatcher(XGRObject): + """Match the output of the LLM to the specified grammar, then generate the mask for the next + token. This is the core class in the grammar-guided generation. + + This class maintains a stateful matcher that can accept tokens and strings, then match them + to the specified grammar. The matcher can provide a bitmask for the next token prediction, + so that the output of the LLM follows the specified grammar. Its state can be reset and + rolled back by tokens. It also provides utilities for jump-forward decoding. 
+ + After matching the whole grammar, the matcher will accept a stop token. The token mask at + this time will only allow stop tokens. After accepting the stop token, the matcher will + terminate, then it cannot accept any new token or generate a new token mask, meaning the + generation is finished. + + Under the hood, it utilizes a pushdown automaton with backtracking to match the grammar, + with optimizations specific to LLM token mask generation. + + Parameters + ---------- + compiled_grammar : CompiledGrammar + The initialization context for the grammar matcher. + + override_stop_tokens : Optional[Union[int, List[int]]], default: None + If not None, the stop tokens to override the ones in the grammar. + + terminate_without_stop_token : bool, default: False + Whether to terminate the matcher without accepting a stop token. + + max_rollback_tokens : int, default: 0 + The maximum number of rollback tokens allowed. The rollback operation is useful for + jump-forward decoding and speculative decoding. + """ + + def __init__( + self, + compiled_grammar: CompiledGrammar, + *, + override_stop_tokens: Optional[Union[int, List[int]]] = None, + terminate_without_stop_token: bool = False, + max_rollback_tokens: int = 0, + ) -> None: + if not isinstance(compiled_grammar, CompiledGrammar): + raise ValueError("The grammar should be compiled before passing it to GrammarMatcher.") + + if isinstance(override_stop_tokens, int): + override_stop_tokens = [override_stop_tokens] + + self._init_handle( + _core.GrammarMatcher( + compiled_grammar._handle, + override_stop_tokens, + terminate_without_stop_token, + max_rollback_tokens, + ) + ) + + def accept_token(self, token_id: int, *, debug_print: bool = False) -> bool: + """Accept one token and update the state of the matcher. + + Parameters + ---------- + token_id : int + The id of the token to accept. + + debug_print : bool, default: False + Whether to print information about the internal state of the matcher. 
Helpful + for debugging. + + Returns + ------- + accepted : bool + Whether the token is accepted. + """ + return self._handle.accept_token(token_id, debug_print) + + def fill_next_token_bitmask( + self, bitmask: torch.Tensor, index: int = 0, *, debug_print: bool = False + ) -> bool: + """Fill the bitmask for the next token prediction. The input bitmask can be generated + by allocate_token_bitmask, and must be on CPU. bitmask[index] will be filled with the + next token bitmask. + + This method does not change the matcher state. + + Parameters + ---------- + bitmask : torch.Tensor + The bitmask for the next token prediction. + + index : int, default: 0 + The batch id of the bitmask. + + debug_print : bool, default: False + Whether to print information about generated bitmask. Helpful for debugging. + + Returns + ------- + need_apply : bool + Whether the bitmask need to be applied (not all-true). An optimization: if False, + this means the bitmask is already all-true, so no need to apply it. + """ + if bitmask.device.type != "cpu": + raise ValueError("bitmask should be on CPU.") + if bitmask.dtype != bitmask_dtype: + raise ValueError(f"bitmask should be of type {bitmask_dtype}.") + return self._handle.fill_next_token_bitmask( + bitmask.data_ptr(), list(bitmask.shape), index, debug_print + ) + + def find_jump_forward_string(self) -> str: + """Find the jump-forward string for jump-forward decoding. This is the longest string that + certainly conforms with the current grammar from the current matcher state. This string + can become the output of the LLM without requiring LLM decoding. + + This method does not change the matcher state. + + Returns + ------- + jump_forward_string : str + The jump-forward string. + """ + return self._handle.find_jump_forward_string() + + def rollback(self, num_tokens: int = 1) -> None: + """Rollback the matcher to a previous state by several tokens. 
+ + Parameters + ---------- + num_tokens : int, default: 1 + The number of tokens to rollback. It cannot exceed the current number of steps, nor can + it exceed the specified maximum number of rollback tokens. + """ + self._handle.rollback(num_tokens) + + def is_terminated(self) -> bool: + """Check if the matcher has terminated. If terminate_without_stop_token is False, the + matcher will terminate if it has accepted the stop token. Otherwise, the matcher will + terminate after matching the whole grammar. + + Returns + ------- + terminated : bool + Whether the matcher has terminated. + """ + return self._handle.is_terminated() + + def reset(self) -> None: + """Reset the matcher to the initial state.""" + return self._handle.reset() + + @property + def max_rollback_tokens(self) -> int: + """Get the maximum number of rollback tokens allowed. + + Returns + ------- + max_rollback_tokens : int + The maximum number of rollback tokens. + """ + return self._handle.max_rollback_tokens + + @property + def stop_token_ids(self) -> List[int]: + """The ids of the stop tokens used in the matcher. If specified, the provided stop tokens + will be used. Otherwise, the stop tokens will be detected from the vocabulary. + + Returns + ------- + stop_token_ids : List[int] + The ids of the stop tokens. + """ + return self._handle.stop_token_ids + + def _debug_accept_string( + self, input_str: Union[str, bytes], *, debug_print: bool = False + ) -> bool: + """Accept a string and update the state of the matcher. The whole string is considered + as one step in rollback. It is only used to complement the functionality of accept_token. + + Parameters + ---------- + input_str : Union[str, bytes] + The string to be accepted. + + debug_print : bool, default: False + Whether to print information about the internal state of the matcher. Helpful for + debugging. + + Returns + ------- + accepted : bool + Whether the string is accepted. 
+ """ + return self._handle._debug_accept_string(input_str, debug_print) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/support/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00a5f9682ebf767c69cd0d2af86f356fe2a09d7d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1212aabfad7c05df7bc2082e24ac434a3b04f3d6 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py b/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..7059b122c05f0dc1bc2d185cdc01eb2daebda744 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py @@ -0,0 +1,21 @@ +""" +Logging support for XGrammar. It derives from Python's logging module, and in the future, +it can be easily replaced by other logging modules such as structlog. 
+"""
+
+import logging
+
+
+def enable_logging():
+    """Enable XGrammar's default logging format"""
+    logging.basicConfig(
+        level=logging.INFO,
+        style="{",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        # `{`-style format using LogRecord attributes; `{filename}:{lineno}` points at
+        # the emitting source line (the previous literal "(unknown)" printed verbatim).
+        format="[{asctime}] {levelname} {filename}:{lineno}: {message}",
+    )
+
+
+def getLogger(name: str):  # pylint: disable=invalid-name
+    """Get a logger according to the given name"""
+    return logging.getLogger(name)
diff --git a/.venv/lib/python3.11/site-packages/xgrammar/testing.py b/.venv/lib/python3.11/site-packages/xgrammar/testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..e451f74f97403abdfbc6d1806af6bcee4e692e7e
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/xgrammar/testing.py
@@ -0,0 +1,208 @@
+"""Testing utilities."""
+
+import time
+from typing import List, Optional, Tuple, Union
+
+import torch
+
+from .base import _core
+from .compiler import CompiledGrammar, GrammarCompiler
+from .grammar import Grammar
+from .matcher import GrammarMatcher, bitmask_dtype, get_bitmask_shape
+from .tokenizer_info import TokenizerInfo
+
+
+def _json_schema_to_ebnf(
+    schema: str,
+    *,
+    any_whitespace: bool = True,
+    indent: Optional[int] = None,
+    separators: Optional[Tuple[str, str]] = None,
+    strict_mode: bool = True,
+) -> str:
+    """Convert JSON schema string to BNF grammar string. For test purposes.
+
+    Parameters
+    ----------
+    schema : str
+        The schema string.
+
+    indent : Optional[int], default: None
+        The number of spaces for indentation. If None, the output will be in one line.
+
+    separators : Optional[Tuple[str, str]], default: None
+        Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": ").
+        If None, the default separators will be used: (",", ": ") when the indent is not None,
+        and (", ", ": ") otherwise.
+
+    strict_mode : bool, default: True
+        Whether to use strict mode. In strict mode, the generated grammar will not allow
+        properties and items that is not specified in the schema.
This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. + + This helps LLM to generate accurate output in the grammar-guided generation with JSON + schema. + + Returns + ------- + bnf_string : str + The BNF grammar string. + """ + return _core.testing._json_schema_to_ebnf( + schema, + any_whitespace, + indent, + separators, + strict_mode, + ) + + +def _regex_to_ebnf(regex: str, with_rule_name: bool = True) -> str: + r"""Convert a regex string to BNF grammar string. For test purposes. The regex grammar + follows the syntax in JavaScript (ECMA 262). Check + https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions + for a tutorial. Currently the following features are not supported: + 1. Backreference (\1) + 2. non-capturing group, naming capture groups and assertions ((?...)) + 3. Unicode character class escape (\p{...}) + 4. Word boundary (\b) + 5. Unicode property escapes (\p{...}) + 6. Quantifier with range {x,y}. Now user can just repeat the element as a workaround. + + This method is primarily intended for testing and debugging purposes. + + Parameters + ---------- + regex : str + The regex string to be converted. + + Returns + ------- + bnf_string : str + The BNF grammar string converted from the input regex. + """ + return _core.testing._regex_to_ebnf(regex, with_rule_name) + + +def _is_grammar_accept_string( + grammar: Union[Grammar, str], + input_str: str, + *, + debug_print: bool = False, + print_time: bool = False, +) -> bool: + """Check if a grammar accepts a string. For test purposes. + + Parameters + ---------- + grammar : Union[Grammar, str] + The grammar to check. Can be either a Grammar object or a BNF grammar string. + input_str : str + The input string to check. + debug_print : bool, default: False + Whether to print debug information during matching. + print_time : bool, default: False + Whether to print timing information. 
+ + Returns + ------- + bool + True if the grammar accepts the string, False otherwise. + """ + + if isinstance(grammar, str): + grammar = Grammar.from_ebnf(grammar) + grammar_compiler = GrammarCompiler(TokenizerInfo([]), cache_enabled=False) + compiled_grammar = grammar_compiler.compile_grammar(grammar) + matcher = GrammarMatcher(compiled_grammar, terminate_without_stop_token=True) + + if print_time: + start = time.monotonic_ns() + accepted = matcher._debug_accept_string(input_str, debug_print=debug_print) + + if print_time: + end = time.monotonic_ns() + print(f"Accepting {input_str}, result: {accepted}, time: {(end - start) / 1e3} us") + + if not accepted: + return False + return matcher.is_terminated() + + +def _get_masked_tokens_from_bitmask( + bitmask: torch.Tensor, vocab_size: int, index: int = 0 +) -> List[int]: + """Get the ids of the rejected tokens from the bitmask. Mainly for debug purposes. + + Parameters + ---------- + bitmask : torch.Tensor + The rejected token bitmask. Should be generated by allocate_token_bitmask and + filled by fill_next_token_bitmask. Should be on CPU. + + index : int, default: 0 + The batch index of the bitmask. For batch inference, bitmask[index] will be used. + Otherwise is ignored. + + Returns + ------- + rejected_token_ids : List[int] + A list of rejected token ids. + """ + if bitmask.device.type != "cpu": + raise ValueError("bitmask should be on CPU.") + if bitmask.dtype != bitmask_dtype: + raise ValueError(f"bitmask should be of type {bitmask_dtype}.") + return _core.testing._get_masked_tokens_from_bitmask( + bitmask.data_ptr(), list(bitmask.shape), vocab_size, index + ) + + +def _get_matcher_from_grammar_and_tokenizer_info( + grammar: Union[Grammar, str], tokenizer_info: Optional[TokenizerInfo] = None, **kwargs +) -> GrammarMatcher: + """Create a GrammarMatcher from a grammar and tokenizer info. + + Parameters + ---------- + grammar : Union[Grammar, str] + The grammar to create the matcher from. 
Can be either a Grammar object or a string + containing EBNF grammar. + tokenizer_info : Optional[TokenizerInfo], default: None + Information about the tokenizer to use with this grammar. If None, an empty + TokenizerInfo will be created. + **kwargs + Additional keyword arguments to pass to the GrammarMatcher constructor. + + Returns + ------- + matcher : GrammarMatcher + The created grammar matcher. + """ + if tokenizer_info is None: + tokenizer_info = TokenizerInfo([]) + grammar_compiler = GrammarCompiler(tokenizer_info, cache_enabled=False) + compiled_grammar = grammar_compiler.compile_grammar(grammar) + return GrammarMatcher(compiled_grammar, **kwargs) + + +def _get_grammar_union(*grammars: "Grammar") -> "Grammar": + """Create a grammar that matches any of the grammars in the list. That is equivalent to + using the `|` operator to concatenate the grammars in the list. + + Parameters + ---------- + grammars : List[Grammar] + The grammars to create the union of. + + Returns + ------- + grammar : Grammar + The union of the grammars. 
+ """ + grammar_handles = [grammar._handle for grammar in grammars] + return Grammar._create_from_handle(_core.Grammar.union(grammar_handles)) + + +def _get_allow_empty_rule_ids(compiled_grammar: CompiledGrammar) -> List[int]: + return _core.testing._get_allow_empty_rule_ids(compiled_grammar._handle) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py b/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py new file mode 100644 index 0000000000000000000000000000000000000000..75a00d085080cc7d31086ffff2293eb440e020cf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py @@ -0,0 +1,333 @@ +"""This module provides the tokenizer info class to handle the tokenizer information.""" + +from enum import Enum +from typing import List, Optional, Union + +import sentencepiece +import tiktoken +from transformers import PreTrainedTokenizerBase, PreTrainedTokenizerFast + +from .base import XGRObject, _core +from .support import logging + +logging.enable_logging() +logger = logging.getLogger(__name__) + + +class VocabType(Enum): + """The type of the vocabulary. Used in TokenizerInfo. XGrammar supports three types of + vocabularies: + + RAW + The vocabulary is in the raw format. The tokens in the vocabulary are kept in their + original form without any processing. This kind of tokenizer includes the tiktoken + tokenizer, e.g. microsoft/Phi-3-small-8k-instruct, Qwen/Qwen-7B-Chat, etc. + + BYTE_FALLBACK + The vocabulary used in the byte fallback BPE tokenizer. The tokens are encoded through + the byte-fallback conversion. E.g. "\u001B" -> "<0x1B>", " apple" -> "▁apple". This kind of + tokenizer includes meta-llama/Llama-2-7b-chat, microsoft/Phi-3.5-mini-instruct, etc. + + BYTE_LEVEL + The vocabulary used in the byte level BPE tokenizer. 
The tokens are encoded through + the byte-to-unicode conversion, as in + https://github.com/huggingface/transformers/blob/87be06ca77166e6a6215eee5a990ab9f07238a18/src/transformers/models/gpt2/tokenization_gpt2.py#L38-L59 + + This kind of tokenizer includes meta-llama/Meta-Llama-3-8B-Instruct, + meta-llama/Meta-Llama-3.1-8B-Instruct, etc. + """ + + RAW = "RAW" + BYTE_FALLBACK = "BYTE_FALLBACK" + BYTE_LEVEL = "BYTE_LEVEL" + + +class TokenizerInfo(XGRObject): + """The tokenizer info contains the vocabulary, the type of the vocabulary, and necessary + information for the grammar-guided generation. + + Note that although some tokenizers will encode the tokens in a special format, e.g. + "<0x1B>" for "\u001B" in the ByteFallback tokenizer, and "Ġ" for " " in the Byte-Level BPE + tokenizer, TokenizerInfo always decodes the vocabulary to the original format (e.g. "\u001B" + and " "). + + Also note that some models (e.g. Phi-3 and Deepseek-V2) may pad the vocabulary to a multiple + of 32. In this case, the model's vocab_size is larger than the tokenizer's vocabulary size. + Please pass the model's vocab_size to the vocab_size parameter in the constructor, because + this information is used to determine the size of the token mask. + + Parameters + ---------- + encoded_vocab : Union[List[bytes], List[str]] + The encoded vocabulary of the tokenizer. + + vocab_type : VocabType, default: VocabType.RAW + The type of the vocabulary. See also VocabType. + + vocab_size : Optional[int], default: None + The size of the vocabulary. If not provided, the vocabulary size will be len(encoded_vocab). + + stop_token_ids : Optional[List[int]], default: None + The stop token ids. If not provided, the stop token ids will be auto detected (but may not + be correct). + + prepend_space_in_tokenization : bool, default: False + Whether the tokenizer will prepend a space before the text in the tokenization process. 
+ """ + + def __init__( + self, + encoded_vocab: Union[List[bytes], List[str]], + vocab_type: VocabType = VocabType.RAW, + *, + vocab_size: Optional[int] = None, + stop_token_ids: Optional[Union[List[int], int]] = None, + prepend_space_in_tokenization: bool = False, + ) -> None: + if isinstance(stop_token_ids, int): + stop_token_ids = [stop_token_ids] + self._init_handle( + _core.TokenizerInfo( + encoded_vocab, + vocab_type.value, + vocab_size, + stop_token_ids, + prepend_space_in_tokenization, + ) + ) + + @staticmethod + def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool: + # helper to check if tokenizer is a tiktoken tokenizer + has_tiktoken_encoding = hasattr(tokenizer, "tokenizer") and isinstance( + tokenizer.tokenizer, tiktoken.Encoding + ) + + filename_pattern = ( + "vocab_file" in tokenizer.vocab_files_names + and "tiktoken" in tokenizer.vocab_files_names["vocab_file"] + ) + + return has_tiktoken_encoding or filename_pattern + + @staticmethod + def _is_sentencepiece_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool: + # helper to check if tokenizer is a sentence piece tokenizer + has_sp_model_attr = hasattr(tokenizer, "sp_model") and isinstance( + tokenizer.sp_model, sentencepiece.SentencePieceProcessor + ) + + has_nested_sp_model_attr = ( + hasattr(tokenizer, "tokenizer") + and hasattr(tokenizer.tokenizer, "sp_model") + and isinstance(tokenizer.tokenizer.sp_model, sentencepiece.SentencePieceProcessor) + ) + + return has_sp_model_attr or has_nested_sp_model_attr + + @staticmethod + def from_huggingface( + tokenizer: PreTrainedTokenizerBase, + *, + vocab_size: Optional[int] = None, + stop_token_ids: Optional[Union[List[int], int]] = None, + ) -> "TokenizerInfo": + """Construct the tokenizer info from the huggingface tokenizer. This constructor supports + various tokenizer backends, including the huggingface fast tokenizer and tiktoken tokenizer. + Necessary information is automatically detected from the tokenizer. 
+ + Note that some models (e.g. Phi-3 and Deepseek-V2) may pad the vocabulary to a multiple + of 32. In this case, the model's vocab_size is larger than the tokenizer's vocabulary + size. Please pass the model's vocab_size (this should be defined in the model config) + to the vocab_size parameter in the constructor, because this information is used to + determine the size of the token mask. + + Some models can have more than one stop token ids, and auto detection may not find all + of them. In this case, you can specify the stop token ids manually. + + Parameters + ---------- + tokenizer : PreTrainedTokenizerBase + The huggingface tokenizer. + + vocab_size : Optional[int], default: None + The size of the vocabulary. If not provided, the vocabulary size will be + len(encoded_vocab). + + stop_token_ids : Optional[List[int]], default: None + The stop token ids. If not provided, the stop token ids will be auto detected + (but may not be correct). + + Returns + ------- + tokenizer_info : TokenizerInfo + The tokenizer info. + """ + + if isinstance(stop_token_ids, int): + stop_token_ids = [stop_token_ids] + if isinstance(stop_token_ids, list) and len(stop_token_ids) == 0: + raise ValueError("stop_token_ids cannot be empty") + + try: + vocab_dict = tokenizer.get_vocab() + except AttributeError as e: + msg = ( + f"Cannot get the vocabulary of the tokenizer {type(tokenizer)}. The tokenizer " + "should have a get_vocab method." + ) + raise ValueError(msg) from e + + max_id = max(vocab_dict.values()) if vocab_dict else -1 + detected_vocab_size = max(len(vocab_dict), max_id + 1) + if vocab_size is None: + vocab_size = detected_vocab_size + else: + if vocab_size < detected_vocab_size: + msg = f"Input vocab_size less than minimum viable vocab size for tokenizer {type(tokenizer)}." 
+ raise ValueError(msg) + + # maintain tokenizer's indexing + encoded_vocab = ["" for _ in range(vocab_size)] + for token, idx in vocab_dict.items(): + encoded_vocab[idx] = token + + if isinstance(tokenizer, PreTrainedTokenizerFast): + # huggingface fast tokenizer + # - the vocabulary is directly obtained from tokenizer.get_vocab() + # (tokenizer.backend_tokenizer.to_str() may not contain the full vocab, special + # tokens may be omitted) + # - the vocab size is obtained from len(tokenizer.get_vocab()) or provided by user + # - the vocab type and prepend_space_in_tokenization are obtained from + # tokenizer.backend_tokenizer.to_str() + # - stop token id is provided by user, or auto detected. + backend_str = tokenizer.backend_tokenizer.to_str() + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." + ) + return TokenizerInfo._create_from_handle( + _core.TokenizerInfo.from_huggingface( + encoded_vocab, backend_str, vocab_size, stop_token_ids + ) + ) + elif TokenizerInfo._is_tiktoken_tokenizer(tokenizer): + # tiktoken tokenizer + # e.g. Phi-3-small-8k-instruct, Qwen-7B-Chat, stablelm-2-12b-chat (previously) + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." 
+ ) + return TokenizerInfo( + encoded_vocab, + VocabType.RAW, + vocab_size=vocab_size, + stop_token_ids=stop_token_ids, + prepend_space_in_tokenization=False, + ) + elif TokenizerInfo._is_sentencepiece_tokenizer(tokenizer): + # sentencepiece tokenizer + # e.g. Chatglm3-6b + if hasattr(tokenizer, "sp_model"): + sp_model = tokenizer.sp_model + elif hasattr(tokenizer, "tokenizer") and hasattr(tokenizer.tokenizer, "sp_model"): + sp_model = tokenizer.tokenizer.sp_model + + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + eos_id = sp_model.eos_id() + if eos_id != -1: + stop_token_ids = [eos_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." + ) + # detect vocab_type of tokenizer + if "<0x0A>" in vocab_dict: + vocab_type = VocabType.BYTE_FALLBACK + else: + vocab_type = VocabType.RAW + + return TokenizerInfo( + encoded_vocab, + vocab_type=vocab_type, + vocab_size=vocab_size, + stop_token_ids=stop_token_ids, + prepend_space_in_tokenization=True, + ) + else: + # TODO(yixin): unsupported tokenizer + raise ValueError(f"Unsupported tokenizer type: {type(tokenizer)}") + + @property + def vocab_type(self) -> VocabType: + """The type of the vocabulary.""" + return VocabType(self._handle.vocab_type) + + @property + def vocab_size(self) -> int: + """The size of the vocabulary.""" + return self._handle.vocab_size + + @property + def prepend_space_in_tokenization(self) -> bool: + """Whether the tokenizer will prepend a space before the text in the tokenization + process.""" + return self._handle.prepend_space_in_tokenization + + @property + def decoded_vocab(self) -> List[bytes]: + """The decoded vocabulary of the tokenizer. 
This converts the tokens in the LLM's + vocabulary back to the original format of the input text. E.g. for type ByteFallback, + the token <0x1B> is converted back to "\u001B". + """ + return self._handle.decoded_vocab + + @property + def stop_token_ids(self) -> List[int]: + """The stop token ids.""" + return self._handle.stop_token_ids + + @property + def special_token_ids(self) -> List[int]: + """The special token ids. Special tokens include control tokens, reserved tokens, + padded tokens, etc. Now it is automatically detected from the vocabulary.""" + return self._handle.special_token_ids + + def dump_metadata(self) -> str: + """Dump the metadata of the tokenizer to a json string. It can be used to construct the + tokenizer info from the vocabulary and the metadata string.""" + return self._handle.dump_metadata() + + @staticmethod + def from_vocab_and_metadata( + encoded_vocab: List[Union[bytes, str]], metadata: str + ) -> "TokenizerInfo": + """Construct the tokenizer info from the vocabulary and the metadata string in json + format. + + Parameters + ---------- + encoded_vocab : List[Union[bytes, str]] + The encoded vocabulary of the tokenizer. + + metadata : str + The metadata string in json format. + """ + return TokenizerInfo._create_from_handle( + _core.TokenizerInfo.from_vocab_and_metadata(encoded_vocab, metadata), + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/version.py b/.venv/lib/python3.11/site-packages/xgrammar/version.py new file mode 100644 index 0000000000000000000000000000000000000000..3f015a954279f39f1ea0670cc32493297e311359 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/version.py @@ -0,0 +1,144 @@ +# pylint: disable=missing-docstring +import argparse +import logging +import os +import subprocess + +# Modify the following value during release +# --------------------------------------------------- +# Current version: +# We use the version of the incoming release for code +# that is under development. 
+# +# It is also fallback version to be used when --git-describe +# is not invoked, or when the repository does not present the +# git tags in a format that this script can use. +# +# Two tag formats are supported: +# - vMAJ.MIN.PATCH (e.g. v0.8.0) or +# - vMAJ.MIN.devN (e.g. v0.8.dev0) + +# --------------------------------------------------- + +__version__ = "0.1.11" +PROJ_ROOT = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) + + +def py_str(cstr): + return cstr.decode("utf-8") + + +def git_describe_version(): + """Get PEP-440 compatible public and local version using git describe. + + Returns + ------- + pub_ver: str + Public version. + + local_ver: str + Local version (with additional label appended to pub_ver). + + Notes + ----- + - We follow PEP 440's convention of public version + and local versions. + - Only tags conforming to vMAJOR.MINOR.REV (e.g. "v0.7.0") + are considered in order to generate the version string. + See the use of `--match` in the `git` command below. + + Here are some examples: + + - pub_ver = '0.7.0', local_ver = '0.7.0': + We are at the 0.7.0 release. + - pub_ver = '0.8.dev94', local_ver = '0.8.dev94+g0d07a329e': + We are at the 0.8 development cycle. + The current source contains 94 additional commits + after the most recent tag(v0.7.0), + the git short hash tag of the current commit is 0d07a329e. + """ + cmd = [ + "git", + "describe", + "--tags", + "--match", + "v[0-9]*.[0-9]*.[0-9]*", + "--match", + "v[0-9]*.[0-9]*.dev[0-9]*", + ] + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + cwd=PROJ_ROOT, + ) as proc: + (out, _) = proc.communicate() + + if proc.returncode != 0: + msg = py_str(out) + logging.warning("git describe: %s", msg) + return None, None + describe = py_str(out).strip() + arr_info = describe.split("-") + + # Remove the v prefix, mainly to be robust + # to the case where v is not presented as well. 
+ if arr_info[0].startswith("v"): + arr_info[0] = arr_info[0][1:] + + # hit the exact tag + if len(arr_info) == 1: + return arr_info[0], arr_info[0] + + if len(arr_info) != 3: + logging.warning("Invalid output from git describe %s", describe) + return None, None + + dev_pos = arr_info[0].find(".dev") + + # Development versions: + # The code will reach this point in case it can't match a full release version, such as v0.7.0. + # + # 1. in case the last known label looks like vMAJ.MIN.devN e.g. v0.8.dev0, we use + # the current behavior of just using vMAJ.MIN.devNNNN+gGIT_REV + if dev_pos != -1: + dev_version = arr_info[0][: arr_info[0].find(".dev")] + # 2. in case the last known label looks like vMAJ.MIN.PATCH e.g. v0.8.0 + # then we just carry on with a similar version to what git describe provides, which is + # vMAJ.MIN.PATCH.devNNNN+gGIT_REV + else: + dev_version = arr_info[0] + + pub_ver = f"{dev_version}.dev{arr_info[1]}" + local_ver = f"{pub_ver}+{arr_info[2]}" + return pub_ver, local_ver + + +def main(): + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Detect and synchronize version.") + parser.add_argument( + "--print-version", + action="store_true", + help="Print version to the command line. No changes is applied to files.", + ) + parser.add_argument( + "--git-describe", + action="store_true", + help="Use git describe to generate development version.", + ) + parser.add_argument("--dry-run", action="store_true") + opt = parser.parse_args() + pub_ver, local_ver = None, None + if opt.git_describe: + pub_ver, local_ver = git_describe_version() + if pub_ver is None: + pub_ver = __version__ + if local_ver is None: + local_ver = __version__ + if opt.print_version: + print(local_ver) + + +if __name__ == "__main__": + main()