diff --git a/.venv/lib/python3.11/site-packages/distro/__init__.py b/.venv/lib/python3.11/site-packages/distro/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7686fe85a7cc94188da76bfb1c10ad2a10821256 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/__init__.py @@ -0,0 +1,54 @@ +from .distro import ( + NORMALIZED_DISTRO_ID, + NORMALIZED_LSB_ID, + NORMALIZED_OS_ID, + LinuxDistribution, + __version__, + build_number, + codename, + distro_release_attr, + distro_release_info, + id, + info, + like, + linux_distribution, + lsb_release_attr, + lsb_release_info, + major_version, + minor_version, + name, + os_release_attr, + os_release_info, + uname_attr, + uname_info, + version, + version_parts, +) + +__all__ = [ + "NORMALIZED_DISTRO_ID", + "NORMALIZED_LSB_ID", + "NORMALIZED_OS_ID", + "LinuxDistribution", + "build_number", + "codename", + "distro_release_attr", + "distro_release_info", + "id", + "info", + "like", + "linux_distribution", + "lsb_release_attr", + "lsb_release_info", + "major_version", + "minor_version", + "name", + "os_release_attr", + "os_release_info", + "uname_attr", + "uname_info", + "version", + "version_parts", +] + +__version__ = __version__ diff --git a/.venv/lib/python3.11/site-packages/distro/__main__.py b/.venv/lib/python3.11/site-packages/distro/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..0c01d5b08b6b44379b931d54d7fcf5221fdc9fde --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/__main__.py @@ -0,0 +1,4 @@ +from .distro import main + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab6b0a54035235c94332c49cab59ac83f05db790 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/__init__.cpython-311.pyc 
differ diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c6227d94f723a900bfa1fc7ad3f380cc30a61ed Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/__main__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc b/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46bb1e00bb3d55b876ed7d3c6bcbe7dcbbe2c5b7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/distro/__pycache__/distro.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/distro/distro.py b/.venv/lib/python3.11/site-packages/distro/distro.py new file mode 100644 index 0000000000000000000000000000000000000000..78ccdfa402ac29a8ef8aaddf7b527a0efb568d43 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/distro/distro.py @@ -0,0 +1,1403 @@ +#!/usr/bin/env python +# Copyright 2015-2021 Nir Cohen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The ``distro`` package (``distro`` stands for Linux Distribution) provides +information about the Linux distribution it runs on, such as a reliable +machine-readable distro ID, or version information. 
+ +It is the recommended replacement for Python's original +:py:func:`platform.linux_distribution` function, but it provides much more +functionality. An alternative implementation became necessary because Python +3.5 deprecated this function, and Python 3.8 removed it altogether. Its +predecessor function :py:func:`platform.dist` was already deprecated since +Python 2.6 and removed in Python 3.8. Still, there are many cases in which +access to OS distribution information is needed. See `Python issue 1322 +`_ for more information. +""" + +import argparse +import json +import logging +import os +import re +import shlex +import subprocess +import sys +import warnings +from typing import ( + Any, + Callable, + Dict, + Iterable, + Optional, + Sequence, + TextIO, + Tuple, + Type, +) + +try: + from typing import TypedDict +except ImportError: + # Python 3.7 + TypedDict = dict + +__version__ = "1.9.0" + + +class VersionDict(TypedDict): + major: str + minor: str + build_number: str + + +class InfoDict(TypedDict): + id: str + version: str + version_parts: VersionDict + like: str + codename: str + + +_UNIXCONFDIR = os.environ.get("UNIXCONFDIR", "/etc") +_UNIXUSRLIBDIR = os.environ.get("UNIXUSRLIBDIR", "/usr/lib") +_OS_RELEASE_BASENAME = "os-release" + +#: Translation table for normalizing the "ID" attribute defined in os-release +#: files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as defined in the os-release file, translated to lower case, +#: with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_OS_ID = { + "ol": "oracle", # Oracle Linux + "opensuse-leap": "opensuse", # Newer versions of OpenSuSE report as opensuse-leap +} + +#: Translation table for normalizing the "Distributor ID" attribute returned by +#: the lsb_release command, for use by the :func:`distro.id` method. +#: +#: * Key: Value as returned by the lsb_release command, translated to lower +#: case, with blanks translated to underscores. 
+#: +#: * Value: Normalized value. +NORMALIZED_LSB_ID = { + "enterpriseenterpriseas": "oracle", # Oracle Enterprise Linux 4 + "enterpriseenterpriseserver": "oracle", # Oracle Linux 5 + "redhatenterpriseworkstation": "rhel", # RHEL 6, 7 Workstation + "redhatenterpriseserver": "rhel", # RHEL 6, 7 Server + "redhatenterprisecomputenode": "rhel", # RHEL 6 ComputeNode +} + +#: Translation table for normalizing the distro ID derived from the file name +#: of distro release files, for use by the :func:`distro.id` method. +#: +#: * Key: Value as derived from the file name of a distro release file, +#: translated to lower case, with blanks translated to underscores. +#: +#: * Value: Normalized value. +NORMALIZED_DISTRO_ID = { + "redhat": "rhel", # RHEL 6.x, 7.x +} + +# Pattern for content of distro release file (reversed) +_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile( + r"(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)" +) + +# Pattern for base file name of distro release file +_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(r"(\w+)[-_](release|version)$") + +# Base file names to be looked up for if _UNIXCONFDIR is not readable. +_DISTRO_RELEASE_BASENAMES = [ + "SuSE-release", + "altlinux-release", + "arch-release", + "base-release", + "centos-release", + "fedora-release", + "gentoo-release", + "mageia-release", + "mandrake-release", + "mandriva-release", + "mandrivalinux-release", + "manjaro-release", + "oracle-release", + "redhat-release", + "rocky-release", + "sl-release", + "slackware-version", +] + +# Base file names to be ignored when searching for distro release file +_DISTRO_RELEASE_IGNORE_BASENAMES = ( + "debian_version", + "lsb-release", + "oem-release", + _OS_RELEASE_BASENAME, + "system-release", + "plesk-release", + "iredmail-release", + "board-release", + "ec2_version", +) + + +def linux_distribution(full_distribution_name: bool = True) -> Tuple[str, str, str]: + """ + .. 
deprecated:: 1.6.0 + + :func:`distro.linux_distribution()` is deprecated. It should only be + used as a compatibility shim with Python's + :py:func:`platform.linux_distribution()`. Please use :func:`distro.id`, + :func:`distro.version` and :func:`distro.name` instead. + + Return information about the current OS distribution as a tuple + ``(id_name, version, codename)`` with items as follows: + + * ``id_name``: If *full_distribution_name* is false, the result of + :func:`distro.id`. Otherwise, the result of :func:`distro.name`. + + * ``version``: The result of :func:`distro.version`. + + * ``codename``: The extra item (usually in parentheses) after the + os-release version number, or the result of :func:`distro.codename`. + + The interface of this function is compatible with the original + :py:func:`platform.linux_distribution` function, supporting a subset of + its parameters. + + The data it returns may not exactly be the same, because it uses more data + sources than the original function, and that may lead to different data if + the OS distribution is not consistent across multiple data sources it + provides (there are indeed such distributions ...). + + Another reason for differences is the fact that the :func:`distro.id` + method normalizes the distro ID string to a reliable machine-readable value + for a number of popular OS distributions. + """ + warnings.warn( + "distro.linux_distribution() is deprecated. It should only be used as a " + "compatibility shim with Python's platform.linux_distribution(). Please use " + "distro.id(), distro.version() and distro.name() instead.", + DeprecationWarning, + stacklevel=2, + ) + return _distro.linux_distribution(full_distribution_name) + + +def id() -> str: + """ + Return the distro ID of the current distribution, as a + machine-readable string. 
+ + For a number of OS distributions, the returned distro ID value is + *reliable*, in the sense that it is documented and that it does not change + across releases of the distribution. + + This package maintains the following reliable distro ID values: + + ============== ========================================= + Distro ID Distribution + ============== ========================================= + "ubuntu" Ubuntu + "debian" Debian + "rhel" RedHat Enterprise Linux + "centos" CentOS + "fedora" Fedora + "sles" SUSE Linux Enterprise Server + "opensuse" openSUSE + "amzn" Amazon Linux + "arch" Arch Linux + "buildroot" Buildroot + "cloudlinux" CloudLinux OS + "exherbo" Exherbo Linux + "gentoo" GenToo Linux + "ibm_powerkvm" IBM PowerKVM + "kvmibm" KVM for IBM z Systems + "linuxmint" Linux Mint + "mageia" Mageia + "mandriva" Mandriva Linux + "parallels" Parallels + "pidora" Pidora + "raspbian" Raspbian + "oracle" Oracle Linux (and Oracle Enterprise Linux) + "scientific" Scientific Linux + "slackware" Slackware + "xenserver" XenServer + "openbsd" OpenBSD + "netbsd" NetBSD + "freebsd" FreeBSD + "midnightbsd" MidnightBSD + "rocky" Rocky Linux + "aix" AIX + "guix" Guix System + "altlinux" ALT Linux + ============== ========================================= + + If you have a need to get distros for reliable IDs added into this set, + or if you find that the :func:`distro.id` function returns a different + distro ID for one of the listed distros, please create an issue in the + `distro issue tracker`_. + + **Lookup hierarchy and transformations:** + + First, the ID is obtained from the following sources, in the specified + order. 
The first available and non-empty value is used: + + * the value of the "ID" attribute of the os-release file, + + * the value of the "Distributor ID" attribute returned by the lsb_release + command, + + * the first part of the file name of the distro release file, + + The so determined ID value then passes the following transformations, + before it is returned by this method: + + * it is translated to lower case, + + * blanks (which should not be there anyway) are translated to underscores, + + * a normalization of the ID is performed, based upon + `normalization tables`_. The purpose of this normalization is to ensure + that the ID is as reliable as possible, even across incompatible changes + in the OS distributions. A common reason for an incompatible change is + the addition of an os-release file, or the addition of the lsb_release + command, with ID values that differ from what was previously determined + from the distro release file name. + """ + return _distro.id() + + +def name(pretty: bool = False) -> str: + """ + Return the name of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the name is returned without version or codename. + (e.g. "CentOS Linux") + + If *pretty* is true, the version and codename are appended. + (e.g. "CentOS Linux 7.1.1503 (Core)") + + **Lookup hierarchy:** + + The name is obtained from the following sources, in the specified order. + The first available and non-empty value is used: + + * If *pretty* is false: + + - the value of the "NAME" attribute of the os-release file, + + - the value of the "Distributor ID" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file. 
+ + * If *pretty* is true: + + - the value of the "PRETTY_NAME" attribute of the os-release file, + + - the value of the "Description" attribute returned by the lsb_release + command, + + - the value of the "" field of the distro release file, appended + with the value of the pretty version ("" and "" + fields) of the distro release file, if available. + """ + return _distro.name(pretty) + + +def version(pretty: bool = False, best: bool = False) -> str: + """ + Return the version of the current OS distribution, as a human-readable + string. + + If *pretty* is false, the version is returned without codename (e.g. + "7.0"). + + If *pretty* is true, the codename in parenthesis is appended, if the + codename is non-empty (e.g. "7.0 (Maipo)"). + + Some distributions provide version numbers with different precisions in + the different sources of distribution information. Examining the different + sources in a fixed priority order does not always yield the most precise + version (e.g. for Debian 8.2, or CentOS 7.1). + + Some other distributions may not provide this kind of information. In these + cases, an empty string would be returned. This behavior can be observed + with rolling releases distributions (e.g. Arch Linux). + + The *best* parameter can be used to control the approach for the returned + version: + + If *best* is false, the first non-empty version number in priority order of + the examined sources is returned. + + If *best* is true, the most precise version number out of all examined + sources is returned. + + **Lookup hierarchy:** + + In all cases, the version number is obtained from the following sources. 
+ If *best* is false, this order represents the priority order: + + * the value of the "VERSION_ID" attribute of the os-release file, + * the value of the "Release" attribute returned by the lsb_release + command, + * the version number parsed from the "" field of the first line + of the distro release file, + * the version number parsed from the "PRETTY_NAME" attribute of the + os-release file, if it follows the format of the distro release files. + * the version number parsed from the "Description" attribute returned by + the lsb_release command, if it follows the format of the distro release + files. + """ + return _distro.version(pretty, best) + + +def version_parts(best: bool = False) -> Tuple[str, str, str]: + """ + Return the version of the current OS distribution as a tuple + ``(major, minor, build_number)`` with items as follows: + + * ``major``: The result of :func:`distro.major_version`. + + * ``minor``: The result of :func:`distro.minor_version`. + + * ``build_number``: The result of :func:`distro.build_number`. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.version_parts(best) + + +def major_version(best: bool = False) -> str: + """ + Return the major version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The major version is the first + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.major_version(best) + + +def minor_version(best: bool = False) -> str: + """ + Return the minor version of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The minor version is the second + part of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. 
+ """ + return _distro.minor_version(best) + + +def build_number(best: bool = False) -> str: + """ + Return the build number of the current OS distribution, as a string, + if provided. + Otherwise, the empty string is returned. The build number is the third part + of the dot-separated version string. + + For a description of the *best* parameter, see the :func:`distro.version` + method. + """ + return _distro.build_number(best) + + +def like() -> str: + """ + Return a space-separated list of distro IDs of distributions that are + closely related to the current OS distribution in regards to packaging + and programming interfaces, for example distributions the current + distribution is a derivative from. + + **Lookup hierarchy:** + + This information item is only provided by the os-release file. + For details, see the description of the "ID_LIKE" attribute in the + `os-release man page + `_. + """ + return _distro.like() + + +def codename() -> str: + """ + Return the codename for the release of the current OS distribution, + as a string. + + If the distribution does not have a codename, an empty string is returned. + + Note that the returned codename is not always really a codename. For + example, openSUSE returns "x86_64". This function does not handle such + cases in any special way and just returns the string it finds, if any. + + **Lookup hierarchy:** + + * the codename within the "VERSION" attribute of the os-release file, if + provided, + + * the value of the "Codename" attribute returned by the lsb_release + command, + + * the value of the "" field of the distro release file. + """ + return _distro.codename() + + +def info(pretty: bool = False, best: bool = False) -> InfoDict: + """ + Return certain machine-readable information items about the current OS + distribution in a dictionary, as shown in the following example: + + .. 
sourcecode:: python + + { + 'id': 'rhel', + 'version': '7.0', + 'version_parts': { + 'major': '7', + 'minor': '0', + 'build_number': '' + }, + 'like': 'fedora', + 'codename': 'Maipo' + } + + The dictionary structure and keys are always the same, regardless of which + information items are available in the underlying data sources. The values + for the various keys are as follows: + + * ``id``: The result of :func:`distro.id`. + + * ``version``: The result of :func:`distro.version`. + + * ``version_parts -> major``: The result of :func:`distro.major_version`. + + * ``version_parts -> minor``: The result of :func:`distro.minor_version`. + + * ``version_parts -> build_number``: The result of + :func:`distro.build_number`. + + * ``like``: The result of :func:`distro.like`. + + * ``codename``: The result of :func:`distro.codename`. + + For a description of the *pretty* and *best* parameters, see the + :func:`distro.version` method. + """ + return _distro.info(pretty, best) + + +def os_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the os-release file data source of the current OS distribution. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_info() + + +def lsb_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the lsb_release command data source of the current OS distribution. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_info() + + +def distro_release_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + + See `distro release file`_ for details about these information items. 
+ """ + return _distro.distro_release_info() + + +def uname_info() -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information items + from the distro release file data source of the current OS distribution. + """ + return _distro.uname_info() + + +def os_release_attr(attribute: str) -> str: + """ + Return a single named information item from the os-release file data source + of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `os-release file`_ for details about these information items. + """ + return _distro.os_release_attr(attribute) + + +def lsb_release_attr(attribute: str) -> str: + """ + Return a single named information item from the lsb_release command output + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `lsb_release command output`_ for details about these information + items. + """ + return _distro.lsb_release_attr(attribute) + + +def distro_release_attr(attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the current OS distribution. + + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + + See `distro release file`_ for details about these information items. + """ + return _distro.distro_release_attr(attribute) + + +def uname_attr(attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the current OS distribution. 
+ + Parameters: + + * ``attribute`` (string): Key of the information item. + + Returns: + + * (string): Value of the information item, if the item exists. + The empty string, if the item does not exist. + """ + return _distro.uname_attr(attribute) + + +try: + from functools import cached_property +except ImportError: + # Python < 3.8 + class cached_property: # type: ignore + """A version of @property which caches the value. On access, it calls the + underlying function and sets the value in `__dict__` so future accesses + will not re-call the property. + """ + + def __init__(self, f: Callable[[Any], Any]) -> None: + self._fname = f.__name__ + self._f = f + + def __get__(self, obj: Any, owner: Type[Any]) -> Any: + assert obj is not None, f"call {self._fname} on an instance" + ret = obj.__dict__[self._fname] = self._f(obj) + return ret + + +class LinuxDistribution: + """ + Provides information about a OS distribution. + + This package creates a private module-global instance of this class with + default initialization arguments, that is used by the + `consolidated accessor functions`_ and `single source accessor functions`_. + By using default initialization arguments, that module-global instance + returns data about the current OS distribution (i.e. the distro this + package runs on). + + Normally, it is not necessary to create additional instances of this class. + However, in situations where control is needed over the exact data sources + that are used, instances of this class can be created with a specific + distro release file, or a specific os-release file, or without invoking the + lsb_release command. 
+ """ + + def __init__( + self, + include_lsb: Optional[bool] = None, + os_release_file: str = "", + distro_release_file: str = "", + include_uname: Optional[bool] = None, + root_dir: Optional[str] = None, + include_oslevel: Optional[bool] = None, + ) -> None: + """ + The initialization method of this class gathers information from the + available data sources, and stores that in private instance attributes. + Subsequent access to the information items uses these private instance + attributes, so that the data sources are read only once. + + Parameters: + + * ``include_lsb`` (bool): Controls whether the + `lsb_release command output`_ is included as a data source. + + If the lsb_release command is not available in the program execution + path, the data source for the lsb_release command will be empty. + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is to be used as a data source. + + An empty string (the default) will cause the default path name to + be used (see `os-release file`_ for details). + + If the specified or defaulted os-release file does not exist, the + data source for the os-release file will be empty. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is to be used as a data source. + + An empty string (the default) will cause a default search algorithm + to be used (see `distro release file`_ for details). + + If the specified distro release file does not exist, or if no default + distro release file can be found, the data source for the distro + release file will be empty. + + * ``include_uname`` (bool): Controls whether uname command output is + included as a data source. If the uname command is not available in + the program execution path the data source for the uname command will + be empty. + + * ``root_dir`` (string): The absolute path to the root directory to use + to find distro-related information files. 
Note that ``include_*`` + parameters must not be enabled in combination with ``root_dir``. + + * ``include_oslevel`` (bool): Controls whether (AIX) oslevel command + output is included as a data source. If the oslevel command is not + available in the program execution path the data source will be + empty. + + Public instance attributes: + + * ``os_release_file`` (string): The path name of the + `os-release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``distro_release_file`` (string): The path name of the + `distro release file`_ that is actually used as a data source. The + empty string if no distro release file is used as a data source. + + * ``include_lsb`` (bool): The result of the ``include_lsb`` parameter. + This controls whether the lsb information will be loaded. + + * ``include_uname`` (bool): The result of the ``include_uname`` + parameter. This controls whether the uname information will + be loaded. + + * ``include_oslevel`` (bool): The result of the ``include_oslevel`` + parameter. This controls whether (AIX) oslevel information will be + loaded. + + * ``root_dir`` (string): The result of the ``root_dir`` parameter. + The absolute path to the root directory to use to find distro-related + information files. + + Raises: + + * :py:exc:`ValueError`: Initialization parameters combination is not + supported. + + * :py:exc:`OSError`: Some I/O issue with an os-release file or distro + release file. + + * :py:exc:`UnicodeError`: A data source has unexpected characters or + uses an unexpected encoding. 
+ """ + self.root_dir = root_dir + self.etc_dir = os.path.join(root_dir, "etc") if root_dir else _UNIXCONFDIR + self.usr_lib_dir = ( + os.path.join(root_dir, "usr/lib") if root_dir else _UNIXUSRLIBDIR + ) + + if os_release_file: + self.os_release_file = os_release_file + else: + etc_dir_os_release_file = os.path.join(self.etc_dir, _OS_RELEASE_BASENAME) + usr_lib_os_release_file = os.path.join( + self.usr_lib_dir, _OS_RELEASE_BASENAME + ) + + # NOTE: The idea is to respect order **and** have it set + # at all times for API backwards compatibility. + if os.path.isfile(etc_dir_os_release_file) or not os.path.isfile( + usr_lib_os_release_file + ): + self.os_release_file = etc_dir_os_release_file + else: + self.os_release_file = usr_lib_os_release_file + + self.distro_release_file = distro_release_file or "" # updated later + + is_root_dir_defined = root_dir is not None + if is_root_dir_defined and (include_lsb or include_uname or include_oslevel): + raise ValueError( + "Including subprocess data sources from specific root_dir is disallowed" + " to prevent false information" + ) + self.include_lsb = ( + include_lsb if include_lsb is not None else not is_root_dir_defined + ) + self.include_uname = ( + include_uname if include_uname is not None else not is_root_dir_defined + ) + self.include_oslevel = ( + include_oslevel if include_oslevel is not None else not is_root_dir_defined + ) + + def __repr__(self) -> str: + """Return repr of all info""" + return ( + "LinuxDistribution(" + "os_release_file={self.os_release_file!r}, " + "distro_release_file={self.distro_release_file!r}, " + "include_lsb={self.include_lsb!r}, " + "include_uname={self.include_uname!r}, " + "include_oslevel={self.include_oslevel!r}, " + "root_dir={self.root_dir!r}, " + "_os_release_info={self._os_release_info!r}, " + "_lsb_release_info={self._lsb_release_info!r}, " + "_distro_release_info={self._distro_release_info!r}, " + "_uname_info={self._uname_info!r}, " + 
"_oslevel_info={self._oslevel_info!r})".format(self=self) + ) + + def linux_distribution( + self, full_distribution_name: bool = True + ) -> Tuple[str, str, str]: + """ + Return information about the OS distribution that is compatible + with Python's :func:`platform.linux_distribution`, supporting a subset + of its parameters. + + For details, see :func:`distro.linux_distribution`. + """ + return ( + self.name() if full_distribution_name else self.id(), + self.version(), + self._os_release_info.get("release_codename") or self.codename(), + ) + + def id(self) -> str: + """Return the distro ID of the OS distribution, as a string. + + For details, see :func:`distro.id`. + """ + + def normalize(distro_id: str, table: Dict[str, str]) -> str: + distro_id = distro_id.lower().replace(" ", "_") + return table.get(distro_id, distro_id) + + distro_id = self.os_release_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_OS_ID) + + distro_id = self.lsb_release_attr("distributor_id") + if distro_id: + return normalize(distro_id, NORMALIZED_LSB_ID) + + distro_id = self.distro_release_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + distro_id = self.uname_attr("id") + if distro_id: + return normalize(distro_id, NORMALIZED_DISTRO_ID) + + return "" + + def name(self, pretty: bool = False) -> str: + """ + Return the name of the OS distribution, as a string. + + For details, see :func:`distro.name`. 
+ """ + name = ( + self.os_release_attr("name") + or self.lsb_release_attr("distributor_id") + or self.distro_release_attr("name") + or self.uname_attr("name") + ) + if pretty: + name = self.os_release_attr("pretty_name") or self.lsb_release_attr( + "description" + ) + if not name: + name = self.distro_release_attr("name") or self.uname_attr("name") + version = self.version(pretty=True) + if version: + name = f"{name} {version}" + return name or "" + + def version(self, pretty: bool = False, best: bool = False) -> str: + """ + Return the version of the OS distribution, as a string. + + For details, see :func:`distro.version`. + """ + versions = [ + self.os_release_attr("version_id"), + self.lsb_release_attr("release"), + self.distro_release_attr("version_id"), + self._parse_distro_release_content(self.os_release_attr("pretty_name")).get( + "version_id", "" + ), + self._parse_distro_release_content( + self.lsb_release_attr("description") + ).get("version_id", ""), + self.uname_attr("release"), + ] + if self.uname_attr("id").startswith("aix"): + # On AIX platforms, prefer oslevel command output. + versions.insert(0, self.oslevel_info()) + elif self.id() == "debian" or "debian" in self.like().split(): + # On Debian-like, add debian_version file content to candidates list. + versions.append(self._debian_version) + version = "" + if best: + # This algorithm uses the last version in priority order that has + # the best precision. If the versions are not in conflict, that + # does not matter; otherwise, using the last one instead of the + # first one might be considered a surprise. 
+ for v in versions: + if v.count(".") > version.count(".") or version == "": + version = v + else: + for v in versions: + if v != "": + version = v + break + if pretty and version and self.codename(): + version = f"{version} ({self.codename()})" + return version + + def version_parts(self, best: bool = False) -> Tuple[str, str, str]: + """ + Return the version of the OS distribution, as a tuple of version + numbers. + + For details, see :func:`distro.version_parts`. + """ + version_str = self.version(best=best) + if version_str: + version_regex = re.compile(r"(\d+)\.?(\d+)?\.?(\d+)?") + matches = version_regex.match(version_str) + if matches: + major, minor, build_number = matches.groups() + return major, minor or "", build_number or "" + return "", "", "" + + def major_version(self, best: bool = False) -> str: + """ + Return the major version number of the current distribution. + + For details, see :func:`distro.major_version`. + """ + return self.version_parts(best)[0] + + def minor_version(self, best: bool = False) -> str: + """ + Return the minor version number of the current distribution. + + For details, see :func:`distro.minor_version`. + """ + return self.version_parts(best)[1] + + def build_number(self, best: bool = False) -> str: + """ + Return the build number of the current distribution. + + For details, see :func:`distro.build_number`. + """ + return self.version_parts(best)[2] + + def like(self) -> str: + """ + Return the IDs of distributions that are like the OS distribution. + + For details, see :func:`distro.like`. + """ + return self.os_release_attr("id_like") or "" + + def codename(self) -> str: + """ + Return the codename of the OS distribution. + + For details, see :func:`distro.codename`. 
+ """ + try: + # Handle os_release specially since distros might purposefully set + # this to empty string to have no codename + return self._os_release_info["codename"] + except KeyError: + return ( + self.lsb_release_attr("codename") + or self.distro_release_attr("codename") + or "" + ) + + def info(self, pretty: bool = False, best: bool = False) -> InfoDict: + """ + Return certain machine-readable information about the OS + distribution. + + For details, see :func:`distro.info`. + """ + return InfoDict( + id=self.id(), + version=self.version(pretty, best), + version_parts=VersionDict( + major=self.major_version(best), + minor=self.minor_version(best), + build_number=self.build_number(best), + ), + like=self.like(), + codename=self.codename(), + ) + + def os_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the os-release file data source of the OS distribution. + + For details, see :func:`distro.os_release_info`. + """ + return self._os_release_info + + def lsb_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the lsb_release command data source of the OS + distribution. + + For details, see :func:`distro.lsb_release_info`. + """ + return self._lsb_release_info + + def distro_release_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the distro release file data source of the OS + distribution. + + For details, see :func:`distro.distro_release_info`. + """ + return self._distro_release_info + + def uname_info(self) -> Dict[str, str]: + """ + Return a dictionary containing key-value pairs for the information + items from the uname command data source of the OS distribution. + + For details, see :func:`distro.uname_info`. + """ + return self._uname_info + + def oslevel_info(self) -> str: + """ + Return AIX' oslevel command output. 
+ """ + return self._oslevel_info + + def os_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the os-release file data + source of the OS distribution. + + For details, see :func:`distro.os_release_attr`. + """ + return self._os_release_info.get(attribute, "") + + def lsb_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the lsb_release command + output data source of the OS distribution. + + For details, see :func:`distro.lsb_release_attr`. + """ + return self._lsb_release_info.get(attribute, "") + + def distro_release_attr(self, attribute: str) -> str: + """ + Return a single named information item from the distro release file + data source of the OS distribution. + + For details, see :func:`distro.distro_release_attr`. + """ + return self._distro_release_info.get(attribute, "") + + def uname_attr(self, attribute: str) -> str: + """ + Return a single named information item from the uname command + output data source of the OS distribution. + + For details, see :func:`distro.uname_attr`. + """ + return self._uname_info.get(attribute, "") + + @cached_property + def _os_release_info(self) -> Dict[str, str]: + """ + Get the information items from the specified os-release file. + + Returns: + A dictionary containing all information items. + """ + if os.path.isfile(self.os_release_file): + with open(self.os_release_file, encoding="utf-8") as release_file: + return self._parse_os_release_content(release_file) + return {} + + @staticmethod + def _parse_os_release_content(lines: TextIO) -> Dict[str, str]: + """ + Parse the lines of an os-release file. + + Parameters: + + * lines: Iterable through the lines in the os-release file. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. 
+ """ + props = {} + lexer = shlex.shlex(lines, posix=True) + lexer.whitespace_split = True + + tokens = list(lexer) + for token in tokens: + # At this point, all shell-like parsing has been done (i.e. + # comments processed, quotes and backslash escape sequences + # processed, multi-line values assembled, trailing newlines + # stripped, etc.), so the tokens are now either: + # * variable assignments: var=value + # * commands or their arguments (not allowed in os-release) + # Ignore any tokens that are not variable assignments + if "=" in token: + k, v = token.split("=", 1) + props[k.lower()] = v + + if "version" in props: + # extract release codename (if any) from version attribute + match = re.search(r"\((\D+)\)|,\s*(\D+)", props["version"]) + if match: + release_codename = match.group(1) or match.group(2) + props["codename"] = props["release_codename"] = release_codename + + if "version_codename" in props: + # os-release added a version_codename field. Use that in + # preference to anything else Note that some distros purposefully + # do not have code names. They should be setting + # version_codename="" + props["codename"] = props["version_codename"] + elif "ubuntu_codename" in props: + # Same as above but a non-standard field name used on older Ubuntus + props["codename"] = props["ubuntu_codename"] + + return props + + @cached_property + def _lsb_release_info(self) -> Dict[str, str]: + """ + Get the information items from the lsb_release command output. + + Returns: + A dictionary containing all information items. 
+ """ + if not self.include_lsb: + return {} + try: + cmd = ("lsb_release", "-a") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + # Command not found or lsb_release returned error + except (OSError, subprocess.CalledProcessError): + return {} + content = self._to_str(stdout).splitlines() + return self._parse_lsb_release_content(content) + + @staticmethod + def _parse_lsb_release_content(lines: Iterable[str]) -> Dict[str, str]: + """ + Parse the output of the lsb_release command. + + Parameters: + + * lines: Iterable through the lines of the lsb_release output. + Each line must be a unicode string or a UTF-8 encoded byte + string. + + Returns: + A dictionary containing all information items. + """ + props = {} + for line in lines: + kv = line.strip("\n").split(":", 1) + if len(kv) != 2: + # Ignore lines without colon. + continue + k, v = kv + props.update({k.replace(" ", "_").lower(): v.strip()}) + return props + + @cached_property + def _uname_info(self) -> Dict[str, str]: + if not self.include_uname: + return {} + try: + cmd = ("uname", "-rs") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + except OSError: + return {} + content = self._to_str(stdout).splitlines() + return self._parse_uname_content(content) + + @cached_property + def _oslevel_info(self) -> str: + if not self.include_oslevel: + return "" + try: + stdout = subprocess.check_output("oslevel", stderr=subprocess.DEVNULL) + except (OSError, subprocess.CalledProcessError): + return "" + return self._to_str(stdout).strip() + + @cached_property + def _debian_version(self) -> str: + try: + with open( + os.path.join(self.etc_dir, "debian_version"), encoding="ascii" + ) as fp: + return fp.readline().rstrip() + except FileNotFoundError: + return "" + + @staticmethod + def _parse_uname_content(lines: Sequence[str]) -> Dict[str, str]: + if not lines: + return {} + props = {} + match = re.search(r"^([^\s]+)\s+([\d\.]+)", lines[0].strip()) + if match: + name, version = 
match.groups() + + # This is to prevent the Linux kernel version from + # appearing as the 'best' version on otherwise + # identifiable distributions. + if name == "Linux": + return {} + props["id"] = name.lower() + props["name"] = name + props["release"] = version + return props + + @staticmethod + def _to_str(bytestring: bytes) -> str: + encoding = sys.getfilesystemencoding() + return bytestring.decode(encoding) + + @cached_property + def _distro_release_info(self) -> Dict[str, str]: + """ + Get the information items from the specified distro release file. + + Returns: + A dictionary containing all information items. + """ + if self.distro_release_file: + # If it was specified, we use it and parse what we can, even if + # its file name or content does not match the expected pattern. + distro_info = self._parse_distro_release_file(self.distro_release_file) + basename = os.path.basename(self.distro_release_file) + # The file name pattern for user-specified distro release files + # is somewhat more tolerant (compared to when searching for the + # file), because we want to use what was specified as best as + # possible. + match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + else: + try: + basenames = [ + basename + for basename in os.listdir(self.etc_dir) + if basename not in _DISTRO_RELEASE_IGNORE_BASENAMES + and os.path.isfile(os.path.join(self.etc_dir, basename)) + ] + # We sort for repeatability in cases where there are multiple + # distro specific files; e.g. CentOS, Oracle, Enterprise all + # containing `redhat-release` on top of their own. + basenames.sort() + except OSError: + # This may occur when /etc is not readable but we can't be + # sure about the *-release files. Check common entries of + # /etc for information. If they turn out to not be there the + # error is handled in `_parse_distro_release_file()`. 
+ basenames = _DISTRO_RELEASE_BASENAMES + for basename in basenames: + match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) + if match is None: + continue + filepath = os.path.join(self.etc_dir, basename) + distro_info = self._parse_distro_release_file(filepath) + # The name is always present if the pattern matches. + if "name" not in distro_info: + continue + self.distro_release_file = filepath + break + else: # the loop didn't "break": no candidate. + return {} + + if match is not None: + distro_info["id"] = match.group(1) + + # CloudLinux < 7: manually enrich info with proper id. + if "cloudlinux" in distro_info.get("name", "").lower(): + distro_info["id"] = "cloudlinux" + + return distro_info + + def _parse_distro_release_file(self, filepath: str) -> Dict[str, str]: + """ + Parse a distro release file. + + Parameters: + + * filepath: Path name of the distro release file. + + Returns: + A dictionary containing all information items. + """ + try: + with open(filepath, encoding="utf-8") as fp: + # Only parse the first line. For instance, on SLES there + # are multiple lines. We don't want them... + return self._parse_distro_release_content(fp.readline()) + except OSError: + # Ignore not being able to read a specific, seemingly version + # related file. + # See https://github.com/python-distro/distro/issues/162 + return {} + + @staticmethod + def _parse_distro_release_content(line: str) -> Dict[str, str]: + """ + Parse a line from a distro release file. + + Parameters: + * line: Line from the distro release file. Must be a unicode string + or a UTF-8 encoded byte string. + + Returns: + A dictionary containing all information items. 
+ """ + matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match(line.strip()[::-1]) + distro_info = {} + if matches: + # regexp ensures non-None + distro_info["name"] = matches.group(3)[::-1] + if matches.group(2): + distro_info["version_id"] = matches.group(2)[::-1] + if matches.group(1): + distro_info["codename"] = matches.group(1)[::-1] + elif line: + distro_info["name"] = line.strip() + return distro_info + + +_distro = LinuxDistribution() + + +def main() -> None: + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + parser = argparse.ArgumentParser(description="OS distro info tool") + parser.add_argument( + "--json", "-j", help="Output in machine readable format", action="store_true" + ) + + parser.add_argument( + "--root-dir", + "-r", + type=str, + dest="root_dir", + help="Path to the root filesystem directory (defaults to /)", + ) + + args = parser.parse_args() + + if args.root_dir: + dist = LinuxDistribution( + include_lsb=False, + include_uname=False, + include_oslevel=False, + root_dir=args.root_dir, + ) + else: + dist = _distro + + if args.json: + logger.info(json.dumps(dist.info(), indent=4, sort_keys=True)) + else: + logger.info("Name: %s", dist.name(pretty=True)) + distribution_version = dist.version(pretty=True) + logger.info("Version: %s", distribution_version) + distribution_codename = dist.codename() + logger.info("Codename: %s", distribution_codename) + + +if __name__ == "__main__": + main() diff --git a/.venv/lib/python3.11/site-packages/distro/py.typed b/.venv/lib/python3.11/site-packages/distro/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 
--- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..76f67efdc6470081b512a8db5bf2b1d4962d9c3c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Georgi Gerganov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..e97309f52e8dab48a7422599ee189c5b7f03888c --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/METADATA @@ -0,0 +1,114 @@ +Metadata-Version: 2.1 +Name: gguf +Version: 0.10.0 +Summary: Read and write ML models in GGUF for GGML +Home-page: https://ggml.ai +Keywords: ggml,gguf,llama.cpp +Author: GGML +Author-email: ggml@ggml.ai +Requires-Python: >=3.8 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Requires-Dist: numpy (>=1.17) +Requires-Dist: pyyaml (>=5.1) +Requires-Dist: tqdm (>=4.27) +Project-URL: Repository, https://github.com/ggerganov/llama.cpp +Description-Content-Type: text/markdown + +## gguf + +This is a Python package for writing binary files in the [GGUF](https://github.com/ggerganov/ggml/pull/302) +(GGML Universal File) format. + +See [convert_hf_to_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) +as an example for its usage. + +## Installation +```sh +pip install gguf +``` + +## API Examples/Simple Tools + +[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model. + +[scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console. 
+ +[scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key. + +[scripts/gguf_convert_endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_convert_endian.py) — Allows converting the endianness of GGUF files. + +[scripts/gguf_new_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf_new_metadata.py) — Copies a GGUF file with added/modified/removed metadata values. + +## Development +Maintainers who participate in development of this package are advised to install it in editable mode: + +```sh +cd /path/to/llama.cpp/gguf-py + +pip install --editable . +``` + +**Note**: This may require to upgrade your Pip installation, with a message saying that editable installation currently requires `setup.py`. +In this case, upgrade Pip to the latest: + +```sh +pip install --upgrade pip +``` + +## Automatic publishing with CI + +There's a GitHub workflow to make a release automatically upon creation of tags in a specified format. + +1. Bump the version in `pyproject.toml`. +2. Create a tag named `gguf-vx.x.x` where `x.x.x` is the semantic version number. + +```sh +git tag -a gguf-v1.0.0 -m "Version 1.0 release" +``` + +3. Push the tags. + +```sh +git push origin --tags +``` + +## Manual publishing +If you want to publish the package manually for any reason, you need to have `twine` and `build` installed: + +```sh +pip install build twine +``` + +Then, follow these steps to release a new version: + +1. Bump the version in `pyproject.toml`. +2. Build the package: + +```sh +python -m build +``` + +3. 
Upload the generated distribution archives: + +```sh +python -m twine upload dist/* +``` + +## Run Unit Tests + +From root of this repository you can run this command to run all the unit tests + +```bash +python -m unittest discover ./gguf-py -v +``` + +## TODO +- [ ] Include conversion scripts as command line entry points in this package. + diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..ccb5732a1710165e6bef70154355b390ecabb120 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/RECORD @@ -0,0 +1,45 @@ +../../../bin/gguf-convert-endian,sha256=W53Q9wLM4ktx91uKrLfyuFNGS7UCbZr-mA2U66A_W0E,276 +../../../bin/gguf-dump,sha256=v29_dyj4DF8zullVHEvLoQotNn0dgtrt8f-NjwDigFw,256 +../../../bin/gguf-new-metadata,sha256=UwjDwictRuL-TFKI2mlnkzB8YzGJSNWJ69870J9w64Y,272 +../../../bin/gguf-set-metadata,sha256=iqt1mYpW1z41SzKpVu0r6NLBVYPDc-k06JH7cmo-e9s,272 +gguf-0.10.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +gguf-0.10.0.dist-info/LICENSE,sha256=73jH5mWeNMeYGU8NNE6AfHIt5wy8oTWe9UdyZh4Ryjg,1072 +gguf-0.10.0.dist-info/METADATA,sha256=0jck7V9EOoT24ugiLaorfqFzhmN3eJM7lj-fyq8tvy8,3533 +gguf-0.10.0.dist-info/RECORD,, +gguf-0.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88 +gguf-0.10.0.dist-info/entry_points.txt,sha256=6SBOHW2PZstAG6hvoQ63pyxPF8CIGjGLXZyG-NFFnAc,227 +gguf/__init__.py,sha256=PM_AEEzX6ojGAodDt78_LIm19HRCXeA6IXpgcjINfC8,219 +gguf/__pycache__/__init__.cpython-311.pyc,, +gguf/__pycache__/constants.cpython-311.pyc,, +gguf/__pycache__/gguf.cpython-311.pyc,, +gguf/__pycache__/gguf_reader.cpython-311.pyc,, +gguf/__pycache__/gguf_writer.cpython-311.pyc,, +gguf/__pycache__/lazy.cpython-311.pyc,, +gguf/__pycache__/metadata.cpython-311.pyc,, +gguf/__pycache__/quants.cpython-311.pyc,, +gguf/__pycache__/tensor_mapping.cpython-311.pyc,, 
+gguf/__pycache__/utility.cpython-311.pyc,, +gguf/__pycache__/vocab.cpython-311.pyc,, +gguf/constants.py,sha256=00pCXTqoWP36ZR9OcuxJfhezZfxqjUTKGCNvGMrkLGU,49158 +gguf/gguf.py,sha256=8MDu7a0JEXhLUv_tjhYqDrWubVNc41cFvBYZbkZZenI,478 +gguf/gguf_reader.py,sha256=0Y86qmxYfPNq3u_AiAwSnhU_G-V2SjXK2b7CM3MoPP4,12365 +gguf/gguf_writer.py,sha256=ZWW_XwN6tyDIzBCwl6ARDwwRZ9TewNMV32IQaVYRNP8,35090 +gguf/lazy.py,sha256=QwxFAtj-5nIm_a3mvLBezpShFuo-pjJhoAiXreK9ITQ,8601 +gguf/metadata.py,sha256=nFEqgODwH6jYF3_jcycZkN4FjLnfXOQ7A73cLKZJb0Y,25722 +gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +gguf/quants.py,sha256=_6kmSprMfawH3gcPz9WbxnlXCeZQIYivf_HjfUvUkM0,57336 +gguf/tensor_mapping.py,sha256=8CpqXKwKYFz6v11Fpzhe85hPcH18gnieh_FQsISpcVc,30893 +gguf/utility.py,sha256=jbd1bduLFjjNAQhjghSyrRQfJ_kjiTFhRAZxm0oabZw,2934 +gguf/vocab.py,sha256=N4PNulV8x_m7cdbmbUBF3nqQ0UXGTDi31oEIh3h8hAs,19046 +scripts/__init__.py,sha256=-Az7fR5lhVb9GPB5U_BaDb8Xp706km53vSY7aXQYw9I,297 +scripts/__pycache__/__init__.cpython-311.pyc,, +scripts/__pycache__/gguf_convert_endian.cpython-311.pyc,, +scripts/__pycache__/gguf_dump.cpython-311.pyc,, +scripts/__pycache__/gguf_hash.cpython-311.pyc,, +scripts/__pycache__/gguf_new_metadata.cpython-311.pyc,, +scripts/__pycache__/gguf_set_metadata.cpython-311.pyc,, +scripts/gguf_convert_endian.py,sha256=tcpe1O6xLTSetYD5T8dYcDN7cwiTWS-5sH7iFCP7ga8,5279 +scripts/gguf_dump.py,sha256=Ej3XWrYupFOg-Bz-ff9EAN5hdmp2OCdEkd46D4PtZ58,21934 +scripts/gguf_hash.py,sha256=DtzQ3xSdS8bHxdsp8ELQaBlaZiqqDBXzqtRSEzYTkhM,3711 +scripts/gguf_new_metadata.py,sha256=bGuF8iXkElxIsr4mR952DHEBTosA9ob-UkF-zEqlPqo,10713 +scripts/gguf_set_metadata.py,sha256=9jISgnh2atbwKh-68oN5b0wLxs0mYpXOWILjuqLTt-Q,4131 diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..d73ccaae8e0eea45949b0957a5af034099b36aa4 --- /dev/null +++ 
b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: poetry-core 1.9.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..419684c75c1cf14cbbcf3f970d401862b280c91d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/gguf-0.10.0.dist-info/entry_points.txt @@ -0,0 +1,6 @@ +[console_scripts] +gguf-convert-endian=scripts:gguf_convert_endian_entrypoint +gguf-dump=scripts:gguf_dump_entrypoint +gguf-new-metadata=scripts:gguf_new_metadata_entrypoint +gguf-set-metadata=scripts:gguf_set_metadata_entrypoint + diff --git a/.venv/lib/python3.11/site-packages/mistral_common/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5197c5f5acdac86c91de46aef8412fbc75ca5656 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/__init__.py @@ -0,0 +1 @@ +__version__ = "1.5.2" diff --git a/.venv/lib/python3.11/site-packages/mistral_common/base.py b/.venv/lib/python3.11/site-packages/mistral_common/base.py new file mode 100644 index 0000000000000000000000000000000000000000..dfa180b2985c8f2445de3cd0469776045624bf8a --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/base.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel, ConfigDict + + +class MistralBase(BaseModel): + """ + Base class for all Mistral Pydantic models. 
+ """ + + model_config = ConfigDict(extra="forbid", validate_default=True, use_enum_values=True) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py b/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..236cd9a8b9a7586377c62381986f31d0447eb9d0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/exceptions.py @@ -0,0 +1,67 @@ +from typing import Optional + + +class MistralCommonException(Exception): + message: str = "Internal server error" + + def __init__( + self, + message: Optional[str] = None, + ) -> None: + if message: + self.message = message + + +class TokenizerException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class UnsupportedTokenizerFeatureException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidRequestException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidSystemPromptException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidMessageStructureException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidAssistantMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidToolMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidToolSchemaException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidUserMessageException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class InvalidFunctionCallException(MistralCommonException): + def __init__(self, 
message: str) -> None: + super().__init__(message) + + +class InvalidToolException(MistralCommonException): + def __init__(self, message: str) -> None: + super().__init__(message) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py b/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py new file mode 100644 index 0000000000000000000000000000000000000000..4b8f6bd9226c7d9f45aa0c5ec5a0cd2d50397f7e --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/multimodal.py @@ -0,0 +1,70 @@ +import base64 +import io +from typing import Union + +import requests +from PIL import Image +from pydantic import BeforeValidator, PlainSerializer, SerializationInfo +from typing_extensions import Annotated + +from mistral_common import __version__ + + +def download_image(url: str) -> Image.Image: + headers = {"User-Agent": f"mistral-common/{__version__}"} + try: + # Make a request to download the image + response = requests.get(url, headers=headers) + response.raise_for_status() # Raise an error for bad responses (4xx, 5xx) + + # Convert the image content to a PIL Image + img = Image.open(io.BytesIO(response.content)) + return img + + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error downloading the image from {url}: {e}.") + except Exception as e: + raise RuntimeError(f"Error converting to PIL image: {e}") + + +def maybe_load_image_from_str_or_bytes(x: Union[Image.Image, str, bytes]) -> Image.Image: + if isinstance(x, Image.Image): + return x + if isinstance(x, bytes): + try: + return Image.open(io.BytesIO(x)) + except Exception: + raise RuntimeError("Encountered an error when loading image from bytes.") + + try: + image = Image.open(io.BytesIO(base64.b64decode(x.encode("ascii")))) + return image + except Exception as e: + raise RuntimeError( + f"Encountered an error when loading image from bytes starting " + f"with '{x[:20]}'. Expected either a PIL.Image.Image or a base64 " + f"encoded string of bytes." 
+ ) from e + + +def serialize_image_to_byte_str(im: Image.Image, info: SerializationInfo) -> str: + if hasattr(info, "context"): + context = info.context or {} + else: + context = {} + + stream = io.BytesIO() + im_format = im.format or "PNG" + im.save(stream, format=im_format) + im_b64 = base64.b64encode(stream.getvalue()).decode("ascii") + if context and (max_image_b64_len := context.get("max_image_b64_len")): + return im_b64[:max_image_b64_len] + "..." + return im_b64 + + +# A normal PIL image that supports serialization to b64 bytes string +SerializableImage = Annotated[ + Image.Image, + BeforeValidator(maybe_load_image_from_str_or_bytes), + PlainSerializer(serialize_image_to_byte_str), +] diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc21452e80f949a189c8dad291cf91539b4e615b Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ba2e235db5f7918b238050c1dd36e8594afacef Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc 
b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5eb5ebbb65102460384f33c363b69a9e592da463 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/__pycache__/utils.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py new file mode 100644 index 0000000000000000000000000000000000000000..74d0573e32710698eecbb3d778e56e54a22915ec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/base.py @@ -0,0 +1,18 @@ +from typing import Optional + +from pydantic import Field + +from mistral_common.base import MistralBase + + +class UsageInfo(MistralBase): + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + + +class BaseCompletionRequest(MistralBase): + temperature: float = Field(default=0.7, ge=0.0, le=1.0) + top_p: float = Field(default=1.0, ge=0.0, le=1.0) + max_tokens: Optional[int] = Field(default=None, ge=0) + random_seed: Optional[int] = Field(default=None, ge=0) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__init__.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75a3723c363f08887869a6636ae1f4eddd64f59e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/normalize.cpython-311.pyc differ diff --git 
a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50558933d83da9d8294c6f200e2e719be3717dcd Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/request.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6fb3177b2a664886560f5c7d9cef6aa8e2aff32 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/tool_calls.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60587e8677e11f6ba085a8560d3a95073c24053f Binary files /dev/null and b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/validator.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py new file mode 100644 index 0000000000000000000000000000000000000000..82d6f623c5d71f049768b11a6a6ae2515177ab21 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/messages.py @@ -0,0 +1,113 @@ +from enum import Enum +from typing import List, Literal, Optional, TypeVar, Union + +from pydantic import ConfigDict, Field +from typing_extensions import Annotated, 
TypeAlias + +from mistral_common.base import MistralBase +from mistral_common.multimodal import SerializableImage +from mistral_common.protocol.instruct.tool_calls import ToolCall + + +class ChunkTypes(str, Enum): + text = "text" + image = "image" + image_url = "image_url" + + +class BaseContentChunk(MistralBase): + type: Literal[ChunkTypes.text, ChunkTypes.image, ChunkTypes.image_url] + + +class ImageChunk(BaseContentChunk): + type: Literal[ChunkTypes.image] = ChunkTypes.image + image: SerializableImage + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class ImageURL(MistralBase): + url: str + detail: Optional[str] = None + + +class ImageURLChunk(BaseContentChunk): + """ + {"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0 + """ + + type: Literal[ChunkTypes.image_url] = ChunkTypes.image_url + image_url: Union[ImageURL, str] + + model_config = ConfigDict(arbitrary_types_allowed=True) + + def get_url(self) -> str: + if isinstance(self.image_url, ImageURL): + return self.image_url.url + return self.image_url + + +class TextChunk(BaseContentChunk): + type: Literal[ChunkTypes.text] = ChunkTypes.text + text: str + + +ContentChunk = Annotated[Union[TextChunk, ImageChunk, ImageURLChunk], Field(discriminator="type")] + + +class Roles(str, Enum): + system = "system" + user = "user" + assistant = "assistant" + tool = "tool" + + +class BaseMessage(MistralBase): + role: Literal[Roles.system, Roles.user, Roles.assistant, Roles.tool] + + +class UserMessage(BaseMessage): + role: Literal[Roles.user] = Roles.user + content: Union[str, List[ContentChunk]] + + +class SystemMessage(BaseMessage): + role: Literal[Roles.system] = Roles.system + content: Union[str, List[ContentChunk]] + + +class AssistantMessage(BaseMessage): + role: Literal[Roles.assistant] = Roles.assistant + content: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + prefix: bool = False + + +class FinetuningAssistantMessage(AssistantMessage): + weight: Optional[float] 
= None + + +class ToolMessage(BaseMessage): + content: str + role: Literal[Roles.tool] = Roles.tool + tool_call_id: Optional[str] = None + + # Deprecated in V3 tokenization + name: Optional[str] = None + + +ChatMessage = Annotated[Union[SystemMessage, UserMessage, AssistantMessage, ToolMessage], Field(discriminator="role")] + +FinetuningMessage = Annotated[ + Union[SystemMessage, UserMessage, FinetuningAssistantMessage, ToolMessage], + Field(discriminator="role"), +] + +ChatMessageType = TypeVar("ChatMessageType", bound=ChatMessage) + +# Used for type hinting in generic classes where we might override the message types +UserMessageType = TypeVar("UserMessageType", bound=UserMessage) +AssistantMessageType = TypeVar("AssistantMessageType", bound=AssistantMessage) +ToolMessageType = TypeVar("ToolMessageType", bound=ToolMessage) +SystemMessageType = TypeVar("SystemMessageType", bound=SystemMessage) + +UATS: TypeAlias = Union[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType] diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..2852f968e8c54dcb48f07a4a5a3ff020606d8ce0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/normalize.py @@ -0,0 +1,265 @@ +import json +from typing import Generic, List, Optional, Sequence, Type, Union + +from mistral_common.protocol.instruct.messages import ( + UATS, + AssistantMessage, + AssistantMessageType, + ContentChunk, + FinetuningAssistantMessage, + Roles, + SystemMessage, + SystemMessageType, + TextChunk, + ToolMessage, + ToolMessageType, + UserMessage, + UserMessageType, +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.protocol.instruct.tool_calls import FunctionCall, Tool, ToolCall +from mistral_common.tokens.instruct.request import 
InstructRequest +from mistral_common.tokens.tokenizers.base import InstructRequestType, TokenizerVersion + + +class InstructRequestNormalizer( + Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, InstructRequestType] +): + """ + Takes a ChatCompletionRequest and normalizes it into an InstructRequest. + + The normalization process does several things such as: + - Aggregate consecutive messages of the same role + - Aggregate system prompts + - Normalize json content + - Normalize tool calls + """ + + system_prompt_in_begin: bool = False + + def __init__( + self, + user_message_class: Type[UserMessageType], + assistant_message_class: Type[AssistantMessageType], + tool_message_class: Type[ToolMessageType], + system_message_class: Type[SystemMessageType], + instruct_request_class: Type[InstructRequestType], + ): + self._user_message_class = user_message_class + self._assistant_message_class = assistant_message_class + self._tool_message_class = tool_message_class + self._instruct_request_class = instruct_request_class + # this is unused but makes creation nicer + self._system_message_class = system_message_class + + @staticmethod + def normalizer() -> "InstructRequestNormalizer": + return InstructRequestNormalizer( + UserMessage, + AssistantMessage, + ToolMessage, + SystemMessage, + InstructRequest[UATS, Tool], + ) + + def _normalize_json_content(self, content: Optional[str]) -> str: + if content is None or len(content) == 0: + return "{}" + + try: + parsed_json = json.loads(content) + normalized_content = json.dumps(parsed_json, ensure_ascii=False) + except json.JSONDecodeError: + normalized_content = content + return normalized_content + + def _aggregate_content_chunks(self, content: Union[str, List[TextChunk]], chunk_join_str: str = "\n\n") -> str: + if isinstance(content, list): + return chunk_join_str.join([chunk.text for chunk in content]) + else: + return content + + def _aggregate_system_prompts(self, request: 
ChatCompletionRequest[UATS]) -> Optional[str]: + system_prompt: List[str] = [] + + for message in request.messages: + if message.role == Roles.system and message.content: + system_prompt.append(self._aggregate_content_chunks(message.content)) + + return "\n\n".join(system_prompt) if len(system_prompt) else None + + def _aggregate_tool_messages(self, messages: List[UATS]) -> List[ToolMessageType]: + """ + We currently do not do any aggregation for tool messages, but we normalize the json content + """ + tool_messages: List[ToolMessageType] = [] + for message in messages: + assert isinstance(message, self._tool_message_class), "Expected tool message" + content = self._aggregate_content_chunks(message.content) + normalized_content = self._normalize_json_content(content) + tool_messages.append( + self._tool_message_class( + content=normalized_content, tool_call_id=message.tool_call_id, name=message.name + ) + ) + + return tool_messages + + def _normalize_tool_call(self, tool_call: ToolCall) -> ToolCall: + normalized_function_aruments = self._normalize_json_content(tool_call.function.arguments) + return ToolCall( + function=FunctionCall(name=tool_call.function.name, arguments=normalized_function_aruments), + id=tool_call.id, + ) + + def _aggregate_assistant_messages(self, messages: List[UATS]) -> AssistantMessageType: + aggregated_content: List[str] = [] + tool_calls: List[ToolCall] = [] + prefix: bool = False + weight: Optional[float] = None + for message in messages: + assert isinstance(message, self._assistant_message_class), "Expected assistant message" + if message.tool_calls is not None and len(message.tool_calls) > 0: + for tool_call in message.tool_calls: + normalized_tool_call = self._normalize_tool_call(tool_call) + tool_calls.append(normalized_tool_call) + elif message.content: + aggregated_content.append(self._aggregate_content_chunks(message.content)) + prefix |= message.prefix + if isinstance(message, FinetuningAssistantMessage): + # Only 
FinetuningAssistantMessage can be weighted + if weight is not None: + assert ( + weight == message.weight + ), "Expected weights of aggregated FinetuningAssistantMessage to be equal" + weight = message.weight + + aggregated_message = self._assistant_message_class( + content="\n\n".join(aggregated_content) if len(aggregated_content) else None, + tool_calls=tool_calls or None, + prefix=prefix, + ) + + if weight is not None and hasattr(aggregated_message, "weight"): + aggregated_message.weight = weight + return aggregated_message + + def _aggregate_user_messages(self, messages: List[UATS]) -> UserMessageType: + """ + Just coalesce neighboring blocks of text + """ + all_content: List[ContentChunk] = [] + text_chunks: List[str] = [] + for message in messages: + assert isinstance(message, self._user_message_class), f"Expected user message got {type(message)}" + if isinstance(message.content, str): + text_chunks.append(message.content) + else: # it's a List[ContentChunk] + for chunk in message.content: + if isinstance(chunk, TextChunk): + text_chunks.append(chunk.text) + else: + if text_chunks: + all_content.append(TextChunk(text="\n\n".join(text_chunks))) + text_chunks = [] + all_content.append(chunk) + + text_content = "\n\n".join(text_chunks) if text_chunks else "" + + if not all_content: + # if no ContentChunk was passed, we return content as a str + return self._user_message_class(content=text_content) + + if text_content: + # else we return a List of content chunks + all_content.append(TextChunk(text=text_content)) + + return self._user_message_class(content=all_content) + + def _aggregate_role(self, messages: List[UATS], role: Optional[Roles]) -> Sequence[UATS]: + if role == Roles.tool: + return self._aggregate_tool_messages(messages) + elif role == Roles.assistant: + return [self._aggregate_assistant_messages(messages)] + elif role == Roles.user: + return [self._aggregate_user_messages(messages)] + else: # System messages are ignored + return [] + + def 
_aggregate_messages(self, request: ChatCompletionRequest[UATS]) -> List[UATS]: + aggregated_messages: List[UATS] = [] + messages_to_aggregate: List[UATS] = [] + current_role: Optional[Roles] = None + current_weight: Optional[float] = None + + # Collect consecutive lists of messages with the same role and weight + for message in request.messages: + new_weight = getattr(message, "weight", None) + if current_role != message.role or (new_weight != current_weight): + aggregated_messages.extend(self._aggregate_role(messages_to_aggregate, current_role)) + messages_to_aggregate.clear() + current_weight = new_weight + current_role = message.role + messages_to_aggregate.append(message) + + # Add the last set of messages + aggregated_messages.extend(self._aggregate_role(messages_to_aggregate, current_role)) + + # If the first message is not a user message, or we didnt aggregate + # anything (all system messages) for example, add an empty user message + if len(aggregated_messages) == 0 or ( + not self.system_prompt_in_begin and aggregated_messages[0].role != Roles.user + ): + aggregated_messages.insert(0, self._user_message_class(content="")) + + return aggregated_messages + + def from_chat_completion_request(self, request: ChatCompletionRequest[UATS]) -> InstructRequestType: + system_prompt = self._aggregate_system_prompts(request) + messages = self._aggregate_messages(request) + + return self._instruct_request_class( + messages=messages, system_prompt=system_prompt, available_tools=request.tools + ) + + +class InstructRequestNormalizerV7(InstructRequestNormalizer): + system_prompt_in_begin: bool = True + + @staticmethod + def normalizer() -> "InstructRequestNormalizerV7": + return InstructRequestNormalizerV7( + UserMessage, + AssistantMessage, + ToolMessage, + SystemMessage, + InstructRequest[UATS, Tool], + ) + + def _aggregate_role(self, messages: List[UATS], role: Optional[Roles]) -> Sequence[UATS]: + if role == Roles.tool: + return self._aggregate_tool_messages(messages) 
+ elif role == Roles.assistant: + return [self._aggregate_assistant_messages(messages)] + elif role == Roles.user: + return [self._aggregate_user_messages(messages)] + elif role == Roles.system: + return messages + else: + assert role is None and len(messages) == 0 + return [] + + def _aggregate_system_prompts(self, request: ChatCompletionRequest[UATS]) -> Optional[str]: + raise NotImplementedError("We should not aggregate system prompts") + + def from_chat_completion_request(self, request: ChatCompletionRequest[UATS]) -> InstructRequestType: # type: ignore[type-var] + messages = self._aggregate_messages(request) + return self._instruct_request_class(messages=messages, system_prompt=None, available_tools=request.tools) # type: ignore[no-any-return] + + +def normalizer_for_tokenizer_version(version: TokenizerVersion) -> InstructRequestNormalizer: + if version in {TokenizerVersion.v1, TokenizerVersion.v2, TokenizerVersion.v3}: + return InstructRequestNormalizer.normalizer() + elif version == TokenizerVersion.v7: + return InstructRequestNormalizerV7.normalizer() + raise ValueError(f"Unknown tokenizer version {version}") diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4ee9040a036dfe00bc8faa0581c81473d63a43 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/request.py @@ -0,0 +1,27 @@ +from enum import Enum +from typing import Generic, List, Optional + +from pydantic import Field + +from mistral_common.base import MistralBase +from mistral_common.protocol.base import BaseCompletionRequest +from mistral_common.protocol.instruct.messages import ChatMessageType +from mistral_common.protocol.instruct.tool_calls import Tool, ToolChoice + + +class ResponseFormats(str, Enum): + text: str = "text" + json: str = "json_object" + + +class 
ResponseFormat(MistralBase): + type: ResponseFormats = ResponseFormats.text + + +class ChatCompletionRequest(BaseCompletionRequest, Generic[ChatMessageType]): + model: Optional[str] = None + messages: List[ChatMessageType] + response_format: ResponseFormat = Field(default_factory=ResponseFormat) + tools: Optional[List[Tool]] = None + tool_choice: ToolChoice = ToolChoice.auto + truncate_for_context_length: bool = False diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py new file mode 100644 index 0000000000000000000000000000000000000000..f79aa320665178f78248aaf1c0e3f68599acbb24 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/response.py @@ -0,0 +1,66 @@ +import time +from enum import Enum +from typing import List, Optional + +from pydantic import Field + +from mistral_common.base import MistralBase +from mistral_common.protocol.base import UsageInfo +from mistral_common.protocol.instruct.tool_calls import ToolCall +from mistral_common.protocol.utils import random_uuid + + +class FinishReason(str, Enum): + stop: str = "stop" + length: str = "length" + model_length: str = "model_length" + error: str = "error" + tool_call: str = "tool_calls" + + +class ChatCompletionTokenLogprobs(MistralBase): + token: str + logprob: float + bytes: List[int] + + +class ChatCompletionResponseChoiceLogprobs(MistralBase): + content: List[ChatCompletionTokenLogprobs] + + +class DeltaMessage(MistralBase): + role: Optional[str] = None + content: Optional[str] = None + tool_calls: Optional[List[ToolCall]] = None + + +class ChatCompletionResponseChoice(MistralBase): + index: int + message: DeltaMessage + finish_reason: Optional[FinishReason] = None + logprobs: Optional[ChatCompletionResponseChoiceLogprobs] = None + + +class ChatCompletionResponse(MistralBase): + id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}") + 
object: str = "chat.completion" + created: int = Field(default_factory=lambda: int(time.time())) + model: str + choices: List[ChatCompletionResponseChoice] + usage: UsageInfo + + +class ChatCompletionResponseStreamChoice(MistralBase): + index: int + delta: DeltaMessage + finish_reason: Optional[FinishReason] = None + logprobs: Optional[ChatCompletionResponseChoiceLogprobs] = None + + +class ChatCompletionStreamResponse(MistralBase): + id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}") + object: str = "chat.completion.chunk" + created: int = Field(default_factory=lambda: int(time.time())) + model: str + choices: List[ChatCompletionResponseStreamChoice] + usage: Optional[UsageInfo] = None diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py new file mode 100644 index 0000000000000000000000000000000000000000..764ba4cb45bb8ee958730b1dbd69a7bc7a51a8d9 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/tool_calls.py @@ -0,0 +1,51 @@ +import json +from enum import Enum +from typing import Any, Dict, TypeVar, Union + +from pydantic import field_validator + +from mistral_common.base import MistralBase + + +class Function(MistralBase): + name: str + description: str = "" + parameters: Dict[str, Any] + + +class ToolTypes(str, Enum): + function = "function" + + +class ToolChoice(str, Enum): + auto: str = "auto" + none: str = "none" + any: str = "any" + + +class Tool(MistralBase): + type: ToolTypes = ToolTypes.function + function: Function + + +class FunctionCall(MistralBase): + name: str + arguments: str + + @field_validator("arguments", mode="before") + def validate_arguments(cls, v: Union[str, Dict[str, Any]]) -> str: + """ + This is for backward compatibility + """ + if isinstance(v, dict): + return json.dumps(v) + return v + + +class ToolCall(MistralBase): + id: str = "null" # required for V3 
tokenization + type: ToolTypes = ToolTypes.function + function: FunctionCall + + +ToolType = TypeVar("ToolType", bound=Tool) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..202d36353bc91b5cf51c636450c79132b07ab18f --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/validator.py @@ -0,0 +1,328 @@ +import re +from enum import Enum +from typing import Generic, List + +from jsonschema import Draft7Validator, SchemaError + +from mistral_common.exceptions import ( + InvalidAssistantMessageException, + InvalidFunctionCallException, + InvalidMessageStructureException, + InvalidRequestException, + InvalidSystemPromptException, + InvalidToolException, + InvalidToolMessageException, + InvalidToolSchemaException, +) +from mistral_common.protocol.instruct.messages import ( + UATS, + AssistantMessage, + AssistantMessageType, + FinetuningAssistantMessage, + Roles, + SystemMessageType, + ToolMessageType, + UserMessageType, +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.protocol.instruct.tool_calls import ( + Function, + FunctionCall, + Tool, + ToolCall, +) + + +class ValidationMode(Enum): + serving = "serving" + finetuning = "finetuning" + test = "test" + + +class MistralRequestValidator(Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType]): + def __init__(self, mode: ValidationMode = ValidationMode.test): + self._mode = mode + + def validate_messages(self, messages: List[UATS]) -> None: + """ + Validates the list of messages + """ + self._validate_message_list_structure(messages) + self._validate_message_list_content(messages) + + def validate_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest[UATS]: + """ + Validates the request + """ + + if 
self._mode == ValidationMode.serving: + if request.model is None: + raise InvalidRequestException("Model name parameter is required for serving mode") + + # Validate the messages + self.validate_messages(request.messages) + + # Validate the tools + self._validate_tools(request.tools or []) + + return request + + def _validate_function(self, function: Function) -> None: + """ + Checks: + - That the function schema is valid + """ + try: + Draft7Validator.check_schema(function.parameters) + except SchemaError as e: + raise InvalidToolSchemaException(f"Invalid tool schema: {e.message}") + + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", function.name): + raise InvalidToolException( + f"Function name was {function.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + def _validate_tools(self, tools: List[Tool]) -> None: + """ + Checks: + - That the tool schemas are valid + """ + + for tool in tools: + self._validate_function(tool.function) + + def _validate_user_message(self, message: UserMessageType) -> None: + pass + + def _validate_tool_message(self, message: ToolMessageType) -> None: + """ + Checks: + - The tool name is valid + """ + if message.name is not None: + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", message.name): + raise InvalidToolMessageException( + f"Function name was {message.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." 
+ ) + + def _validate_system_message(self, message: SystemMessageType) -> None: + """ + Checks: + - That the system prompt has content + """ + if message.content is None: + raise InvalidSystemPromptException("System prompt must have content") + + def _validate_function_call(self, function_call: FunctionCall) -> None: + """ + Checks: + - That the function call has a valid name + """ + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", function_call.name): + raise InvalidFunctionCallException( + f"Function name was {function_call.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + def _validate_tool_call(self, tool_call: ToolCall, is_last_message: bool) -> None: + """ + Checks: + - That the tool call has a valid function + """ + + self._validate_function_call(tool_call.function) + + def _validate_assistant_message(self, message: AssistantMessageType, is_last_message: bool = False) -> None: + """ + Checks: + - That the assistant message has either text or tool_calls, but not both + - That the tool calls are valid + """ + + # Validate that the message has either text or tool_calls + # but not both and not neither. + if bool(message.content) == bool(message.tool_calls): + raise InvalidAssistantMessageException( + "Assistant message must have either content or tool_calls, but not both." 
+ ) + + # If we have tool calls, validate them + if message.tool_calls is not None: + # Validate that the tool calls are valid + for tool_call in message.tool_calls: + self._validate_tool_call(tool_call, is_last_message=is_last_message) + + if self._mode == ValidationMode.finetuning and isinstance(message, FinetuningAssistantMessage): + if message.weight is not None and message.weight not in [0, 1]: + raise InvalidAssistantMessageException("Assistant message weight must be either 0 or 1") + + if message.prefix: + if not is_last_message: + raise InvalidAssistantMessageException("Assistant message with prefix True must be last message") + # note : we already validate that assistant messsage has content 3 lines up. + + def _validate_tool_calls_followed_by_tool_messages(self, messages: List[UATS]) -> None: + """ + Checks: + - That the number of tool calls and tool messages are the same + - That the tool calls are followed by tool messages + """ + prev_role = None + expected_tool_messages = 0 + for message in messages: + if prev_role is None: + prev_role = message.role + continue + + if message.role == Roles.tool: + expected_tool_messages -= 1 + elif message.role == Roles.assistant: + # if we have an assistant message and we have not received all the function calls + # we need to raise an exception + if expected_tool_messages != 0: + raise InvalidMessageStructureException("Not the same number of function calls and responses") + + if message.tool_calls is not None: + # Validate that the number of function calls and responses are the same + expected_tool_messages = len(message.tool_calls) + + prev_role = message.role + + if expected_tool_messages != 0 and self._mode == ValidationMode.serving: + raise InvalidMessageStructureException("Not the same number of function calls and responses") + elif expected_tool_messages < 0 and self._mode == ValidationMode.finetuning: + raise InvalidMessageStructureException("More tool responses than tool calls") + + def 
_validate_message_order(self, messages: List[UATS]) -> None: + """ + Validates the order of the messages, for example user -> assistant -> user -> assistant -> ... + """ + previous_role = None + for message in messages: + current_role = message.role + + if previous_role is not None: + if previous_role == Roles.system: + expected_roles = {Roles.user, Roles.assistant, Roles.system} + elif previous_role == Roles.user: + expected_roles = {Roles.assistant, Roles.system, Roles.user} + elif previous_role == Roles.assistant: + expected_roles = {Roles.assistant, Roles.user, Roles.tool} + elif previous_role == Roles.tool: + expected_roles = {Roles.assistant, Roles.tool} + + if current_role not in expected_roles: + raise InvalidMessageStructureException( + f"Unexpected role '{current_role}' after role '{previous_role}'" + ) + + previous_role = current_role + + def _validate_last_message(self, message: UATS) -> None: + # The last message must be a user or tool message in serving mode or an assistant message in finetuning mode + last_message_role = message.role + if self._mode == ValidationMode.finetuning: + if last_message_role != Roles.assistant: + raise InvalidMessageStructureException( + f"Expected last role Assistant for finetuning but got {last_message_role}" + ) + else: + bad_assistant = isinstance(message, AssistantMessage) and not message.prefix + bad_role = message.role not in {Roles.user, Roles.tool} + if bad_assistant and bad_role: + raise InvalidMessageStructureException( + f"Expected last role User or Tool (or Assistant with prefix True) for serving" + f" but got {last_message_role}" + ) + + def _validate_message_list_structure(self, messages: List[UATS]) -> None: + """ + Validates the structure of the list of messages + + For example the messages must be in the correct order of user/assistant/tool + """ + + if len(messages) == 0: + raise InvalidMessageStructureException("Conversation must have at least one message") + + # If we have one message it must be a user 
or a system message + if len(messages) == 1: + if messages[0].role not in {Roles.user, Roles.system}: + raise InvalidMessageStructureException("Conversation must start with a user message or system message") + + # Always check the last message if in fine-tuning mode + if self._mode == ValidationMode.finetuning or len(messages) > 1: + self._validate_last_message(messages[-1]) + + self._validate_message_order(messages) + self._validate_tool_calls_followed_by_tool_messages(messages) + + def _validate_message_list_content(self, messages: List[UATS]) -> None: + """ + Validates the content of the messages + """ + + for idx, message in enumerate(messages): + if message.role == Roles.user: + self._validate_user_message(message) + elif message.role == Roles.assistant: + self._validate_assistant_message(message, is_last_message=idx == len(messages) - 1) + elif message.role == Roles.tool: + self._validate_tool_message(message) + elif message.role == Roles.system: + self._validate_system_message(message) + else: + raise InvalidRequestException(f"Unsupported message type {type(message)}") + + +class MistralRequestValidatorV3(MistralRequestValidator): + def _validate_tool_message(self, message: ToolMessageType) -> None: + """ + Checks: + - The tool name is valid + - Tool call id is valid + """ + if message.name is not None: + if not re.match(r"^[a-zA-Z0-9_-]{1,64}$", message.name): + raise InvalidToolMessageException( + f"Function name was {message.name} but must be a-z, A-Z, 0-9, " + "or contain underscores and dashes, with a maximum length of 64." + ) + + if message.tool_call_id is None: + raise InvalidRequestException("Tool call id has to be defined.") + + if not re.match(r"^[a-zA-Z0-9]{9}$", message.tool_call_id): + raise InvalidToolMessageException( + f"Tool call id was {message.tool_call_id} but must be a-z, A-Z, 0-9, with a length of 9." 
+ ) + + def _validate_tool_call(self, tool_call: ToolCall, is_last_message: bool) -> None: + """ + Validate that the tool call has a valid ID + """ + if tool_call.id != "null": + if not re.match(r"^[a-zA-Z0-9]{9}$", tool_call.id): + raise InvalidFunctionCallException( + f"Tool call id was {tool_call.id} but must be a-z, A-Z, 0-9, with a length of 9." + ) + if self._mode == ValidationMode.finetuning and not is_last_message and tool_call.id == "null": + err_message = "Tool call id of assistant message that is not last has to be defined in finetuning mode." + raise InvalidFunctionCallException(err_message) + + if self._mode == ValidationMode.serving and tool_call.id == "null": + raise InvalidFunctionCallException("Tool call id has to be defined in serving mode.") + + self._validate_function_call(tool_call.function) + + def _validate_last_message(self, message: UATS) -> None: + super()._validate_last_message(message) + + if self._mode == ValidationMode.finetuning: + # in finetuning mode it has to be an assistant message + # as checked by parent `_validate_last_message` + if message.tool_calls is not None: + for tool_call in message.tool_calls: + self._validate_tool_call(tool_call, is_last_message=True) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py b/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..09bfa3dd7c909442ab895faa3b5b7718e32279f5 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/mistral_common/protocol/utils.py @@ -0,0 +1,5 @@ +import uuid + + +def random_uuid() -> str: + return str(uuid.uuid4().hex) diff --git a/.venv/lib/python3.11/site-packages/mistral_common/py.typed b/.venv/lib/python3.11/site-packages/mistral_common/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..611c85ac99d107af2096d9b84c24cf0c07286c49 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..836cf6f35cc7ab4e58a13ab6e3072ed34bc2984c Binary files /dev/null and b/.venv/lib/python3.11/site-packages/numpy/polynomial/tests/__pycache__/test_hermite_e.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..c80f9dcf24dd15d0a895731b81af0f3cc47873fe --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/METADATA @@ -0,0 +1,238 @@ +Metadata-Version: 2.2 +Name: vllm +Version: 0.7.2 +Summary: A high-throughput and memory-efficient inference and serving engine for LLMs +Home-page: https://github.com/vllm-project/vllm +Author: vLLM Team +License: Apache 2.0 +Project-URL: Homepage, https://github.com/vllm-project/vllm +Project-URL: Documentation, https://vllm.readthedocs.io/en/latest/ +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Intended Audience :: Science/Research +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Scientific/Engineering :: Information Analysis +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: psutil +Requires-Dist: sentencepiece +Requires-Dist: numpy<2.0.0 +Requires-Dist: requests>=2.26.0 +Requires-Dist: tqdm +Requires-Dist: blake3 +Requires-Dist: py-cpuinfo +Requires-Dist: transformers>=4.48.2 +Requires-Dist: tokenizers>=0.19.1 +Requires-Dist: protobuf +Requires-Dist: fastapi<0.113.0,>=0.107.0; python_version < "3.9" +Requires-Dist: fastapi!=0.113.*,!=0.114.0,>=0.107.0; python_version >= "3.9" +Requires-Dist: aiohttp +Requires-Dist: openai>=1.52.0 +Requires-Dist: uvicorn[standard] +Requires-Dist: pydantic>=2.9 +Requires-Dist: prometheus_client>=0.18.0 +Requires-Dist: pillow +Requires-Dist: 
prometheus-fastapi-instrumentator>=7.0.0 +Requires-Dist: tiktoken>=0.6.0 +Requires-Dist: lm-format-enforcer<0.11,>=0.10.9 +Requires-Dist: outlines==0.1.11 +Requires-Dist: lark==1.2.2 +Requires-Dist: xgrammar>=0.1.6; platform_machine == "x86_64" +Requires-Dist: typing_extensions>=4.10 +Requires-Dist: filelock>=3.16.1 +Requires-Dist: partial-json-parser +Requires-Dist: pyzmq +Requires-Dist: msgspec +Requires-Dist: gguf==0.10.0 +Requires-Dist: importlib_metadata +Requires-Dist: mistral_common[opencv]>=1.5.0 +Requires-Dist: pyyaml +Requires-Dist: six>=1.16.0; python_version > "3.11" +Requires-Dist: setuptools>=74.1.1; python_version > "3.11" +Requires-Dist: einops +Requires-Dist: compressed-tensors==0.9.1 +Requires-Dist: depyf==0.18.0 +Requires-Dist: cloudpickle +Requires-Dist: ray[default]>=2.9 +Requires-Dist: nvidia-ml-py>=12.560.30 +Requires-Dist: torch==2.5.1 +Requires-Dist: torchaudio==2.5.1 +Requires-Dist: torchvision==0.20.1 +Requires-Dist: xformers==0.0.28.post3; platform_system == "Linux" and platform_machine == "x86_64" +Provides-Extra: tensorizer +Requires-Dist: tensorizer>=2.9.0; extra == "tensorizer" +Provides-Extra: runai +Requires-Dist: runai-model-streamer; extra == "runai" +Requires-Dist: runai-model-streamer-s3; extra == "runai" +Requires-Dist: boto3; extra == "runai" +Provides-Extra: audio +Requires-Dist: librosa; extra == "audio" +Requires-Dist: soundfile; extra == "audio" +Provides-Extra: video +Requires-Dist: decord; extra == "video" +Dynamic: author +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: license +Dynamic: project-url +Dynamic: provides-extra +Dynamic: requires-dist +Dynamic: requires-python +Dynamic: summary + +

+ + + vLLM + +

+ +

+Easy, fast, and cheap LLM serving for everyone +

+ +

+| Documentation | Blog | Paper | Twitter/X | Developer Slack | +

+ +--- + +*Latest News* 🔥 +- [2025/01] We are excited to announce the alpha release of vLLM V1: A major architectural upgrade with 1.7x speedup! Clean code, optimized execution loop, zero-overhead prefix caching, enhanced multimodal support, and more. Please check out our blog post [here](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html). +- [2025/01] We hosted [the eighth vLLM meetup](https://lu.ma/zep56hui) with Google Cloud! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1epVkt4Zu8Jz_S5OhEHPc798emsYh2BwYfRuDDVEF7u4/edit?usp=sharing). +- [2024/12] vLLM joins [pytorch ecosystem](https://pytorch.org/blog/vllm-joins-pytorch)! Easy, Fast, and Cheap LLM Serving for Everyone! +- [2024/11] We hosted [the seventh vLLM meetup](https://lu.ma/h0qvrajz) with Snowflake! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1e3CxQBV3JsfGp30SwyvS3eM_tW-ghOhJ9PAJGK6KR54/edit?usp=sharing), and Snowflake team [here](https://docs.google.com/presentation/d/1qF3RkDAbOULwz9WK5TOltt2fE9t6uIc_hVNLFAaQX6A/edit?usp=sharing). +- [2024/10] We have just created a developer slack ([slack.vllm.ai](https://slack.vllm.ai)) focusing on coordinating contributions and discussing features. Please feel free to join us there! +- [2024/10] Ray Summit 2024 held a special track for vLLM! Please find the opening talk slides from the vLLM team [here](https://docs.google.com/presentation/d/1B_KQxpHBTRa_mDF-tR6i8rWdOU5QoTZNcEg2MKZxEHM/edit?usp=sharing). Learn more from the [talks](https://www.youtube.com/playlist?list=PLzTswPQNepXl6AQwifuwUImLPFRVpksjR) from other vLLM contributors and users! +- [2024/09] We hosted [the sixth vLLM meetup](https://lu.ma/87q3nvnh) with NVIDIA! Please find the meetup slides [here](https://docs.google.com/presentation/d/1wrLGwytQfaOTd5wCGSPNhoaW3nq0E-9wqyP7ny93xRs/edit?usp=sharing). +- [2024/07] We hosted [the fifth vLLM meetup](https://lu.ma/lp0gyjqr) with AWS! 
Please find the meetup slides [here](https://docs.google.com/presentation/d/1RgUD8aCfcHocghoP3zmXzck9vX3RCI9yfUAB2Bbcl4Y/edit?usp=sharing). +- [2024/07] In partnership with Meta, vLLM officially supports Llama 3.1 with FP8 quantization and pipeline parallelism! Please check out our blog post [here](https://blog.vllm.ai/2024/07/23/llama31.html). +- [2024/06] We hosted [the fourth vLLM meetup](https://lu.ma/agivllm) with Cloudflare and BentoML! Please find the meetup slides [here](https://docs.google.com/presentation/d/1iJ8o7V2bQEi0BFEljLTwc5G1S10_Rhv3beed5oB0NJ4/edit?usp=sharing). +- [2024/04] We hosted [the third vLLM meetup](https://robloxandvllmmeetup2024.splashthat.com/) with Roblox! Please find the meetup slides [here](https://docs.google.com/presentation/d/1A--47JAK4BJ39t954HyTkvtfwn0fkqtsL8NGFuslReM/edit?usp=sharing). +- [2024/01] We hosted [the second vLLM meetup](https://lu.ma/ygxbpzhl) with IBM! Please find the meetup slides [here](https://docs.google.com/presentation/d/12mI2sKABnUw5RBWXDYY-HtHth4iMSNcEoQ10jDQbxgA/edit?usp=sharing). +- [2023/10] We hosted [the first vLLM meetup](https://lu.ma/first-vllm-meetup) with a16z! Please find the meetup slides [here](https://docs.google.com/presentation/d/1QL-XPFXiFpDBh86DbEegFXBXFXjix4v032GhShbKf3s/edit?usp=sharing). +- [2023/08] We would like to express our sincere gratitude to [Andreessen Horowitz](https://a16z.com/2023/08/30/supporting-the-open-source-ai-community/) (a16z) for providing a generous grant to support the open-source development and research of vLLM. +- [2023/06] We officially released vLLM! FastChat-vLLM integration has powered [LMSYS Vicuna and Chatbot Arena](https://chat.lmsys.org) since mid-April. Check out our [blog post](https://vllm.ai). + +--- +## About +vLLM is a fast and easy-to-use library for LLM inference and serving. 
+ +Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry. + +vLLM is fast with: + +- State-of-the-art serving throughput +- Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html) +- Continuous batching of incoming requests +- Fast model execution with CUDA/HIP graph +- Quantizations: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8. +- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer. +- Speculative decoding +- Chunked prefill + +**Performance benchmark**: We include a performance benchmark at the end of [our blog post](https://blog.vllm.ai/2024/09/05/perf-update.html). It compares the performance of vLLM against other LLM serving engines ([TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), [SGLang](https://github.com/sgl-project/sglang) and [LMDeploy](https://github.com/InternLM/lmdeploy)). The implementation is under [nightly-benchmarks folder](.buildkite/nightly-benchmarks/) and you can [reproduce](https://github.com/vllm-project/vllm/issues/8176) this benchmark using our one-click runnable script. + +vLLM is flexible and easy to use with: + +- Seamless integration with popular Hugging Face models +- High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more +- Tensor parallelism and pipeline parallelism support for distributed inference +- Streaming outputs +- OpenAI-compatible API server +- Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, TPU, and AWS Neuron. 
+- Prefix caching support +- Multi-lora support + +vLLM seamlessly supports most popular open-source models on HuggingFace, including: +- Transformer-like LLMs (e.g., Llama) +- Mixture-of-Expert LLMs (e.g., Mixtral, Deepseek-V2 and V3) +- Embedding Models (e.g. E5-Mistral) +- Multi-modal LLMs (e.g., LLaVA) + +Find the full list of supported models [here](https://docs.vllm.ai/en/latest/models/supported_models.html). + +## Getting Started + +Install vLLM with `pip` or [from source](https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html#build-wheel-from-source): + +```bash +pip install vllm +``` + +Visit our [documentation](https://docs.vllm.ai/en/latest/) to learn more. +- [Installation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) +- [Quickstart](https://docs.vllm.ai/en/latest/getting_started/quickstart.html) +- [List of Supported Models](https://docs.vllm.ai/en/latest/models/supported_models.html) + +## Contributing + +We welcome and value any contributions and collaborations. +Please check out [CONTRIBUTING.md](./CONTRIBUTING.md) for how to get involved. + +## Sponsors + +vLLM is a community project. Our compute resources for development and testing are supported by the following organizations. Thank you for your support! + + + +Cash Donations: +- a16z +- Dropbox +- Sequoia Capital +- Skywork AI +- ZhenFund + +Compute Resources: +- AMD +- Anyscale +- AWS +- Crusoe Cloud +- Databricks +- DeepInfra +- Google Cloud +- Lambda Lab +- Nebius +- Novita AI +- NVIDIA +- Replicate +- Roblox +- RunPod +- Trainy +- UC Berkeley +- UC San Diego + +Slack Sponsor: Anyscale + +We also have an official fundraising venue through [OpenCollective](https://opencollective.com/vllm). We plan to use the fund to support the development, maintenance, and adoption of vLLM. 
+ +## Citation + +If you use vLLM for your research, please cite our [paper](https://arxiv.org/abs/2309.06180): +```bibtex +@inproceedings{kwon2023efficient, + title={Efficient Memory Management for Large Language Model Serving with PagedAttention}, + author={Woosuk Kwon and Zhuohan Li and Siyuan Zhuang and Ying Sheng and Lianmin Zheng and Cody Hao Yu and Joseph E. Gonzalez and Hao Zhang and Ion Stoica}, + booktitle={Proceedings of the ACM SIGOPS 29th Symposium on Operating Systems Principles}, + year={2023} +} +``` + +## Contact Us + +* For technical questions and feature requests, please use Github issues or discussions. +* For discussing with fellow users and coordinating contributions and development, please use Slack. +* For security disclosures, please use Github's security advisory feature. +* For collaborations and partnerships, please contact us at vllm-questions AT lists.berkeley.edu. + +## Media Kit + +* If you wish to use vLLM's logo, please refer to [our media kit repo](https://github.com/vllm-project/media-kit). 
diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..a264252087aef620fe5aff3fa58965e4dba4e099 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/RECORD @@ -0,0 +1,1313 @@ +../../../bin/vllm,sha256=8rKxioauw2uk5PWmY3kz-AQil42vTcT8MrUUp53pscQ,229 +vllm-0.7.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +vllm-0.7.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357 +vllm-0.7.2.dist-info/METADATA,sha256=og0UL1frXzLiqZP4Y32h6Z7T8qkyrx0tlhTYCemzSa8,12451 +vllm-0.7.2.dist-info/RECORD,, +vllm-0.7.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm-0.7.2.dist-info/WHEEL,sha256=DNGSEk5U4ugAGvX0_BXBTCo3Pq7hN4686HcjBg8CfTM,102 +vllm-0.7.2.dist-info/entry_points.txt,sha256=3gs2VG8nr7QHZXKTqjfRIabj07IjR9Gj5xm764igbjE,43 +vllm-0.7.2.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5 +vllm/_C.abi3.so,sha256=kUXtJrlp06iQehb0BZYr-c7rfoFuUWTI0TtGw0iQsg4,217472040 +vllm/__init__.py,sha256=4znJi5rYl--TIYaS7ITK9jm-_ctVwgR1VDPKwjWsmkY,2052 +vllm/__pycache__/__init__.cpython-311.pyc,, +vllm/__pycache__/_custom_ops.cpython-311.pyc,, +vllm/__pycache__/_ipex_ops.cpython-311.pyc,, +vllm/__pycache__/_version.cpython-311.pyc,, +vllm/__pycache__/beam_search.cpython-311.pyc,, +vllm/__pycache__/config.cpython-311.pyc,, +vllm/__pycache__/connections.cpython-311.pyc,, +vllm/__pycache__/envs.cpython-311.pyc,, +vllm/__pycache__/forward_context.cpython-311.pyc,, +vllm/__pycache__/logger.cpython-311.pyc,, +vllm/__pycache__/logits_process.cpython-311.pyc,, +vllm/__pycache__/outputs.cpython-311.pyc,, +vllm/__pycache__/pooling_params.cpython-311.pyc,, +vllm/__pycache__/sampling_params.cpython-311.pyc,, +vllm/__pycache__/scalar_type.cpython-311.pyc,, +vllm/__pycache__/scripts.cpython-311.pyc,, 
+vllm/__pycache__/sequence.cpython-311.pyc,, +vllm/__pycache__/tracing.cpython-311.pyc,, +vllm/__pycache__/utils.cpython-311.pyc,, +vllm/__pycache__/version.cpython-311.pyc,, +vllm/_custom_ops.py,sha256=idZui8sfplykQofJkDgxqJkym-vxfba763xNcmufHUU,44100 +vllm/_ipex_ops.py,sha256=fAp8PqSkAMWjm82Lhaohv63S1LYAUJwk0GXJl2PZl50,7839 +vllm/_moe_C.abi3.so,sha256=PPqjsHbDM1FhWuIt5g1lx5Nfo_2AX7RzoxsEWkbzd1k,83764856 +vllm/_version.py,sha256=SwvkPl7GLhGxWFK-GDetxs6qkq8TTqCfnxJUNJqmWtU,411 +vllm/adapter_commons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/adapter_commons/__pycache__/__init__.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/layers.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/models.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/request.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/utils.cpython-311.pyc,, +vllm/adapter_commons/__pycache__/worker_manager.cpython-311.pyc,, +vllm/adapter_commons/layers.py,sha256=rdsvBlYTiblidwK2EYkl3UdB4xvopcrd8li3vPFTbwo,406 +vllm/adapter_commons/models.py,sha256=tuuVafwk9Yvfl8uCXSg1Whzm2Wsq8W7JqypRm_XpBzg,2807 +vllm/adapter_commons/request.py,sha256=GoLdKUNCU6x-8plK95CuLOy56QOSYW6IQAg6ZQg76C8,617 +vllm/adapter_commons/utils.py,sha256=ytCCfLdk-FwWCflWMSTazUPg2gNmXjaovEWbpvQ6fe0,3271 +vllm/adapter_commons/worker_manager.py,sha256=qBj7swkk7LJoQi2GpueMQFMbVPjphnuzOKIc36oQ6Ts,928 +vllm/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/assets/__pycache__/__init__.cpython-311.pyc,, +vllm/assets/__pycache__/audio.cpython-311.pyc,, +vllm/assets/__pycache__/base.cpython-311.pyc,, +vllm/assets/__pycache__/image.cpython-311.pyc,, +vllm/assets/__pycache__/video.cpython-311.pyc,, +vllm/assets/audio.py,sha256=ISR8TC_92QOEC7pFr1_ILC0clwyvyMYdq-2ewBO4cG4,892 +vllm/assets/base.py,sha256=IdwWieuPqaAaYKo2ybSfg07dt86k4-NrP1BHPCrFN2s,1196 +vllm/assets/image.py,sha256=Kn7HrcJEIXKUcx7IUUcixkslwKannvgGVXWa77bI19s,922 
+vllm/assets/video.py,sha256=N642PEaHhx7vB5hGCJVAUi3toVFt0CIXuSKFSFzlzc8,2464 +vllm/attention/__init__.py,sha256=YW7x8Ahq9TPxx8GiduskAliKrJUM5i-kDrOjuLep7aA,610 +vllm/attention/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/__pycache__/layer.cpython-311.pyc,, +vllm/attention/__pycache__/selector.cpython-311.pyc,, +vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/backends/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/backends/__pycache__/abstract.cpython-311.pyc,, +vllm/attention/backends/__pycache__/blocksparse_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/flash_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/flashinfer.cpython-311.pyc,, +vllm/attention/backends/__pycache__/hpu_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/ipex_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/openvino.cpython-311.pyc,, +vllm/attention/backends/__pycache__/pallas.cpython-311.pyc,, +vllm/attention/backends/__pycache__/placeholder_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/rocm_flash_attn.cpython-311.pyc,, +vllm/attention/backends/__pycache__/torch_sdpa.cpython-311.pyc,, +vllm/attention/backends/__pycache__/triton_mla.cpython-311.pyc,, +vllm/attention/backends/__pycache__/utils.cpython-311.pyc,, +vllm/attention/backends/__pycache__/xformers.cpython-311.pyc,, +vllm/attention/backends/abstract.py,sha256=TNYGVUDkx7EQ8FuOqFmQYtMLEAzUPCUxmEdCbIesy1o,9097 +vllm/attention/backends/blocksparse_attn.py,sha256=QbIPsZDM3d0IPEkKBT7E78Ac-o2Hv8Fw-Ug_75XovzI,18003 +vllm/attention/backends/flash_attn.py,sha256=ZhEd0k8B9lqiyLG-LifdCLmUHzqzGtmpIUq6CvS2ALY,41200 +vllm/attention/backends/flashinfer.py,sha256=0_ow7Sy6yrEBuqpDNcl4ao2AVKYS1VdKOtr1QSuFeno,46216 +vllm/attention/backends/hpu_attn.py,sha256=9dRfuKLiTNbnq7X0HKZGGOWXicin5rXBCOJWwpj8GiA,11734 +vllm/attention/backends/ipex_attn.py,sha256=Hpob2Gbmjc-PF0EoURLqbbO2AlttWbw1lOY0x0JLpec,14586 
+vllm/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/backends/mla/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/backends/mla/__pycache__/utils.cpython-311.pyc,, +vllm/attention/backends/mla/utils.py,sha256=RoD0gCzp5Cz7teWxDUUOCWY24JtUBAceEqnV02g75hU,23524 +vllm/attention/backends/openvino.py,sha256=DLSknNrBOmqKbtTV8ah0pOcNr0q2yz1R8nTaRk4Mte4,4799 +vllm/attention/backends/pallas.py,sha256=2vT7FvPWcGYHhQ_6QHhwqIk49Yd4PtqgExtgRoOjVXU,13263 +vllm/attention/backends/placeholder_attn.py,sha256=-NOPWnH3YVd8o1HuxULlYXkWsgZu_PtmYi7aUL4N8yw,16316 +vllm/attention/backends/rocm_flash_attn.py,sha256=zri0w4t1CdK8apXZTik3XKZxnHe1t4nMmnyQ3ECFFoU,38385 +vllm/attention/backends/torch_sdpa.py,sha256=uTf0ayYb23o0jwjIwIJe6KOD778wLCpR0HRzAGnlPDA,26939 +vllm/attention/backends/triton_mla.py,sha256=qMC8DCSuZE3gk1-n8ExQjW0Xoi-1KeqQaQ9SleO9Ntc,31630 +vllm/attention/backends/utils.py,sha256=Vvahg8g0PWOUKBAnOfeVMxUIaEAHM8j0AAn7hWwXZIQ,25129 +vllm/attention/backends/xformers.py,sha256=5JOTUcHT3leG8uNYa73Dzp-i9SD-kr5FCFjER6dsO1M,33442 +vllm/attention/layer.py,sha256=Q9sBH_5c6bc6-G_V5kxOu1Lt_7HZ4Hj_kZAE-cDLK4U,14828 +vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/attention/ops/__pycache__/hpu_paged_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/ipex_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/nki_flash_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/paged_attn.cpython-311.pyc,, +vllm/attention/ops/__pycache__/prefix_prefill.cpython-311.pyc,, +vllm/attention/ops/__pycache__/triton_decode_attention.cpython-311.pyc,, +vllm/attention/ops/__pycache__/triton_flash_attention.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/attention/ops/blocksparse_attention/__pycache__/__init__.cpython-311.pyc,, 
+vllm/attention/ops/blocksparse_attention/__pycache__/blocksparse_attention_kernel.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__pycache__/interface.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/__pycache__/utils.cpython-311.pyc,, +vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py,sha256=u_96xPRC15lmK8hK5npn3-G8DyY-I1k3spa5sfzPi3U,11535 +vllm/attention/ops/blocksparse_attention/interface.py,sha256=QfWOzV2KjpJu7goLLOFegFbkIOA484Io3T6E-T8KXwo,9326 +vllm/attention/ops/blocksparse_attention/utils.py,sha256=vitFMc2NXAqjiyWKt23ELVxzKT_1BE10TvA3wYPjooY,8085 +vllm/attention/ops/hpu_paged_attn.py,sha256=JIJMptp237a-zCl0-656j4Di4ru4RJfeBFMxpdLTq9w,3460 +vllm/attention/ops/ipex_attn.py,sha256=6FWxaOW-Oz9kuoC0UXd_T1hrTW81iXuqUdAAwrw0DOw,5521 +vllm/attention/ops/nki_flash_attn.py,sha256=arELBFAzm80fyr8ejm2y7GFdABPJ_TMuVS8T7VUjqfc,24491 +vllm/attention/ops/paged_attn.py,sha256=nzV0-MUHyX0J94-W0R-xfHX_SoQdbfCFsQqrDle_IBw,8334 +vllm/attention/ops/prefix_prefill.py,sha256=Uf4oBhidx__t5NWrR4gG3tvyIwa29Dhug519dh5xez0,32113 +vllm/attention/ops/triton_decode_attention.py,sha256=n_Klveqthy-pVj5NBHHOlTtfHMw5umnXttqhO-ANaLU,18915 +vllm/attention/ops/triton_flash_attention.py,sha256=CBK7O8xBFKAGPNqjUDY19QMYjvNGrxDE7Hm3qNw3H5U,27534 +vllm/attention/selector.py,sha256=PuQqeyrkyTtUcP2FWDJhXk0_qHX-Y4ekePSI7MZdo6s,5865 +vllm/beam_search.py,sha256=OloyO9Rvh75DE1uTVW3tX3XWhNsyQqcFOSBzFhNTvzI,2183 +vllm/compilation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/compilation/__pycache__/__init__.cpython-311.pyc,, +vllm/compilation/__pycache__/backends.cpython-311.pyc,, +vllm/compilation/__pycache__/counter.cpython-311.pyc,, +vllm/compilation/__pycache__/decorators.cpython-311.pyc,, +vllm/compilation/__pycache__/fix_functionalization.cpython-311.pyc,, +vllm/compilation/__pycache__/fusion.cpython-311.pyc,, +vllm/compilation/__pycache__/fx_utils.cpython-311.pyc,, +vllm/compilation/__pycache__/inductor_pass.cpython-311.pyc,, 
+vllm/compilation/__pycache__/monitor.cpython-311.pyc,, +vllm/compilation/__pycache__/multi_output_match.cpython-311.pyc,, +vllm/compilation/__pycache__/pass_manager.cpython-311.pyc,, +vllm/compilation/__pycache__/reshapes.cpython-311.pyc,, +vllm/compilation/__pycache__/vllm_inductor_pass.cpython-311.pyc,, +vllm/compilation/__pycache__/wrapper.cpython-311.pyc,, +vllm/compilation/backends.py,sha256=CR8uEtc9fKZYISDIbgbn5FRZGc0eJVG_v96--Z3r9C8,35424 +vllm/compilation/counter.py,sha256=TzXY5bpM2Z3y55tg9Zssz-qb-p7TW-db__4L3DcV9hY,938 +vllm/compilation/decorators.py,sha256=u0kOMbxoEOtxTPAChFijXWyw3nU3QwmlkX26J5uQYsc,10246 +vllm/compilation/fix_functionalization.py,sha256=z8l6h_C6-UkbOR5uxo55R1ETOIRfCTj6dawujHzap-o,7953 +vllm/compilation/fusion.py,sha256=PiC6D8vwtRCqGOQ-Eyw1w2nzvfbwf4aOSAHjtUmQSi0,24556 +vllm/compilation/fx_utils.py,sha256=3N-l_AWzwE0-hDJXtsKlfY9XaKyRJy-mx7G9FmtEZDc,1478 +vllm/compilation/inductor_pass.py,sha256=KcFr08N5xkyFfWV3NuGgNS_egfl3hx2padUOqcnoShE,2570 +vllm/compilation/monitor.py,sha256=p4LQ1roNhKQE8M8zhlYwgdnSFBFi6EkDUazr6X2usc0,1346 +vllm/compilation/multi_output_match.py,sha256=FKsWQk73_olG_sIh7veaHzfBecoKURWD6SRH5_Ozw20,3839 +vllm/compilation/pass_manager.py,sha256=4A91kDHMRNidei644TXaMC89Kh-CAOFVjGQfNiO0oSo,2792 +vllm/compilation/reshapes.py,sha256=K7j5y9bMqeFv6EC3ggqjgVSuFBpmn3Imw3FGm0lwbhM,3055 +vllm/compilation/vllm_inductor_pass.py,sha256=QVhnVvw0GzSR-h80GNa2JkcfaLIy-jAecdLPQigKcUI,1835 +vllm/compilation/wrapper.py,sha256=pCzXlNs-_49mKiQJFE3Kg02lEkHf_CfbVk6dcPlhnok,5631 +vllm/config.py,sha256=ruDhJ_KG-4wUkNlzDa6OdUT__xbV3mVolVlW35lbOBU,151021 +vllm/connections.py,sha256=5Mscwp5D4mOFEA7nT-r7Zs73H9ghaYQynex1fLyxQrU,4962 +vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/core/__pycache__/__init__.cpython-311.pyc,, +vllm/core/__pycache__/block_manager.cpython-311.pyc,, +vllm/core/__pycache__/evictor.cpython-311.pyc,, +vllm/core/__pycache__/interfaces.cpython-311.pyc,, 
+vllm/core/__pycache__/placeholder_block_space_manager.cpython-311.pyc,, +vllm/core/__pycache__/scheduler.cpython-311.pyc,, +vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/core/block/__pycache__/__init__.cpython-311.pyc,, +vllm/core/block/__pycache__/block_table.cpython-311.pyc,, +vllm/core/block/__pycache__/common.cpython-311.pyc,, +vllm/core/block/__pycache__/cpu_gpu_block_allocator.cpython-311.pyc,, +vllm/core/block/__pycache__/interfaces.cpython-311.pyc,, +vllm/core/block/__pycache__/naive_block.cpython-311.pyc,, +vllm/core/block/__pycache__/prefix_caching_block.cpython-311.pyc,, +vllm/core/block/__pycache__/utils.cpython-311.pyc,, +vllm/core/block/block_table.py,sha256=HMwMwVY8pHLjlje6gfVsrHvyvLupcd3SMAvgcsUcnxM,16022 +vllm/core/block/common.py,sha256=cfDse1iNYLehOXrSfUypTmakGAdSSXrX0YmodFPpJjI,13200 +vllm/core/block/cpu_gpu_block_allocator.py,sha256=gr6P5-ahDwFR4i8x3bxTkWukTUlHV4_BciigAJA8MwM,16817 +vllm/core/block/interfaces.py,sha256=FSmVJr-d2RVt_kX2ckWCYf7N1Cq5XNeJqk-TebbOj2k,8111 +vllm/core/block/naive_block.py,sha256=EgYRm94K88DyFM3Xjfa1A8hWuGZStL0nIqiZqbVxQMI,16355 +vllm/core/block/prefix_caching_block.py,sha256=tBjZ58xTQmmzx2s24BcneVyCf5F-aaOqgJz07bxbfoo,44182 +vllm/core/block/utils.py,sha256=osLxVwSUYjOsLeal8RzpmGT72F4aU3qbTGuYMdWIsHY,928 +vllm/core/block_manager.py,sha256=xJRkXw2xn1vAIsH0IZTBHeoCZ1fd6mIEtL-TBV7K5qQ,22165 +vllm/core/evictor.py,sha256=Jy-eZwgdA4Q2F0buFgDNv2fPeiIxJAUEFtyKYz1VL40,5446 +vllm/core/interfaces.py,sha256=86jc5i5_EbVMSLNDZ5W5MUBl-DgiisRsIeTwfCLKjj0,3534 +vllm/core/placeholder_block_space_manager.py,sha256=WWT-kWCOKZ_HqMqcu_YP26lvE7T9TSGCG6TGdqPYGaM,2928 +vllm/core/scheduler.py,sha256=PjmfVKjDq0zD7O2O4gQ2S2L2EiN08oU1gMb8of3TX2c,81122 +vllm/cumem_allocator.abi3.so,sha256=0BF47IBh0qAI7WAU351txtj3DInTfvzQ9Mb9xDd0oSY,27824 +vllm/device_allocator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/device_allocator/__pycache__/__init__.cpython-311.pyc,, 
+vllm/device_allocator/__pycache__/cumem.cpython-311.pyc,, +vllm/device_allocator/cumem.py,sha256=7IJr44wYldBlwFk6O_2uK0C1hrUtUYPLaiMI94QxnXE,9814 +vllm/distributed/__init__.py,sha256=Rk8k7bXtcPNaihFk5qOn__toXjElImWbszyRJBzeYHA,122 +vllm/distributed/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/__pycache__/communication_op.cpython-311.pyc,, +vllm/distributed/__pycache__/parallel_state.cpython-311.pyc,, +vllm/distributed/__pycache__/utils.cpython-311.pyc,, +vllm/distributed/communication_op.py,sha256=PHbMVvmq2WRcpTf5peCbHgP8FCC3ZMTzJPu38ydBBaM,1236 +vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/device_communicators/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/cuda_wrapper.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/custom_all_reduce.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/custom_all_reduce_utils.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/hpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/pynccl.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/pynccl_wrapper.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/shm_broadcast.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/tpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/__pycache__/xpu_communicator.cpython-311.pyc,, +vllm/distributed/device_communicators/cuda_wrapper.py,sha256=Wa68jPiG67iSwqg3Lb5zEDzX0L1qnD6TAn_iUU-Oo7E,6901 +vllm/distributed/device_communicators/custom_all_reduce.py,sha256=QnH2wct_sdz_-ZJx7JjR0vej13qg2Ar8dSKppp0O6sc,12492 +vllm/distributed/device_communicators/custom_all_reduce_utils.py,sha256=g-GEhCjE6dGzRDYHKYDP5FqFW9snR-YMLcIQOYt0Zz8,10474 +vllm/distributed/device_communicators/hpu_communicator.py,sha256=wgpKr7gNzvj9BpmjIn51cd0V4Q2YiReargkr5U-Mrz0,1848 
+vllm/distributed/device_communicators/pynccl.py,sha256=Z4QXdGf_qzz2J3PfKtU7bEG29oxbXy4dEorGB9WNB3k,9142 +vllm/distributed/device_communicators/pynccl_wrapper.py,sha256=FUfvxJPoxuYRXWBNbbFkFTKHncIYq23SmFhHCcvCirA,13705 +vllm/distributed/device_communicators/shm_broadcast.py,sha256=Nu57UdDywE3vhodVWkaIQfLEWLwrjq94tXmb14vwGp4,22780 +vllm/distributed/device_communicators/tpu_communicator.py,sha256=j_3BxCUAULwnKWb9cNXeM6FjzGcXhl28y6PBeGHBQT4,2438 +vllm/distributed/device_communicators/xpu_communicator.py,sha256=9k17FFfhUShuOypGMflbWTY02WcMbX5f0-C9P-qW8fs,1790 +vllm/distributed/kv_transfer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/__pycache__/kv_transfer_agent.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_connector/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/factory.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/__pycache__/simple_connector.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_connector/base.py,sha256=eqvKBJoU2iJwfPvtyJmdWEPqVIJLIIOieatOTMkxcMA,4291 +vllm/distributed/kv_transfer/kv_connector/factory.py,sha256=-5QZebKjYZQPv-lmLYwRnsFsTqgN-1c1hBOpZfFpBzk,1736 +vllm/distributed/kv_transfer/kv_connector/simple_connector.py,sha256=svH3MECdr72_1BKcswRZbTiRQh20HHBwg_1_2I6QL3A,12941 +vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_lookup_buffer/__pycache__/simple_buffer.cpython-311.pyc,, 
+vllm/distributed/kv_transfer/kv_lookup_buffer/base.py,sha256=PQnicbzMqrZvwSdrUrNLqJLmrQXzY-CwcqiNmQm28MI,4162 +vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py,sha256=c8gZgks_s2k9TH1e6u2GjtPhBSM5spVQh5ybrChdQ8w,9033 +vllm/distributed/kv_transfer/kv_pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/distributed/kv_transfer/kv_pipe/__pycache__/__init__.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/base.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/mooncake_pipe.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/__pycache__/pynccl_pipe.cpython-311.pyc,, +vllm/distributed/kv_transfer/kv_pipe/base.py,sha256=U4hivz-zJkjhTGgNdtcuupc_ArsoUPFuWEv_AXJ9rqs,2087 +vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py,sha256=PI0bXHb1bUfng7_u40Y-TJ6SQ-ssfUW_xKFLpES-Bw8,11687 +vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py,sha256=p9U4-Se_2brXUNWSv_U_wB4RYXFr_dcV9iTjjucZzFA,9588 +vllm/distributed/kv_transfer/kv_transfer_agent.py,sha256=-F7qEvxSGJ3FAzw_km73LoRAWmZ8WEiMFTHddPDarRU,2430 +vllm/distributed/parallel_state.py,sha256=6q3_gx5FUr2eMboKlmM0Getg3UH9XkelGfM0rnEfKm4,49922 +vllm/distributed/utils.py,sha256=3ESo_gKzI1FtAsYsd--7K2xjLFrKebLd9s9Ui38EFLQ,8553 +vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/engine/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/__pycache__/arg_utils.cpython-311.pyc,, +vllm/engine/__pycache__/async_llm_engine.cpython-311.pyc,, +vllm/engine/__pycache__/async_timeout.cpython-311.pyc,, +vllm/engine/__pycache__/llm_engine.cpython-311.pyc,, +vllm/engine/__pycache__/metrics.cpython-311.pyc,, +vllm/engine/__pycache__/metrics_types.cpython-311.pyc,, +vllm/engine/__pycache__/protocol.cpython-311.pyc,, +vllm/engine/arg_utils.py,sha256=hrJsNd9b2EHP1lWw8bbzHoBJF3fr16HGqiubLcafrJw,64060 +vllm/engine/async_llm_engine.py,sha256=lraQRRsWl_zzSIEJGGI7KtlPV7yH5nZbtm6Iiz-r8CY,48514 
+vllm/engine/async_timeout.py,sha256=JxUaRVK_M5P5wRVkKHQ-QkDMnGxKMTt9S9OhQeQzP-s,7092 +vllm/engine/llm_engine.py,sha256=99SOL62knx8EZfgFmcRj-G6_mvXpOnxPufZcOjPrPEc,88668 +vllm/engine/metrics.py,sha256=DGrpynyGS5tnzo5kksJ4Z2403QrAEt75xrUaR37tkNc,29606 +vllm/engine/metrics_types.py,sha256=QgPkVHKyokSL_lYCG9tezbQTQ-ccp294vLUkkmsJj3A,3402 +vllm/engine/multiprocessing/__init__.py,sha256=7oSrA3Cfp7mO8-6ekiElbHXXEYCfNzj1glJ4pk4nu8g,4415 +vllm/engine/multiprocessing/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/multiprocessing/__pycache__/client.cpython-311.pyc,, +vllm/engine/multiprocessing/__pycache__/engine.cpython-311.pyc,, +vllm/engine/multiprocessing/client.py,sha256=Vh1tEPt4CCeOFJnKti5RFbjOyDZ4V646Pirhu_NVzGI,28650 +vllm/engine/multiprocessing/engine.py,sha256=bZYXPxJrH9glM7i55xqUK1RBzldYEm_3L47Ey0MC-J4,15396 +vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/engine/output_processor/__pycache__/__init__.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/interfaces.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/multi_step.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/single_step.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/stop_checker.cpython-311.pyc,, +vllm/engine/output_processor/__pycache__/util.cpython-311.pyc,, +vllm/engine/output_processor/interfaces.py,sha256=99zPnCsA0H9k8d7uXfv8yGva69mAS1m3apR9sdWIUfY,2994 +vllm/engine/output_processor/multi_step.py,sha256=5lihog6cmxNmXPJxeKecZ8UhPrpiJXH_r0uPvcD8vDI,9000 +vllm/engine/output_processor/single_step.py,sha256=ToiNI09zyU86zsB8Vbw9-eLcbNk_gsCHc2sKRmBfcF0,6015 +vllm/engine/output_processor/stop_checker.py,sha256=YVSkufbbX3km41aU7bXRJjhpP24Qcun7y6V3H_cQ65U,5064 +vllm/engine/output_processor/util.py,sha256=IoNFmy8vKrK5pn3nGS26Ey5irhKr8mzNOGP30SsT1qA,1056 +vllm/engine/protocol.py,sha256=aPDHm0tM4qycT3mkLcGeyn_MuFe5ehKXDTyULXdbfQA,10104 
+vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/entrypoints/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/__pycache__/api_server.cpython-311.pyc,, +vllm/entrypoints/__pycache__/chat_utils.cpython-311.pyc,, +vllm/entrypoints/__pycache__/launcher.cpython-311.pyc,, +vllm/entrypoints/__pycache__/llm.cpython-311.pyc,, +vllm/entrypoints/__pycache__/logger.cpython-311.pyc,, +vllm/entrypoints/__pycache__/utils.cpython-311.pyc,, +vllm/entrypoints/api_server.py,sha256=QUmsbFCoQ3f74HXyg4RhqztQ8GKNZXM_mul9B-gpj6E,5417 +vllm/entrypoints/chat_utils.py,sha256=LynDqRGDJCyB1CTixN2LpTdRpWIEkpMiAJwP1MBuIQo,35458 +vllm/entrypoints/launcher.py,sha256=3RfCwmGWoGqgASiRP5kt86sOKIjyH1lYWZha7W2w-7U,3929 +vllm/entrypoints/llm.py,sha256=EE4MjNgbx6pSZg6POsTtqGrKe5HJqwvXi-rmj5tlT4Q,61938 +vllm/entrypoints/logger.py,sha256=0a5GEgmC1d210eqgdYwUuSIcZRcxeIozUIzrZyvD5T4,1449 +vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/entrypoints/openai/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/api_server.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/cli_args.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/logits_processors.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/protocol.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/run_batch.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_chat.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_completion.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_embedding.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_engine.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_models.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_pooling.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_rerank.cpython-311.pyc,, +vllm/entrypoints/openai/__pycache__/serving_score.cpython-311.pyc,, 
+vllm/entrypoints/openai/__pycache__/serving_tokenization.cpython-311.pyc,, +vllm/entrypoints/openai/api_server.py,sha256=3FNtzrhzEN7mZgMzBsYnTkyLffHKBdLqytlmCxVmkis,34388 +vllm/entrypoints/openai/cli_args.py,sha256=py20pxtraM9eheEc2jzWXUiE3dX6ysnGNSkrrBgrutc,12174 +vllm/entrypoints/openai/logits_processors.py,sha256=XIh2SPLpyOlF002J8K_85qgFG8qiIDCK4VnVk_tYB0U,3157 +vllm/entrypoints/openai/protocol.py,sha256=IIslNH6vEKljEXAG99B6HJCs6wjPgW1I175MY1_PcWo,53132 +vllm/entrypoints/openai/reasoning_parsers/__init__.py,sha256=q2Ai9Mb9TCEAFoag8rxvcduJpf1SK6KD9KzcyaBIb20,274 +vllm/entrypoints/openai/reasoning_parsers/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/__pycache__/abs_reasoning_parsers.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/__pycache__/deepseek_r1_reasoning_parser.cpython-311.pyc,, +vllm/entrypoints/openai/reasoning_parsers/abs_reasoning_parsers.py,sha256=Q05zVyaESXOibqsH0MvbagFAX4WWusj2wlreBwpchtM,5945 +vllm/entrypoints/openai/reasoning_parsers/deepseek_r1_reasoning_parser.py,sha256=tnjGZ-d66uPh2uWI37YpSoTBIfW-2_c6vyoyOeAk_l4,5987 +vllm/entrypoints/openai/run_batch.py,sha256=xsRrylkwjd8kuzJBtAo0JZvNIxIiui9qhwW5g4EAy2E,12567 +vllm/entrypoints/openai/serving_chat.py,sha256=DQTZPOf2tbDkqLyPAA_4Tc6KNf7wdx6XvmcFukUYyDo,43959 +vllm/entrypoints/openai/serving_completion.py,sha256=MO00SXETExwnSbcJG8Skm13TdLI7FbxceYHD6Q7egEo,22786 +vllm/entrypoints/openai/serving_embedding.py,sha256=acNsb2R3lcOJhgrlzJ7so0HCzXPjPIzvlZmBRHrEn78,9106 +vllm/entrypoints/openai/serving_engine.py,sha256=bIfvo5q63wVa9O_XalGT22I-q6pbZQCOBcI_GsjHCyw,20607 +vllm/entrypoints/openai/serving_models.py,sha256=rflL6qv1a_bupedo8RVYXe9pruJgvfJu0iyuf_COCC4,9676 +vllm/entrypoints/openai/serving_pooling.py,sha256=ib8oc4gJFaBWk27t58RDnYfjHqxEhmY0SxeywbwOfyM,8784 +vllm/entrypoints/openai/serving_rerank.py,sha256=Jvi3rHKeRUp6XuFxpvFRDa8VwiYtKy0bhwXkWpn_NdU,8430 
+vllm/entrypoints/openai/serving_score.py,sha256=05-WnpkF0jkdhR3fYfqqLaAHEi98k9v_sSKbcjYcJcg,8833 +vllm/entrypoints/openai/serving_tokenization.py,sha256=L4Tl5X2QgWLYuKgAv_UwD-QLh4UkMzc5Zeco68jhics,5474 +vllm/entrypoints/openai/tool_parsers/__init__.py,sha256=MCAp6mTORZyGThFRQXengbxammJdKmSr7kuQa-_Wjs8,773 +vllm/entrypoints/openai/tool_parsers/__pycache__/__init__.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/abstract_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/granite_20b_fc_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/granite_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/hermes_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/internlm2_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/jamba_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/llama_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/mistral_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/pythonic_tool_parser.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/__pycache__/utils.cpython-311.pyc,, +vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py,sha256=Jkyeyt0PdTxWSwwqDIpIOkkUVbG4Am-LjFLRJQGOv4U,6017 +vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py,sha256=mjnp2uh4ZgJsZzTUU1_lkn15Tk5rFs84P7oAsPwt5W8,11114 +vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py,sha256=SejcNYn9O18Nt-YZgtRyJ1sGl-yLY2WtFgJkNZjh23A,10344 +vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py,sha256=5rF_22DrZFyGsj-SZG1d6IS9nuaWTtzvoaSUXhITnMU,16784 +vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py,sha256=GnxKcV02EMSB-R4AXETCrwQjUOfyUnt89VXgkKC2oIY,9114 +vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py,sha256=F9aSXFBrGmQGqXeVBC6kWSbqzug_CmkEM3LgoDOY-Oc,13538 
+vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py,sha256=KyUb6v6vlCw3m3NRZemMlRTuPgvvYCPVm0vqKbQB9Dk,11894 +vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py,sha256=N-D8XAETGsMhlNgDg45Y0huPMbVY73OI1ZD4O7yxLhA,14664 +vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py,sha256=9ZXksKP2X0i--AlkLtGEQ9IEq8Y5PtAsKkkqx1yqpnw,11939 +vllm/entrypoints/openai/tool_parsers/utils.py,sha256=YMUBQVTiG7HuE9QB60_5CXG_Ed4_A87jq543B2Q4psQ,3819 +vllm/entrypoints/utils.py,sha256=1AhOs4JUzXO6eVKcQyO_zoHYFuNHPpY8WAMbf1Tlcks,2294 +vllm/envs.py,sha256=N7D6rputSdxivRZny---DOUsGadYU8nn1GSZfe4nU_4,24374 +vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/executor/__pycache__/__init__.cpython-311.pyc,, +vllm/executor/__pycache__/executor_base.cpython-311.pyc,, +vllm/executor/__pycache__/mp_distributed_executor.cpython-311.pyc,, +vllm/executor/__pycache__/msgspec_utils.cpython-311.pyc,, +vllm/executor/__pycache__/multiproc_worker_utils.cpython-311.pyc,, +vllm/executor/__pycache__/ray_distributed_executor.cpython-311.pyc,, +vllm/executor/__pycache__/ray_utils.cpython-311.pyc,, +vllm/executor/__pycache__/uniproc_executor.cpython-311.pyc,, +vllm/executor/executor_base.py,sha256=p-RWke1Eu1DBjRLN2U0bwDp97tlBMXGZdqGCwJzpzTU,14560 +vllm/executor/mp_distributed_executor.py,sha256=6mrv5wsBUw6ng08_qyn7LtmYsaGgGgTHoZSdP-hmPtY,9854 +vllm/executor/msgspec_utils.py,sha256=FG5Qh6ghtLjyI6BHiTGmJQ3sGop2Lpm4LoADP_0Hs9o,909 +vllm/executor/multiproc_worker_utils.py,sha256=X6jx-OFT-kW-O-2njzoZOG60A3GUTQQr5M5u_8i_fGc,10590 +vllm/executor/ray_distributed_executor.py,sha256=wujrkeqP6VwA81ox9hrhS14lcHWi7D8B6hqfDKFjXoA,27588 +vllm/executor/ray_utils.py,sha256=gSu9CiXPnOjrmPZbLNj4rms2zokeetupaiVMVtij6KU,15646 +vllm/executor/uniproc_executor.py,sha256=MlWdrHScDoFZbmhgT6QNONnCL05aawKGvmRhUz6n3lU,5408 +vllm/forward_context.py,sha256=Pm5oWtNDYmDmORo4vyAvEH4l5ujcbQwSylfQYif5Ysk,3939 +vllm/inputs/__init__.py,sha256=28WIgSktiZ0KR0ndxtnEwZjhS406uO-jPjyWovkw7L4,1228 
+vllm/inputs/__pycache__/__init__.cpython-311.pyc,, +vllm/inputs/__pycache__/data.cpython-311.pyc,, +vllm/inputs/__pycache__/parse.cpython-311.pyc,, +vllm/inputs/__pycache__/preprocess.cpython-311.pyc,, +vllm/inputs/__pycache__/registry.cpython-311.pyc,, +vllm/inputs/data.py,sha256=geBLaVXi6auxBSHvSkqViX-UR_fxc1L4cwnCKDmlgH8,13247 +vllm/inputs/parse.py,sha256=QIv86GTYr1TuOr0YpGvqxh_NWYbyIUJSomXcXnjbqSk,3472 +vllm/inputs/preprocess.py,sha256=SIvINfVZw8iTTg2cpybsuhYMFdDbPS2HBVMxLiHXadM,25481 +vllm/inputs/registry.py,sha256=VIQGXrJnB_UY319TwVR3lxjiSI69OpOIyszTKjzgBWo,16877 +vllm/logger.py,sha256=Zhf5TX7_kMXIYG5hOAjT1ufmNiPvMzHwrP7SV5tOBTc,7338 +vllm/logging_utils/__init__.py,sha256=t2aDazCRc19hTrOxiW3eY-d702nQFBOJz_QIfSip9ok,136 +vllm/logging_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/logging_utils/__pycache__/formatter.cpython-311.pyc,, +vllm/logging_utils/formatter.py,sha256=AAWbFV4wgQZn_Ek0MKA-TBJwCQiO3ejIuIV1rQm8ADQ,525 +vllm/logits_process.py,sha256=Rq55GebQWGNj5u_RpcE6ABbHkjHps5vhK-3It5UX-9A,4664 +vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/lora/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/__pycache__/fully_sharded_layers.cpython-311.pyc,, +vllm/lora/__pycache__/layers.cpython-311.pyc,, +vllm/lora/__pycache__/lora.cpython-311.pyc,, +vllm/lora/__pycache__/models.cpython-311.pyc,, +vllm/lora/__pycache__/peft_helper.cpython-311.pyc,, +vllm/lora/__pycache__/request.cpython-311.pyc,, +vllm/lora/__pycache__/utils.cpython-311.pyc,, +vllm/lora/__pycache__/worker_manager.cpython-311.pyc,, +vllm/lora/fully_sharded_layers.py,sha256=GsESume-K5mteX1EH1Vr8fQmY0nUjyTIKtY-1HVFNV8,12229 +vllm/lora/layers.py,sha256=zgnvqQwBCB3pbnsDyhBjvYvWVrpmQNGWWXHgaXUVJew,44795 +vllm/lora/lora.py,sha256=XfOb94aCsORrhvTnHDy-gF6iGo6nULtpsRcR2wpfYBU,6222 +vllm/lora/models.py,sha256=FsxAE7ugEpEiPwnZIN0_W0jQyqbl5CqI8FQtcz83Nj0,33292 +vllm/lora/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 
+vllm/lora/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/torch_ops/__init__.py,sha256=z03eb5aCSj_Z-_RPa3huUKuXRBvhxj_M8lK7izkQJHE,466 +vllm/lora/ops/torch_ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/torch_ops/__pycache__/lora_ops.cpython-311.pyc,, +vllm/lora/ops/torch_ops/lora_ops.py,sha256=ilxQObKw2wEz3BJJ8X87xWPHGOz0jSII3b13wpj66es,4300 +vllm/lora/ops/triton_ops/__init__.py,sha256=pDFzcbjtDVrinPai8pFqq1G0hIjI-fLpxg5vXesFGgc,486 +vllm/lora/ops/triton_ops/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_expand.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_expand_slice.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/bgmv_shrink.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/sgmv_expand.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/sgmv_shrink.cpython-311.pyc,, +vllm/lora/ops/triton_ops/__pycache__/utils.cpython-311.pyc,, +vllm/lora/ops/triton_ops/bgmv_expand.py,sha256=VeuUfI1mp37B-kYBQdzF9RA6cyWY49ZoNnmU2qqsqj0,5533 +vllm/lora/ops/triton_ops/bgmv_expand_slice.py,sha256=NO-IhMKXVu7g-8giHS0F1xyqA4fM03oSnNi6cUyKNxg,6074 +vllm/lora/ops/triton_ops/bgmv_shrink.py,sha256=7Oxgn2g2vilJQJUfPx7hhk2yz_Wd6H-6FQRfhE9BzYo,4643 +vllm/lora/ops/triton_ops/sgmv_expand.py,sha256=KidzmgiQaxGUgkpgvswTK8D0tVePqwwVXoyeTc7B-OA,9085 +vllm/lora/ops/triton_ops/sgmv_shrink.py,sha256=x2HlCSoiNVtS4s_P-8Uu93kbfXCTFHrQG2TMJRhpJWw,7674 +vllm/lora/ops/triton_ops/utils.py,sha256=nrcZGbN48oga_9gcLMX1FePDKN182DyFjfFrKFuLq7w,6365 +vllm/lora/peft_helper.py,sha256=iOUNDIf9nw2TSwuZ5cjpDCsb-stn1qhkupZVvYPcvrc,4399 +vllm/lora/punica_wrapper/__init__.py,sha256=RAbrZogtmoPZNIMImJFX1REM0cydwz5C-ATIp7_qHFA,244 +vllm/lora/punica_wrapper/__pycache__/__init__.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_base.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_cpu.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_gpu.cpython-311.pyc,, 
+vllm/lora/punica_wrapper/__pycache__/punica_hpu.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/punica_selector.cpython-311.pyc,, +vllm/lora/punica_wrapper/__pycache__/utils.cpython-311.pyc,, +vllm/lora/punica_wrapper/punica_base.py,sha256=Sd6bBfAUwWVM82zzOrl5hIkINzthhNGv857VdGwAk6w,18228 +vllm/lora/punica_wrapper/punica_cpu.py,sha256=1hzv1SchgOGiUo4kYsAN5BcTDX5XNQopMqbbXvxLqlg,12465 +vllm/lora/punica_wrapper/punica_gpu.py,sha256=AfVozZYi7kwdwVj3CLjxAx7vHDFGQpoORzo1wCDf5t8,11427 +vllm/lora/punica_wrapper/punica_hpu.py,sha256=84vAp9BLWkf9u8CdnT_enX3IUd2_QI_qvOU2vKiLSb8,3253 +vllm/lora/punica_wrapper/punica_selector.py,sha256=WP5XsmWE8YJG8fmak0jNPrgYxF4_lxaJt-mcNaMfsRY,755 +vllm/lora/punica_wrapper/utils.py,sha256=CFoSN8wz2TQhQhYdiZFjN_QU4AwQ8wPfa_o7wL9Ufuk,6846 +vllm/lora/request.py,sha256=w_fGpOlDlJpOS-7iw-dKXPkr0zvDmgdB4mr3kOszin8,3059 +vllm/lora/utils.py,sha256=6FD4StnjuC_MporT8Ahd6TC_t7VQZz22gzekjQljmrg,8122 +vllm/lora/worker_manager.py,sha256=-rm7b4BXK2ezu8tW41sEe1mzCmG49ksWfQaZkHvW8SU,10590 +vllm/model_executor/__init__.py,sha256=cRhmybV9ftoNVy7E91WIczp4wLL4E6y77KQ9vrhWqL0,505 +vllm/model_executor/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/__pycache__/custom_op.cpython-311.pyc,, +vllm/model_executor/__pycache__/parameter.cpython-311.pyc,, +vllm/model_executor/__pycache__/pooling_metadata.cpython-311.pyc,, +vllm/model_executor/__pycache__/sampling_metadata.cpython-311.pyc,, +vllm/model_executor/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/custom_op.py,sha256=SY4dP4JUcvagu1OOsZwjM6Q-kKPG0N597JeROytng04,5331 +vllm/model_executor/guided_decoding/__init__.py,sha256=qbfQVkvAs7NgQaLMB9_wGhDGYwkOlTS_QQJm763nN4s,6883 +vllm/model_executor/guided_decoding/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/guided_fields.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/lm_format_enforcer_decoding.cpython-311.pyc,, 
+vllm/model_executor/guided_decoding/__pycache__/outlines_decoding.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/outlines_logits_processors.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/guided_decoding/__pycache__/xgrammar_decoding.cpython-311.pyc,, +vllm/model_executor/guided_decoding/guided_fields.py,sha256=buAymWm9-1u6oIscVajQ0qQvwHVVYbvVDY6quQQlPyk,1450 +vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=uUzfxJuP1NDjgLzPdEHJqGCj17J2JInP_LksjCP2utQ,2678 +vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=K9rDRJr1NAUmaboTH4nZCwGta38uOOK4vHVLRgDA9s4,5188 +vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=HlKi6Oy4VF85_BFTHYS3QYQ6JQPVkq5kZFf9W1wKKIw,8683 +vllm/model_executor/guided_decoding/utils.py,sha256=3Of64yPGTZ4Ohu5crzc8JqaJRo_47-i5-iVN4sgSPx4,7693 +vllm/model_executor/guided_decoding/xgrammar_decoding.py,sha256=RrTIdjAUhAEJZO1K0TKfoG6KsI_M4WtGbDnwAcp6YEo,13113 +vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/activation.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/layernorm.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/linear.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/logits_processor.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/pooler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/rejection_sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/resampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/rotary_embedding.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/spec_decode_base_sampler.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/typical_acceptance_sampler.cpython-311.pyc,, 
+vllm/model_executor/layers/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/__pycache__/vocab_parallel_embedding.cpython-311.pyc,, +vllm/model_executor/layers/activation.py,sha256=EHZZLcttE3vxTJDu47rEaYklObw4HK4xX6tdcDcnRas,12197 +vllm/model_executor/layers/fused_moe/__init__.py,sha256=8NLBBk3J3kbdmy2lOmYqoowsqTXEBgM6wy2rdFAtxvw,1168 +vllm/model_executor/layers/fused_moe/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/fused_marlin_moe.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/fused_moe.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/layer.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/moe_pallas.cpython-311.pyc,, +vllm/model_executor/layers/fused_moe/__pycache__/moe_torch_iterative.cpython-311.pyc,, +"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=hH5rRN9Wtyv35azxMzyUMHWtiKgOHev5tNjIG8j6dsE,2751 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=qPumkNxaHMvVBnEjPe_Xiuz9ICb6Hqc-9I1DAR8s3gA,4130 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=s47lb8VLnyxMgWlqcIR4BdPBsjKWL4olXF49uZvygzQ,4140 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=gzfjrYDcS0vsACq7ONGVkNA3FqVjr3e89q9fO9kokkg,4133 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Np7yRX9Z7Y7Z5Nutbl02wpKdZRltbt4WqlPlleiYs2E,4146 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=XsNfNXY8v0eatazkLCDiDclI0FnTudUGLYO01e1_4aA,4149 
+"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=H0K4_O1CMbNLi-srcycT3lSl4JaBl3EGF89GY5Rj9MU,4130 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=arPqstZMzZjz8BNpY3alKT4vGCJyUj5I2hEeK02aq98,4152 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=wjnQ4v-dflJMR3iFDHBuZI_1R0xXjsNoWc2kHu6C8JI,4135 +"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=7WHPz_0fxeI3Ed0D9VIpZVoeN9RtJVVARvptfcmQu40,4146 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ypuAxMQ7JESPXLBltt68wly2wTrJzlnobhUMip6xAmc,2751 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=tUptlureu5QgyAEedtx5sm7CFudXAE6fIXepOb9gfas,2745 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=h57svdmDlZC_D8w9XWjPRS8ciYVkJiPEYfhrD2NRVVY,4127 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JmXhUnhX6YOy8RsmT0zFLGyNCpRBPV2q2Db9Y9ctZeE,4144 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254 
+"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=bWR6XBZ4nJ_ROg8rEgrQGc04I3BDbwILDHMZxATO-H4,2740 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Gu1wROuky-xS0dsFgbXS2QD_hOVV8yol9a5iqiYyq3s,2749 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=_9HO7SaR6aQeh6vqCDpo3kjHnGJ9BVKLiMwYYgd3SmQ,2913 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=2ONiQSa9odzdPe1dIgBpP24l5z-5wB1eos06xOj0V_Q,2738 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=Twkm9DVNxijpowfvioJ_4cKwIIlAWdyNWO9TA3gxAHs,4149 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=THQWP1o2bWhnJh0rq3ZIVvs_sagIJgoK4x3pJbiFbHk,2910 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=o1pR3rNpO1eW4BHOKpPIQLjviw4P2X5Fr4HQBcdHA-I,2747 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=iySqae0zI_PRBLqV-vfSCwDS4Jxcl5QjWa2NnhndL0U,2752 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Uhq0SrWiCrldkWbb0ZZZhWaCZ0SsvpiNL4z30KZUN5g,2747 +"vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ydsFUdXdVE_ZSScVhUxvxOFwKG-nkTraNeN69wqzxIM,2903 
+"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262 +"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=ozS2ECxk-Dsd4Y9DgCGGwDwJlCf5T20ANf5gnTUMuSc,3252 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249 +"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=WegYsHl39QVlHu_4EZJSrgA4LQ5fYxSVNWFhoL6W2Rc,3251 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=X8FVPE7rLblDs_Dw_Iu-KDw9H7PaC417EHyVclYjfv8,3733 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=Ezk-gq1ET8Kan5LQvk4XbwDcgsbuIBoaa3HlDwI-364,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=LgHbxG1kQV36zZPkJcnurHYzwAjMh04lvEHEsfzS1t0,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json",sha256=_fcdkmWvdMqHiH8ZAGke-zXhH7qVPQx5CmKELW5hRCA,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=yTf2R9cngSf4OafucAYlDDn4-bftaMFKaY7qhaBZPqQ,3739 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=_1eVE7ok935L2V43-3D3bVNWSVaoViia19sh0VrXmXM,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=6jRC0oOpVpq5c1xePFKNRy-Xtmb038i4LE9N2zao2W4,3730 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json",sha256=cFWeyNJtEbs-Bfohgzclxo1rcYGU863oV0BzJyQ4T0w,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ro3drDpWAdeXH7IjMvx8wYGhIuDPOl0bpbJaIB5Msns,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=qvGJh6BN6FJ9T-kR_ttpu9D9csWmDUfGYCAmo39kQsY,4734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=qX5_yErBEwDRzhv2FvxrS3pEMa8zn0GHzLp5TUMX90g,3872 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ysRCWmxV20K2BYD9XEUtxwREFGtA3QHI191vHRA0k_Q,3733 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json",sha256=L8VA1sfygHoyLJ-Ybfs8DP5c0YWFmMkwxHT8yJ9PEFM,4732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=QsR-Xr9vyuiArMTSo-dX-1DFgATfqwIGOzFuQJAuE_Y,3734 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=halUboffSrtXJ09gUM6BTDySLpO_pSvxnYHqSFleyoY,4735 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252 
+"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=kF4Fx0yHUmiMSLFNXT6xqAEA4AgCaHOoy_3irv4dNss,3732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json",sha256=uOlVzTdJl_4VrRK4wmxIb8JKfveFZRjO9syjw_oEeL0,4732 +"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265 +vllm/model_executor/layers/fused_moe/fused_marlin_moe.py,sha256=dzv596s3Dz0IvNrzQRY2MOVtCiHB9V3V9BnZ2VBR0K0,12629 +vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=FB7G9i3NwUdQ2AXCqn2p8nsT1N-1Q-XfVfzyM2xWCE4,53611 +vllm/model_executor/layers/fused_moe/layer.py,sha256=XB93XjY8-muIxoC77lN5ErToiG6ZWEY-FPEX3XybEkM,27237 +vllm/model_executor/layers/fused_moe/moe_pallas.py,sha256=20nVD5HOmg1D9Jol0k4IS4PF_k6QIUIZBGIUxPNlrIE,2338 +vllm/model_executor/layers/fused_moe/moe_torch_iterative.py,sha256=EifnhcLw46jB1DJLbXh06LKg3U54Tt-k3RH7DZRwo08,1884 +vllm/model_executor/layers/layernorm.py,sha256=iQ5B9JX2r-Ir56o0LMmsqwEipOJNaBPCaVFM4JiRXes,6879 +vllm/model_executor/layers/linear.py,sha256=Ago4DWmcYggy92QA4__7b-cnaddfqG0rHHxaDX1wRcA,51875 +vllm/model_executor/layers/logits_processor.py,sha256=EVOsKTlu7lSgUCCLQYCinqV4pA1c74NThDalG1kchDg,7685 +vllm/model_executor/layers/mamba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/mamba/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/mamba/__pycache__/mamba_mixer.cpython-311.pyc,, 
+vllm/model_executor/layers/mamba/mamba_mixer.py,sha256=qWLliA7YxXm5ZzZxLKsYW8hEZMyXyd6pzSkHvMBEMb0,10120 +vllm/model_executor/layers/mamba/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/mamba/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/__pycache__/causal_conv1d.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/__pycache__/mamba_ssm.cpython-311.pyc,, +vllm/model_executor/layers/mamba/ops/causal_conv1d.py,sha256=_ZiWUKMLApKDWDH8iB_8Zw_GGGAFHDJRxbcMWQlMYac,4470 +vllm/model_executor/layers/mamba/ops/mamba_ssm.py,sha256=HUDI4IAh17QrAYYLWU6oRVZixym8yFuTscmtifQaza4,14110 +vllm/model_executor/layers/pooler.py,sha256=1HUO5hcGpxoFE966sotGRF2SrLYGn_MOj4OpHLVrm1w,10904 +vllm/model_executor/layers/quantization/__init__.py,sha256=Zd8kVWjyyspQMRPcTVJ7ZAiW-I477qZ5KyIsvFlXtNw,4727 +vllm/model_executor/layers/quantization/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/aqlm.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq_marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/awq_triton.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/base_config.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/bitsandbytes.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/deepspeedfp.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/experts_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/fbgemm_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gguf.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gptq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/gptq_marlin.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/__pycache__/gptq_marlin_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/hqq_marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/ipex_quant.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/kv_cache.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/modelopt.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/moe_wna16.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/neuron_quant.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/qqq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/schema.cpython-311.pyc,, +vllm/model_executor/layers/quantization/__pycache__/tpu_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/aqlm.py,sha256=1cox6rKyx40kaL_arjH74cv-MXzEmY1V9HnKsN4pdxw,13605 +vllm/model_executor/layers/quantization/awq.py,sha256=VqBBjy3nSkJTDTxuxNZ4d_Vp9iivJuvjRdqSRfIDZXQ,7050 +vllm/model_executor/layers/quantization/awq_marlin.py,sha256=oZBIvc5HDVqqyt-iFUYsRydryhqUQmemK-iRl2a_ESw,19337 +vllm/model_executor/layers/quantization/awq_triton.py,sha256=SNr8Xro-iH8IrPJBvGdI6R_gsV0QmNnEzWOZhx2p014,12415 +vllm/model_executor/layers/quantization/base_config.py,sha256=nPAlJhsuJvfSIZmNQJDsCaYsWqpfc_zBMDJbrlM_KGk,4915 +vllm/model_executor/layers/quantization/bitsandbytes.py,sha256=cUbub--j-HGzlvlyJ5k5PN2EHYdwYmNhIniF2S2cdEI,14200 +vllm/model_executor/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/compressed_tensors_moe.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/triton_scaled_mm.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=mBpPmIJ400gFYIgWDIaRQkE133j5PPlY4lvdzHI00m0,26758 +vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=DAP3-1sKqdchMfhxaE6-3v_f-X129odEVObJPNMvKLQ,24809 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=BwKgT5i-Gu1d45Z72x5PUP3Yect8hO37yW9rfUeIoug,931 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_scheme.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w4a16_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a16_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_w8a8_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/__pycache__/compressed_tensors_wNa16.cpython-311.pyc,, +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py,sha256=l0iiT1vCWYwqAvw5RhDGRQS-mJsWGSdMWHOxLIr1hF0,13882 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=Jxb5WOHR4m98DRO4y3XKJMDpT_NIp3EwkAJe8HYqwqE,1527 
+vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py,sha256=v0u6xXo4zJ530YWbnnJXbJFoMrjUVveF6z4HVKhCqrk,6207 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py,sha256=GmDa7Kwrqj7Ja5-VZ7DRY8MTcmPChOKOK7ku3gguLT8,5409 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=5XQvPJ73jkBemtF6JXyazqzX9xOtGAX_Aizy0cHqmuw,6284 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=yns8-JTL4TM6boMggXk8E-yOalJgQ9O1neAs57oMh3g,4872 +vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py,sha256=N1GHQwN38JcHGDNC0oll59pJePaW1rqb6aZL8QS9LhY,7002 +vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py,sha256=gMLaK9uSOVqYw1wv8klnGLc1qass2K-DnpWTSm0rvuM,7731 +vllm/model_executor/layers/quantization/compressed_tensors/utils.py,sha256=YSJeIjPjkJ_dZIzKEnGGz419L7rC3HJnvhhohySbntE,7707 +vllm/model_executor/layers/quantization/deepspeedfp.py,sha256=4uu33p5aLq8QixKMuZSEd0sM3NnXKkD5iiYKpi7zQQc,7114 +vllm/model_executor/layers/quantization/experts_int8.py,sha256=6Xh4yzbPX6JHcznITDM-YoHn1E20YgnZe3smH5lMlr4,7286 +vllm/model_executor/layers/quantization/fbgemm_fp8.py,sha256=FUdGx9LV6nQU40_CBDgft1e7AC_3TZwcWjeKN8as5Vw,6499 +vllm/model_executor/layers/quantization/fp8.py,sha256=Q50CyRsgMuB6juYhikCqrLh1BIK56Xeu9QPTGogTgkU,32180 +vllm/model_executor/layers/quantization/gguf.py,sha256=ekv5leaAakl_6WTCuX9QaQTupRMHr7U-cp8ihT4b95Q,8290 +vllm/model_executor/layers/quantization/gptq.py,sha256=fj3aQlLROaRMEAf4vy1oOQXNhlT0Y8hDj95eQUGoXOw,9221 +vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=_mOx7VYh814UD4WDYyF3DhzvGY9RqArPHe2yaQodXjI,22372 +vllm/model_executor/layers/quantization/gptq_marlin_24.py,sha256=zo-mfwHb1ZQbCov9hMs4VNC1hOT5J6UxYxXW7eVptmY,10854 
+vllm/model_executor/layers/quantization/hqq_marlin.py,sha256=3sy-hO8XoTRocSn0y85OZYK-fd9w9dZy5uQbU63oYyg,12695 +vllm/model_executor/layers/quantization/ipex_quant.py,sha256=DcwEYr0Xp4pVq6TKCbF3Hcc8fBNz2uQKfL94AITEr_g,9734 +vllm/model_executor/layers/quantization/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/kernels/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py,sha256=Qx5iCPjsnW69RjhPY6qRRsTWddK4gf4sbrgh6czvtYE,2879 +vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py,sha256=uvXWMKZhuGumUmlRUDGY7l-f79hXVDk7LQ2CifWsVNA,2846 +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/MPLinearKernel.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/exllama.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/machete.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/__pycache__/marlin.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py,sha256=pqVO7rcPUT28KaVAoYJ1e96JSF18scX--uz_WprnUKI,6151 +vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py,sha256=JBJ8VeQ7zR8rGRAJO3IzZGiwItnCCqMup1eWOFMD9os,5206 +vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py,sha256=GZuxk-7xiEVT5uasGIvPPYgvKtaHnoETGdokTLHZTrk,5893 +vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py,sha256=EKNHGvrBoiElOpU-LOa4V0pui-MkLDcwxE9AScv9Sdc,2046 +vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py,sha256=1_Q-qSl5JYKiQ48_nvuNar-LoCgp4zYmk3G2N4cyzsM,3321 +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/ScaledMMLinearKernel.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/cutlass.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/triton.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/__pycache__/xla.cpython-311.pyc,, +vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py,sha256=-xzsBNkRLZ9WnMq4Iq2YvXjuSDMtmNdP2oEa4QGJIcE,5989 +vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py,sha256=3ssC8LJcQ-iPtlxEdoAZfPngiAuCaAy3-RWuBGz3ldE,1283 +vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py,sha256=DVQP94zjroZjb_ZQLS6lXXLtm50OOAMbFz0y-imjwSo,4199 +vllm/model_executor/layers/quantization/kv_cache.py,sha256=V4TdygQR6dm3Om95Z_KctJi71jK5GMghvUIc_o4SlhU,4077 +vllm/model_executor/layers/quantization/marlin.py,sha256=2kmoUfynMW0rZtCwXFwUPktdD5gB4-yerAI67S1LUCY,9590 +vllm/model_executor/layers/quantization/modelopt.py,sha256=1PgzYMw6TFEF7tlyZpTo80YsciEOFRJwq-_46SRsih4,6526 +vllm/model_executor/layers/quantization/moe_wna16.py,sha256=SKJtvXddNPokNAQcCKJ5BcoihdJkr6SOdNjPpCN2Lbs,18735 +vllm/model_executor/layers/quantization/neuron_quant.py,sha256=BUvTpDhFVLByd_mr8yRrenVfjfrMRvlCXj-MyOyIdUc,2392 +vllm/model_executor/layers/quantization/qqq.py,sha256=NewuQ2IZRI-SGWPTYktbt_ue7D8n3F8JkZnAj1RFgWo,9911 +vllm/model_executor/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/model_executor/layers/quantization/quark/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/quark.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/quark_moe.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/quark.py,sha256=soCfkI9IkQL217riDEHPmtXH0f-QHKcXmRXMgx2bUKw,16724 
+vllm/model_executor/layers/quantization/quark/quark_moe.py,sha256=eHbLgLdjKSiD7hn1JS75Uy_15jEd_tJ4NjHnwBCSXAo,10660 +vllm/model_executor/layers/quantization/quark/schemes/__init__.py,sha256=uEyTk05b5tNt-oEmSSkySwnWJ87o-oVphnSJoTf3MAM,221 +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_scheme.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_w8a8_fp8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/__pycache__/quark_w8a8_int8.cpython-311.pyc,, +vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py,sha256=f_5qRmZ3SZvIgBWiMlNjfu-WN1uVwSvfLGFe9QS25R4,1491 +vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py,sha256=KjxRZaD35GI3zckZVZUDib2ij7G8y1erbMWrJG7LmIQ,5935 +vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py,sha256=KKth7fai8d27MgtYbMCwF14ULY57Rp72JuBme4UWH3E,4677 +vllm/model_executor/layers/quantization/quark/utils.py,sha256=9VCOhQ_qNa31Se-LBuV11m1_HXNV4Fp8UJU9q0pRljM,3558 +vllm/model_executor/layers/quantization/schema.py,sha256=vb8XZXdDPgY437o96uNlQOUDhPoGeowrlwO1b4QdugE,3686 +vllm/model_executor/layers/quantization/tpu_int8.py,sha256=waQV-CegANwNFpD3ipO4mibocgLjZAVFubvOi1c4Omk,4416 +vllm/model_executor/layers/quantization/utils/__init__.py,sha256=VbdLnvlGCFpa2o9SRnEMflRyJ3NOXd6j6d1fPN_xm5w,166 +vllm/model_executor/layers/quantization/utils/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/fp8_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/layer_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/machete_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_fp8.cpython-311.pyc,, 
+vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_24.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/marlin_utils_test_qqq.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/quant_utils.cpython-311.pyc,, +vllm/model_executor/layers/quantization/utils/__pycache__/w8a8_utils.cpython-311.pyc,, +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241 +"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242 +"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 
128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259 +"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261 +"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266 
+"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263 +"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262 +"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260 +"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242 +"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251 
+"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250 
+"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264 
+"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256 +"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249 +vllm/model_executor/layers/quantization/utils/fp8_utils.py,sha256=u49_10Un3mKV9NICpkoXbMFGDu_tlZJ542_BG1WMAX8,18523 +vllm/model_executor/layers/quantization/utils/layer_utils.py,sha256=HmjtrTYHbc5u6kKfE8cnu5O2Nqg9ZyRy9BAGHYgenDY,1562 +vllm/model_executor/layers/quantization/utils/machete_utils.py,sha256=OeMLMQDbifbxz-HRYmjrC4RnMOutos17544P-h_dNE8,1074 +vllm/model_executor/layers/quantization/utils/marlin_utils.py,sha256=nTnBnKDkzatlScXWVfGATpg9YMkttp4e-m7Y63wDHHc,13678 +vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py,sha256=bASNhLOrC-7AaC-xGUIQiDqUx0HRWPX1liD4iqzjas8,3672 +vllm/model_executor/layers/quantization/utils/marlin_utils_test.py,sha256=HkXMXXwpP4mfxpWI8p6BRjoq2ipVGpWLe1eWYSYxi-c,5311 +vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py,sha256=zk_DLYsG7n7Y9mvtEkY89fXSH9kPEsuPLfbX0W38DEA,17558 +vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py,sha256=ybGP1JEn-Mti_vvEs8Ch4cl1NfRK4TMDxdYWEYZSvxI,4101 +vllm/model_executor/layers/quantization/utils/quant_utils.py,sha256=XOrnnp5RJdyfggUTrMsjR4OBPaNhfTd55HvoZw5plt0,19458 +vllm/model_executor/layers/quantization/utils/w8a8_utils.py,sha256=WQKrSvdkHem-mVpAs19APMpyTRVadmA90DTKHVF8rS8,9582 
+vllm/model_executor/layers/rejection_sampler.py,sha256=4Rp-lgHMcZ4JFZs2ujBG2Rmm2FmPoGbq-ZbxQW_cVeU,16469 +vllm/model_executor/layers/resampler.py,sha256=fA3oc51Ku2jW2orqWVX0voeMc1zSooV6EyGteRIvGAs,10444 +vllm/model_executor/layers/rotary_embedding.py,sha256=lfv76Vwhfh5LvqzK6NBMV-R9Cot3mRYmcJhnqiAHV44,43994 +vllm/model_executor/layers/sampler.py,sha256=xsB75r9N0NPJa74kofiqXJ13koDs-Mkd_BP9F_jvSys,53762 +vllm/model_executor/layers/spec_decode_base_sampler.py,sha256=J_RiMvR0LACXjsTufxD3w4s4wVZtZ9waeDd6XEFqRkE,10119 +vllm/model_executor/layers/typical_acceptance_sampler.py,sha256=uzlrDmPtV19Mv17iJIceBU4Lj9IiJ1M3kXTxZ_y0mek,7047 +vllm/model_executor/layers/utils.py,sha256=PyCRuK3cnHx31HgQ2kiOt-Fx3Stm0O3q6ACOfVia8Qc,2674 +vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=cgtcGzxKQ-JdiV3-z_22LdCHqm6SFaElmdZWTII2u2Q,22621 +vllm/model_executor/model_loader/__init__.py,sha256=SCcBIQqgBnp-uob0RHRC2La8mElcfAkXaJ_jwPLbxWI,646 +vllm/model_executor/model_loader/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/loader.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/neuron.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/openvino.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/tensorizer.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/model_loader/__pycache__/weight_utils.cpython-311.pyc,, +vllm/model_executor/model_loader/loader.py,sha256=h8V8Tw1q8s_EL-1mXxvZn1mk_riop7d3Kmo1JFr2mkc,62460 +vllm/model_executor/model_loader/neuron.py,sha256=vfxSDDhrVRrBw5KsJaO0ICGXnf5ZlnEECd0E8Me8ETc,8328 +vllm/model_executor/model_loader/openvino.py,sha256=SnzygL5Em3WQ410wHLF099T8cngQ8K9kpwBP9CtvG_c,7716 +vllm/model_executor/model_loader/tensorizer.py,sha256=IrDTxwTz43NIJxiWJ-Ravu3iw20O3Opv-xKntAW4oUs,20260 +vllm/model_executor/model_loader/utils.py,sha256=ULvJOHzFoGMZlVPYU7iIilttSWPYknFgaFRGqZnc1LI,6513 
+vllm/model_executor/model_loader/weight_utils.py,sha256=xlqPpwJ69zA9xPuL-ZtObfqZ2XJZh3a6EBpbGAyCChs,26237 +vllm/model_executor/models/__init__.py,sha256=VA7jLZHGmibl55M5pQLfaW-FwlTmlEBOUzlQ1PWwqmQ,758 +vllm/model_executor/models/__pycache__/__init__.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/adapters.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/arctic.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/aria.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/baichuan.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bart.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bert.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/blip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/blip2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/bloom.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/chameleon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/chatglm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/clip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/commandr.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/dbrx.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/decilm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek_v2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/deepseek_vl2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/eagle.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/exaone.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/fairseq2_llama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/falcon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/florence2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/fuyu.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gemma.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gemma2.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/glm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/glm4_vision_encoder.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_j.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gpt_neox.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/granite.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/granitemoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/gritlm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/h2ovl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/idefics3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/interfaces.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/interfaces_base.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/intern_vit.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internlm2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internlm2_ve.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/internvl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/jais.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/jamba.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_next.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_next_video.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/llava_onevision.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mamba.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mamba_cache.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/medusa.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpm.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/minicpm3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/minicpmv.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mixtral.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mixtral_quant.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mllama.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mlp_speculator.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/module_mapping.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/molmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/mpt.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/nemotron.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/nvlm_d.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmo.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmo2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/olmoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/opt.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/orion.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/paligemma.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/persimmon.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3_small.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phi3v.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/phimoe.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/pixtral.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_5_vl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_audio.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_moe.cpython-311.pyc,, 
+vllm/model_executor/models/__pycache__/qwen2_rm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/qwen2_vl.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/registry.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/roberta.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/siglip.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/solar.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/stablelm.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/starcoder2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/telechat2.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/transformers.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/ultravox.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/utils.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/vision.cpython-311.pyc,, +vllm/model_executor/models/__pycache__/whisper.cpython-311.pyc,, +vllm/model_executor/models/adapters.py,sha256=1G3hLCo2DSoGAbjE-Os-RQM5DIuw6OmHzZIE7FSYoUU,8341 +vllm/model_executor/models/arctic.py,sha256=UzPduVU4GRJlHUbBasCAcBwjFKibdJY5WCSa5UJjLQ8,25325 +vllm/model_executor/models/aria.py,sha256=ude6Rto5mTho80WtnQmzUXaJwKgs3Y5ivl85ZpCiD5Y,25496 +vllm/model_executor/models/baichuan.py,sha256=_H8q99d7T36sQRcI9egEoLapJdUvxZLlIXtdplWFqso,19452 +vllm/model_executor/models/bart.py,sha256=F0VurjNMn17VER5qobIqGFMmRCif7KHsB3VRNJkZpu8,36581 +vllm/model_executor/models/bert.py,sha256=i5G_BURorjAhXaqmPbFJSw-mAgFc7Wr09n0vdVON96I,21005 +vllm/model_executor/models/blip.py,sha256=E0ty49zD_wOmBZmCJwCunXurCfXFD7CbROUVs9e37Zc,12190 +vllm/model_executor/models/blip2.py,sha256=pNMbpa1FBFWjGlC0sgaicKYDnJ4FshYRkioUOv2G_cE,26212 +vllm/model_executor/models/bloom.py,sha256=enNsQf5c6iEqMRzmtNzuq4nhx883ieWoThGwRNXP5-s,14907 +vllm/model_executor/models/chameleon.py,sha256=a5k-u03agHgE723AKuYA9l9dmzwbHXTzJOyzNetqBeI,46254 +vllm/model_executor/models/chatglm.py,sha256=pMkV4vvjMlGAcVxLaVG5wiS21tPo3l5BWt24IRR64do,29943 
+vllm/model_executor/models/clip.py,sha256=6YSE95hvt9R2QpnR1DxILvDJemIEzOjxiYIqLUbrCmI,19805 +vllm/model_executor/models/commandr.py,sha256=UlSbMeZfsna8oz2QwZEdp6jo65h3LAzB4owlLj8jw8M,19966 +vllm/model_executor/models/dbrx.py,sha256=VJqlpmeRaRK2fCCWo0VwkCzCRQ8tf3M5NmySNrtwE78,19242 +vllm/model_executor/models/decilm.py,sha256=eMmIM3LUlTkZh9EnNksdBJySO9cjP39LE8AAT4aGM_o,5380 +vllm/model_executor/models/deepseek.py,sha256=_Xe3Xr5tY3ek0KAPrTFC56ZF7veNwiVvMhG5K3nW8y8,20583 +vllm/model_executor/models/deepseek_v2.py,sha256=1PyjHQ-GQK1yTX3u68uQEOUklUe0BIxHC8XNJA4Dpfw,34275 +vllm/model_executor/models/deepseek_vl2.py,sha256=P7_ZwvsNS_HZC8bfcd0xqD2NMhOaIsvVQc6YMzTuHLM,25033 +vllm/model_executor/models/eagle.py,sha256=KjspfLcYIcSMFahuyoRyzn_-ebSrowojbwmlqdMvRQM,9319 +vllm/model_executor/models/exaone.py,sha256=rwdJICbgSB0GBMuVBr03r0BKQ_zXn_0aMpr25PjGIdU,22018 +vllm/model_executor/models/fairseq2_llama.py,sha256=J983ipTj1VNjEtr8AO5zse78kpdRqnL2WzUIC-gRZSU,6489 +vllm/model_executor/models/falcon.py,sha256=44i1htfX_jyLqApu2_irj278swH1hP-e0PTf7aDEXrg,22073 +vllm/model_executor/models/florence2.py,sha256=WK2yDQvFT4LbNi5TUu48y0f1EirMsXqzlnno70iRcWQ,10131 +vllm/model_executor/models/fuyu.py,sha256=TzFjXm8_vz0VFk8JB7aHIve66h6aA-ryUAN8ofEsJDM,14696 +vllm/model_executor/models/gemma.py,sha256=Y0BP-sz0wExJp4sZwB9PUGvrACC9obzkwAU4EYA1CM0,17496 +vllm/model_executor/models/gemma2.py,sha256=-kLNtndPqtOoeOENch4iK9oVRPZ2bYMQ2vThWtLIeZI,18886 +vllm/model_executor/models/glm.py,sha256=cXlSy28_9DLF7_n9QGsXSOmA449pnA7d0TeyHW457DM,979 +vllm/model_executor/models/glm4_vision_encoder.py,sha256=tqh9uA0P2f5s87QzsV6dID3hL01ErA5-1TUBsOO1KfU,10456 +vllm/model_executor/models/gpt2.py,sha256=FaR_kRUZx3FhTmNcVDFh49gKQTXgsmqSsd3p0gtbfog,13367 +vllm/model_executor/models/gpt_bigcode.py,sha256=VZwjWG4xhlARF-gXT-lCKysSlggI_VYQCTxEUXIWtQw,13954 +vllm/model_executor/models/gpt_j.py,sha256=azmOuWE5n6sriGJ3FZbuDzbnVSLaREOhH1LC_eVGIcY,13875 
+vllm/model_executor/models/gpt_neox.py,sha256=feEujPn2yPNXN0wSkBtyVfHfc8RJr-Iw2f_0ngnEIxk,14051 +vllm/model_executor/models/granite.py,sha256=VXloy_FH-B2Ro4BXHDDprxo_k3t8PvV1Uaopm9_8nW0,21158 +vllm/model_executor/models/granitemoe.py,sha256=RPMq8An4Ym-XuiZ9fGuCxp3FKgFUvJft1VO1UGL_d4U,18817 +vllm/model_executor/models/gritlm.py,sha256=axmHQRxBCU6gLLpLu3Gs9c7hWqIAxC1UrL8RLM9kzZg,9152 +vllm/model_executor/models/h2ovl.py,sha256=N7cFONfGq7pL8124r6KhKK8lKagYu6iuJhJg8AeQhVs,18250 +vllm/model_executor/models/idefics2_vision_model.py,sha256=ayLc5O8hjSwBhmZRdnvx8dtTU8x3ejghzu3ftrOcGiI,13601 +vllm/model_executor/models/idefics3.py,sha256=SkShAmm1WSqpYEqHIFu2LMpKW1xBBaHCOhk7d_opnwU,27232 +vllm/model_executor/models/interfaces.py,sha256=FF6TgcN9qQcPKelRc8vjPj2lrycWEW6kGT2vgupFQ3c,12651 +vllm/model_executor/models/interfaces_base.py,sha256=wErHsx42tbVyXxcHajAWN85hoAsl4HwHGPg2Qo67LjA,4777 +vllm/model_executor/models/intern_vit.py,sha256=jcUq9Q6xp0BHdWs7Zgo-iDoUAiO3bcIrBsYrrGr6xsc,17332 +vllm/model_executor/models/internlm2.py,sha256=2rACIQ02NurhpuPAYKNp2x6iU81S12CBfhS0ro68uYc,18587 +vllm/model_executor/models/internlm2_ve.py,sha256=qRdTU06JySIA1DBtmASrnY3IGXUD_nhZsQERfodKtOA,6123 +vllm/model_executor/models/internvl.py,sha256=5kpoVq2Nq7UeevP0J2sTG6taN3m6LP4QPjPhmjE4Zis,33616 +vllm/model_executor/models/jais.py,sha256=lnvZ9BFwJG5pzv_HkfVB7aBeG1IrDo0RszCLhu1jxoU,15533 +vllm/model_executor/models/jamba.py,sha256=PooOuQRo7waQ8OJ4NDkiD2g_cgvPHL3Mbi-uRvxDHYg,26068 +vllm/model_executor/models/llama.py,sha256=LuFw4GLHl_b_N7n1JD_-4kNcILIH03nBatsrMU-Q1W4,24225 +vllm/model_executor/models/llava.py,sha256=iy4I7DY1LTVIM41Q4sCaZ3cbxiPguHt1X5gjrA8kzfA,31182 +vllm/model_executor/models/llava_next.py,sha256=K9P9ToMBvZnsvdJCD2ZjlsWxec1NTYbTetCGn7rCNQM,24222 +vllm/model_executor/models/llava_next_video.py,sha256=OvM5Gk-08YOvg-WAu8UTPnZhOIZBv7Nv7reXWP1s-Bg,18645 +vllm/model_executor/models/llava_onevision.py,sha256=FEfwfLoS-Ohy2hr0JmP3itHVsUj_88K60CJHh3_E64c,37227 
+vllm/model_executor/models/mamba.py,sha256=-Hm6slyN380xsS1mdqLEdvjl7HqR6hlR7XfG022aIHM,12925 +vllm/model_executor/models/mamba_cache.py,sha256=lIXqrv7ge48MJp0LTe7QWeVHvlniEXfxeQMJrgGM5og,7080 +vllm/model_executor/models/medusa.py,sha256=c0jYTHJXo8FfueT0-y_yYjA7e91TIFz4PYRu7G5Kqlo,8558 +vllm/model_executor/models/minicpm.py,sha256=JOVHd0YE3olndaK2d4hXeXvELQj8VFADj-XfPiGaD78,25057 +vllm/model_executor/models/minicpm3.py,sha256=bnK4HEJbD-OEyUZVRxudTvfFmTPQJ4QtyZspa09Eim8,9853 +vllm/model_executor/models/minicpmo.py,sha256=izsU-3mwpklxbHpmMmF4xRGvgGH39_c4Lu-IxfcYZeo,33228 +vllm/model_executor/models/minicpmv.py,sha256=xIYhr2dOHRu-AU9rHDp1MLS1fECJaW0McsT3bZpEots,60364 +vllm/model_executor/models/mixtral.py,sha256=PoCLCjry9uZBZhWSdOnrWebmRkbVF7qb-85EXAyzfcA,20749 +vllm/model_executor/models/mixtral_quant.py,sha256=ewTrriEZlR14NWLDmzEVgSPPjXl572XHAlRQHUlA-W0,18851 +vllm/model_executor/models/mllama.py,sha256=6xRJVUjLE2lytRFsyDOdvVDv0TMTvNwve-fgyZVoIPU,63312 +vllm/model_executor/models/mlp_speculator.py,sha256=hv7XRnxENUo4R07K_Ox_9RVTZyKEDS96fYNECVKjz0A,7905 +vllm/model_executor/models/module_mapping.py,sha256=d_72RpeQ7wVO9LXl2eeQDIRyurGtEMsN33ey5IdDG_8,1781 +vllm/model_executor/models/molmo.py,sha256=YA9QK_PBx14zfDrsU7jQQw2N_gDSlFponFM8CpMJyzM,51182 +vllm/model_executor/models/mpt.py,sha256=o-tCgyyRdaGCNjCvhToYU26dEiRLbLoLe0Hh8PSqSzM,13421 +vllm/model_executor/models/nemotron.py,sha256=HZEkqoQWNU0cRLNl_Amdcj6Wp4JyjOiWts1YcxLqRrs,21787 +vllm/model_executor/models/nvlm_d.py,sha256=SoSuZbsa3R-eJMY1XjEo-EO0FCTCynenqPrbXzXFCOM,8968 +vllm/model_executor/models/olmo.py,sha256=Xnfapv4Ryt-JxOJkh6cqaMYXebUQ4LxpwVKp6tI-EFI,16063 +vllm/model_executor/models/olmo2.py,sha256=J71jBvD9UoLhsh97dpMPtJbtfONYZH2eIjMiTb-WkUk,16840 +vllm/model_executor/models/olmoe.py,sha256=UY08on5PeihDweaWnEg6iebINBujyQ2-4HHkfsmswjQ,19161 +vllm/model_executor/models/opt.py,sha256=v2WFYCJvjUJuJMfMIWERt8IoGatyX9f1Qj81bZnLG58,17405 
+vllm/model_executor/models/orion.py,sha256=Cy2YMih8IJq4bbhW-ZXfxGFu1UaLdZ815OppeAlB8P8,14901 +vllm/model_executor/models/paligemma.py,sha256=u5MpXY-Du9cGNYoG10q6stmaYqw_-bc0IZPD5mvzn6w,12870 +vllm/model_executor/models/persimmon.py,sha256=cpKyIDlmlJSwAcvrBZw1FG2gaPkrrhX0bh5h19UzA7I,15245 +vllm/model_executor/models/phi.py,sha256=9U-TpDl7pfPv7GwciAVOM_hRaKZD2aKzAjiXz3UGBIA,15032 +vllm/model_executor/models/phi3.py,sha256=dfxzNvOZ2XPpQIy5mdRFsxFZObV1_h5vlC-ak2hw3HI,388 +vllm/model_executor/models/phi3_small.py,sha256=dWf_AH0npQN-fHhmUEiR74cCCgxbRNPJfznPw2rP3KE,18879 +vllm/model_executor/models/phi3v.py,sha256=c8mGnjXVdwHpjgiyEyjUUH1Y9lnH68rvtGdgOJEq4as,28114 +vllm/model_executor/models/phimoe.py,sha256=IdAAb0jEnayLnDY_D76QS2TwShrmzM6X_5k9i04yluw,25472 +vllm/model_executor/models/pixtral.py,sha256=ABWdfLqWhCr28H0A9vDr7HYkLHGg7ipPlPoiPc0Z5uA,41529 +vllm/model_executor/models/qwen.py,sha256=7_1VoLjiSGdr2pr9VnRfZutY9HH82IiIc9K7WYXNx20,41502 +vllm/model_executor/models/qwen2.py,sha256=a6RdZmM2ecaxM3vP8lG0Ogm3_Phg9cQRgzeZwCYkLco,23047 +vllm/model_executor/models/qwen2_5_vl.py,sha256=EDFReNttsBng86dUewcWYazXxtGI5ch14viXgsL26Yo,46131 +vllm/model_executor/models/qwen2_audio.py,sha256=tY9YFLh_H1PNUwMyiBNrjlWgHbN2qSPihly8JNDUPjg,17617 +vllm/model_executor/models/qwen2_moe.py,sha256=em1sY2Z2TPNqjTpxoZ-HeVdljoEGMh0D1ovhtpAZ_t4,23132 +vllm/model_executor/models/qwen2_rm.py,sha256=aqgmbfEvQ999rhjskXQxhNyNvBkT9K_fYJjJ7fA7QYI,4884 +vllm/model_executor/models/qwen2_vl.py,sha256=yNtb6CsQWIDoPo8GtiIyO2e9CdxpgIiRWgEiEdKxvE8,52595 +vllm/model_executor/models/registry.py,sha256=SSdZR3zT3la_3QnGb9dTie_SRv3b3kudYBCDJ5FwkP4,20528 +vllm/model_executor/models/roberta.py,sha256=gvbrPs3nYsBqRmagCPygSYAPIixQCXHXiblEYZudkgg,10719 +vllm/model_executor/models/siglip.py,sha256=dFgb6-AAp4I-EbSb3Lbpgv5NzfhFo4yX5G9wZEbnrNg,23509 +vllm/model_executor/models/solar.py,sha256=LKrf1z-QSVbUX-JtUMEeh94MhoaQnWuPMyc6NKRsgD4,21015 
+vllm/model_executor/models/stablelm.py,sha256=8gTWWqAQeYbukECa6Nz5g7-ftMq6Q1aL0BPYCNQf5GU,15989 +vllm/model_executor/models/starcoder2.py,sha256=bGwDgB5s0syaAMn2Rc-dSrZQIca3VKY3PFgi5xVZFBE,15298 +vllm/model_executor/models/telechat2.py,sha256=wtbgsiw2c21C3H4Db5LYi_DIqRJXpZhO9thMUWoR1zw,5829 +vllm/model_executor/models/transformers.py,sha256=-N6Cg6F6PbxwXCoZ2Jqh_9h5_tuNFwPLED-q2iZteyQ,10459 +vllm/model_executor/models/ultravox.py,sha256=XK7w7IGLIpPKHvHenHrriM4xFdJufVgk92NFc1M5anA,22630 +vllm/model_executor/models/utils.py,sha256=z9SuSqehkN2wDmTRHkzY3S71MYWTTIgdx9EQN7wViGk,21546 +vllm/model_executor/models/vision.py,sha256=LkaAo0Re1iMZI41JxAsUshVeTZU6OKv5t2BcPJrEFi0,5554 +vllm/model_executor/models/whisper.py,sha256=zdqLtrWZOGsqLI09gh2WqCqrdyYRY79jNYVnG3jwJf4,28179 +vllm/model_executor/parameter.py,sha256=SAO196rVPbQycr1CixtAROIC2SHFXuki2Q0ykAL5uLo,15753 +vllm/model_executor/pooling_metadata.py,sha256=FeDxnEg8W8-ZOtg17JE1z26RfrKaU2_ZaWvwkochqKA,2077 +vllm/model_executor/sampling_metadata.py,sha256=bnNL-A_4DtG3alnqi38kAtOXywlDEAqdm_GPF86s_7E,22967 +vllm/model_executor/utils.py,sha256=o1nKePmbzfAwPqVqXJbOGwDUXy_fLUGaFZlsPEbMhpI,1915 +vllm/multimodal/__init__.py,sha256=3iq4bjqOr0U4LC7kLvqIH-xdlIeCzn8lmmMLU_ecfO8,967 +vllm/multimodal/__pycache__/__init__.cpython-311.pyc,, +vllm/multimodal/__pycache__/audio.cpython-311.pyc,, +vllm/multimodal/__pycache__/base.cpython-311.pyc,, +vllm/multimodal/__pycache__/hasher.cpython-311.pyc,, +vllm/multimodal/__pycache__/image.cpython-311.pyc,, +vllm/multimodal/__pycache__/inputs.cpython-311.pyc,, +vllm/multimodal/__pycache__/parse.cpython-311.pyc,, +vllm/multimodal/__pycache__/processing.cpython-311.pyc,, +vllm/multimodal/__pycache__/profiling.cpython-311.pyc,, +vllm/multimodal/__pycache__/registry.cpython-311.pyc,, +vllm/multimodal/__pycache__/utils.cpython-311.pyc,, +vllm/multimodal/__pycache__/video.cpython-311.pyc,, +vllm/multimodal/audio.py,sha256=Dt6wQhAOnB0Di_4jMTU8n20GtYq99LwBBM1Tgz94trg,2092 
+vllm/multimodal/base.py,sha256=yuCE4JDh76aWVrKlz7ZY_eNAkuACq6FAZ8teabukYyM,15964 +vllm/multimodal/hasher.py,sha256=qUzT2Oz-4wTcTWuA8fHz4kGDk1k7HRunBZvhHIaOYRg,2838 +vllm/multimodal/image.py,sha256=NGxSAXwQnE5AqlZGrkaIFvWIePvzaJPZ4YquVfxpyzc,4529 +vllm/multimodal/inputs.py,sha256=6DpfaJ9KTV5PjaHUjHBo4jCdwePUuBWovwUu8YdMKt4,23667 +vllm/multimodal/parse.py,sha256=_-kJEvSQsHWOERLTWQ4e51frKBfnkA1FmH30pB6G4Fw,11359 +vllm/multimodal/processing.py,sha256=NrPx-WNjFYM-D9OzK1xxPwIucTvo12nxvp9K4pUvd18,43358 +vllm/multimodal/profiling.py,sha256=SkEw8ObL3FpXZ2BcybQOqYJMxTMJg58PcWTvo_Vj5bY,6857 +vllm/multimodal/registry.py,sha256=827zcATMEtSyPTaSMkPq7152OpwHglXlxfw7FOMjMOQ,16266 +vllm/multimodal/utils.py,sha256=vEgQmhIcR5hJQy9u0h0ilJ_zWTMyj_Q0jAalC369cRE,17118 +vllm/multimodal/video.py,sha256=33mntwtUdLkfn5780H5oRHBBA1rK3cMPrpBZ99oj_U8,6104 +vllm/outputs.py,sha256=VhYOTyo7-ZUK7k808oOycsq8wUq046hR45U3ZwhDgTE,19881 +vllm/platforms/__init__.py,sha256=ffMBLIbsJ4QgJ9mfglnDtXoJXWu1Y7CqIo0wNmUqoJk,7332 +vllm/platforms/__pycache__/__init__.cpython-311.pyc,, +vllm/platforms/__pycache__/cpu.cpython-311.pyc,, +vllm/platforms/__pycache__/cuda.cpython-311.pyc,, +vllm/platforms/__pycache__/hpu.cpython-311.pyc,, +vllm/platforms/__pycache__/interface.cpython-311.pyc,, +vllm/platforms/__pycache__/neuron.cpython-311.pyc,, +vllm/platforms/__pycache__/openvino.cpython-311.pyc,, +vllm/platforms/__pycache__/rocm.cpython-311.pyc,, +vllm/platforms/__pycache__/tpu.cpython-311.pyc,, +vllm/platforms/__pycache__/xpu.cpython-311.pyc,, +vllm/platforms/cpu.py,sha256=32LYr4_vfIafXtkjLLUx63y8s35hdqdgzWPgwMGxA3k,5572 +vllm/platforms/cuda.py,sha256=3Dvm9hivXW8ePq7Q7bsip3Fo1x2hWFROfLN4PnrxwMA,14593 +vllm/platforms/hpu.py,sha256=1SIy0lqR-Sb7z6WXRMwQwAZ4HzHzqU0WfkzAUsUk4zg,3351 +vllm/platforms/interface.py,sha256=cwJcvbKOEgoB2rGj_O6a7k45sdYkieLcsCUMys_3g14,9705 +vllm/platforms/neuron.py,sha256=AAJ_0_xoC_v5zo3YrHtSDJAxgyrT3mribpNZVurND90,1817 
+vllm/platforms/openvino.py,sha256=ZYlSScMIf-pED-2AYCc5-8w5Z86F36A4au6ZbpcPlCo,5885 +vllm/platforms/rocm.py,sha256=JYjqIyPuhh5SUagDdyrm69EFvlxI17fIgKD-YpAEelU,6617 +vllm/platforms/tpu.py,sha256=lhRE9sYuvSjplOH3H1XnBdT_LHqxTt42bXSK-RwEiU0,3472 +vllm/platforms/xpu.py,sha256=c2Ym0nVkfZtdF6ehzA7w7k-jVxsQBZdiKr0W5eHGsT4,5352 +vllm/plugins/__init__.py,sha256=eZT4FNCwDJhjMADo5KgKBaGLd_7YR9LzhFgnLuwhAnc,2962 +vllm/plugins/__pycache__/__init__.cpython-311.pyc,, +vllm/pooling_params.py,sha256=zotcVMzPjm46lxg9fAcCFf2goFqbf6cySIAEg1fnVdo,751 +vllm/profiler/__init__.py,sha256=GWIaNWjYFXuIGl4qCtZyhz-Z88ysZH-mz4G7pYKOe78,128 +vllm/profiler/__pycache__/__init__.cpython-311.pyc,, +vllm/profiler/__pycache__/layerwise_profile.cpython-311.pyc,, +vllm/profiler/__pycache__/utils.cpython-311.pyc,, +vllm/profiler/layerwise_profile.py,sha256=WIh-o0z6si4ua2sCvqRjs4OekSY5PvP7XPgBLud9DTI,13837 +vllm/profiler/utils.py,sha256=itpGYyhLXCQ07nT_dL7seAPJyDlPv9f2Dg6Grh5lpAE,4663 +vllm/prompt_adapter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/prompt_adapter/__pycache__/__init__.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/layers.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/models.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/request.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/utils.cpython-311.pyc,, +vllm/prompt_adapter/__pycache__/worker_manager.cpython-311.pyc,, +vllm/prompt_adapter/layers.py,sha256=aoeVO5L_abEwB92Groio_5lB9al37OiEJac_GkSuCko,2726 +vllm/prompt_adapter/models.py,sha256=D1hawhlVLFoa0a6FFIRBsWLYVgEbtaT2WCkSTgB4PLc,13721 +vllm/prompt_adapter/request.py,sha256=2_iVKLKdWE3kbt7ja_wPHhCerMqVXlg6ztDw344Yd6I,838 +vllm/prompt_adapter/utils.py,sha256=fMMwK-mzuhT9Wu9araO0rSdtNkAmTNvsCAfQXfOkWQk,3668 +vllm/prompt_adapter/worker_manager.py,sha256=qMEPVkdg2_L4bYSIBg_XPEM5As8UgrPrgudoiG4kEAE,7536 +vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65 +vllm/sampling_params.py,sha256=JWzBhdW6anpuRdlWY7BXLLHsTKje1EhVGI3ov744qao,22364 
+vllm/scalar_type.py,sha256=hW0VB2_F5_EFO7tDLDkdN4LrENiynZsa3D3zSndp9kc,11806 +vllm/scripts.py,sha256=6EJGP0l5nYAkS9kOFBfNuhZ8BCjCPPCPi-WvYmymqqc,7249 +vllm/sequence.py,sha256=DfBfbyQbGYiuQbcV6b9NS_XUa9U8VcKWkgCX1aCtlEM,57947 +vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/spec_decode/__pycache__/__init__.cpython-311.pyc,, +vllm/spec_decode/__pycache__/batch_expansion.cpython-311.pyc,, +vllm/spec_decode/__pycache__/draft_model_runner.cpython-311.pyc,, +vllm/spec_decode/__pycache__/interfaces.cpython-311.pyc,, +vllm/spec_decode/__pycache__/medusa_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/metrics.cpython-311.pyc,, +vllm/spec_decode/__pycache__/mlp_speculator_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/mqa_scorer.cpython-311.pyc,, +vllm/spec_decode/__pycache__/multi_step_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/ngram_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/proposer_worker_base.cpython-311.pyc,, +vllm/spec_decode/__pycache__/smaller_tp_proposer_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/spec_decode_worker.cpython-311.pyc,, +vllm/spec_decode/__pycache__/target_model_runner.cpython-311.pyc,, +vllm/spec_decode/__pycache__/top1_proposer.cpython-311.pyc,, +vllm/spec_decode/__pycache__/util.cpython-311.pyc,, +vllm/spec_decode/batch_expansion.py,sha256=_n2h_IcPtL1XhAEVtv0pOPNPvf9GDqfojdZhCAneUjk,22740 +vllm/spec_decode/draft_model_runner.py,sha256=LDc_sNHBkkxTkocuFBgeA4Tdi62oAciZOH4tGG02Dxw,13973 +vllm/spec_decode/interfaces.py,sha256=_hnecfFBgi-_wKMIxPobP-DRaefD769WqethORvcofA,3086 +vllm/spec_decode/medusa_worker.py,sha256=liPn4iIMqJ8owL4cXgrDTVb3bjpUKBlqeNDlFlwEvjc,4900 +vllm/spec_decode/metrics.py,sha256=zCNetl5Ajwk4e1Yl6cig45GQU-lIwwz3FYBtzTGimws,8062 +vllm/spec_decode/mlp_speculator_worker.py,sha256=NuOIAQtZxCWPs7YXWRJW3KwFCT4i3ETCobsA0gK-qVs,3737 +vllm/spec_decode/mqa_scorer.py,sha256=GdhVN2Ef3L_LlnoftqivY0rRKgPF7EV81XUu3PyMouo,7508 
+vllm/spec_decode/multi_step_worker.py,sha256=GtoiHk_XleDafQCIeE-JlfYJOFEU320rn7gI7Z3-qRQ,18013 +vllm/spec_decode/ngram_worker.py,sha256=rljyKSx-odVLsGVG6U6H53P4T8U6fblPyAM3xJOaNaI,7754 +vllm/spec_decode/proposer_worker_base.py,sha256=Iv_xg7F5o3hCCRFNj84BR_mpWy9K4xAUKrLLcK-tzLo,2089 +vllm/spec_decode/smaller_tp_proposer_worker.py,sha256=WY29xqghYzyX7mFS-s4zBxBjqRguxUqCWZEK7fUXbUw,6175 +vllm/spec_decode/spec_decode_worker.py,sha256=5DL4UUwMukkQ-YHdZwH1y3y_QHjgoZ7-1DOjvG9xh8w,60601 +vllm/spec_decode/target_model_runner.py,sha256=sRW1sqPhEdEwL5MvHCRRmd-lmc-G8LjJgG7R22R6JM0,2073 +vllm/spec_decode/top1_proposer.py,sha256=sEvZMEoj_s9-aXzkX6faUZuffoPtxATuMbEmRnZKecM,12354 +vllm/spec_decode/util.py,sha256=Btt4tyWuZUeOdIScKIDwD5xUJSeTNZwjbFiQtkF5oVU,9684 +vllm/tracing.py,sha256=fR1aDoun27YzEPBuhmb3Namm4DNvXsunqQ8X9v_2YOw,4749 +vllm/transformers_utils/__init__.py,sha256=RZEL-BvlcJuQJFMPZLM-LjViwZxSsV2o8Tnx7strgDU,617 +vllm/transformers_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/config.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/detokenizer.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/detokenizer_utils.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/processor.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/s3_utils.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/tokenizer.cpython-311.pyc,, +vllm/transformers_utils/__pycache__/utils.cpython-311.pyc,, +vllm/transformers_utils/config.py,sha256=iVoBL_qKY4syyfb0UyelFZsO6O9bD5b6fmAVxK5daQ4,22470 +vllm/transformers_utils/configs/__init__.py,sha256=Jmr0QgIrZCL_NtbUY5e-seSzJs4CpDwpak2NFhb2OrI,1993 +vllm/transformers_utils/configs/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/arctic.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/chatglm.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/cohere2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/dbrx.cpython-311.pyc,, 
+vllm/transformers_utils/configs/__pycache__/deepseek_vl2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/eagle.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/exaone.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/falcon.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/h2ovl.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/internvl.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/jais.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/medusa.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mllama.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mlp_speculator.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/mpt.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/nemotron.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/nvlm_d.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/olmo2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/solar.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/telechat2.cpython-311.pyc,, +vllm/transformers_utils/configs/__pycache__/ultravox.cpython-311.pyc,, +vllm/transformers_utils/configs/arctic.py,sha256=_c6snrc6ypgzlZmylCZD8bNNhqFSuppKDlDTeWLzdLE,8990 +vllm/transformers_utils/configs/chatglm.py,sha256=6H5Hv6Z_yziBZG9q4N_0Obj6eHsGL9DrxQeBhkLsZ9Y,2870 +vllm/transformers_utils/configs/cohere2.py,sha256=yeLdn79NO2kyuYH9IqRAO0WGbMJCh5ntd1jabtZaWJs,10353 +vllm/transformers_utils/configs/dbrx.py,sha256=d0xE5WH999Pxqp7v1MXJzmk10cwp0UMOOBADD8bO5rU,10957 +vllm/transformers_utils/configs/deepseek_vl2.py,sha256=QICTzlaoLil0UUXc0fwgl-CImEPpjOnTy1dAUweKLDw,7252 +vllm/transformers_utils/configs/eagle.py,sha256=er_MAjFQmUxBh3qeCcGIUWlqel9PoRQbsO1VFQ4boLI,1593 +vllm/transformers_utils/configs/exaone.py,sha256=o82MCDMCtD8cHvrPzJroEpJV5e4xuVK3yzlHBq4IjvE,8883 
+vllm/transformers_utils/configs/falcon.py,sha256=1w9gXJJPzvup7Hd05O1xYzp_IDXDdtxByt06U92uy7c,2917 +vllm/transformers_utils/configs/h2ovl.py,sha256=Tsyex8PgWS_WEuzgTZ9vGcgn7Pv1m0yJDs64Q2meT_Q,489 +vllm/transformers_utils/configs/internvl.py,sha256=hqm1INrEucyhhPKQhwRiwEZ6Ulw1gvnFIw1EISnE5QI,1867 +vllm/transformers_utils/configs/jais.py,sha256=ZSi5YaOVeramO6DOZ5nj7nQwsE20Sxgl6ZxhAFPAdys,10359 +vllm/transformers_utils/configs/medusa.py,sha256=2gSa-OtMNHi2eL_AJDgbslqb9ntcg9fRfhhgRPGoxr0,1943 +vllm/transformers_utils/configs/mllama.py,sha256=lIkiJ83huJQq7kLAV2hfLWAcUxWVT9aa1YAcSRUzz1Y,805 +vllm/transformers_utils/configs/mlp_speculator.py,sha256=MgeWpPARW5jwb8Nw1hnZaqJbdDdBOc_a_ESTeRy3O8g,2437 +vllm/transformers_utils/configs/mpt.py,sha256=zLtFoXM4PKJK67mQoeBOwLQrT4wR-zdEMYAsGrHV108,7589 +vllm/transformers_utils/configs/nemotron.py,sha256=xw--8lmM5VzLM6Nfyha4vaeRvVYh5v3bjrAP_Z010nk,8974 +vllm/transformers_utils/configs/nvlm_d.py,sha256=2Mr9ZAI6VG0DbLDw0BnFEIgeiZd7ip3bSoVsfcEeNqQ,458 +vllm/transformers_utils/configs/olmo2.py,sha256=d_YFuCwGdWR6-P7oQA2LS5XR-vD6TEUByo7qnn0Ez3Q,8075 +vllm/transformers_utils/configs/solar.py,sha256=y5b9R4mQXdgi-fUv2ZqMIFouW7P57lT5nppn54aCOuo,10841 +vllm/transformers_utils/configs/telechat2.py,sha256=JsOuzKHPQHqtJBZNi27dtwc-FWelsQ9GlmORN2AubPE,2200 +vllm/transformers_utils/configs/ultravox.py,sha256=14glDZ9IVoGMt-DMp_O2NeUDhKge_xHDRKamvuoqSS8,4119 +vllm/transformers_utils/detokenizer.py,sha256=tjD1mplMeTvXrFA25A8tYJr3pDezESoaM3m88N7D8HY,7250 +vllm/transformers_utils/detokenizer_utils.py,sha256=qrDj9iKcbjtMYOE12wnFkWccFmVTRB9W8W6BkqzBItg,6769 +vllm/transformers_utils/processor.py,sha256=0Qh4moGATpwqlDrGExHi8c7HoLDAIJq_MT5bQPtGwIw,3618 +vllm/transformers_utils/processors/__init__.py,sha256=WloJ524I5uG04zlyJVWoPtDGVzlRvWpVsuwcczjOM3o,165 +vllm/transformers_utils/processors/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/processors/__pycache__/deepseek_vl2.cpython-311.pyc,, 
+vllm/transformers_utils/processors/deepseek_vl2.py,sha256=BI8GbCxTl1mkkQ-f3NINzEa-fJ4o78_JlIGn5Ba90So,14598 +vllm/transformers_utils/s3_utils.py,sha256=KZEHwQjQcn1ph8kxWbd0Wzb2lnqYGzeiQsLssFRy9-E,4733 +vllm/transformers_utils/tokenizer.py,sha256=6BxkoQv9lxjOsqbo3pK3U0kN81cFgTT9qLdNogb-RX4,8905 +vllm/transformers_utils/tokenizer_group/__init__.py,sha256=LKgVU0nyJxinK3lJqSmcrGSV0WRsh208y-C0nnZFhLE,2455 +vllm/transformers_utils/tokenizer_group/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/base_tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/ray_tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/__pycache__/tokenizer_group.cpython-311.pyc,, +vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py,sha256=PMkdfJWPsuhf6pHuLZ_vNWbIV5DZE6WxKXU1Qh_WaYQ,2055 +vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py,sha256=KnFwojkzriWCVXdauEYhpeqratpl_JuLadhxUnceFT8,9807 +vllm/transformers_utils/tokenizer_group/tokenizer_group.py,sha256=BEDWo8fgUtTqrwPFThK2h7uHP49ea9wUmLZeHCbVTYk,4569 +vllm/transformers_utils/tokenizers/__init__.py,sha256=c92zhoHIgS-rSpv2_mR9Rm5AotVZuDaDDUPDjXrUfNA,167 +vllm/transformers_utils/tokenizers/__pycache__/__init__.cpython-311.pyc,, +vllm/transformers_utils/tokenizers/__pycache__/mistral.cpython-311.pyc,, +vllm/transformers_utils/tokenizers/mistral.py,sha256=8sDRsMV421m92VU15svs_PaK3O5ltmpQRMthBwV0K6Y,15297 +vllm/transformers_utils/utils.py,sha256=dNXEHooc1Fs62-goF5Zri6u4Qa6fmQrHwvtknVdlcPc,529 +vllm/triton_utils/__init__.py,sha256=wDNYu8rRvsNfoosE-FXjcEtbXRuteh6X_nxdIMVe5xk,281 +vllm/triton_utils/__pycache__/__init__.cpython-311.pyc,, +vllm/triton_utils/__pycache__/custom_cache_manager.cpython-311.pyc,, +vllm/triton_utils/__pycache__/importing.cpython-311.pyc,, +vllm/triton_utils/custom_cache_manager.py,sha256=M6Ee0uBVqN9HXlp_qKdZ7Wqat14sSqnkErO8wrYeUsA,2265 
+vllm/triton_utils/importing.py,sha256=_ADlYGmmMMVaUX6K97C94FjaKKywGPTgZAgGzSDpTKw,450 +vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/usage/__pycache__/__init__.cpython-311.pyc,, +vllm/usage/__pycache__/usage_lib.cpython-311.pyc,, +vllm/usage/usage_lib.py,sha256=g-hS5j3g56lK7lAp3hqEFuMzwo1YLzsyu6aoSDPuolU,8421 +vllm/utils.py,sha256=6rx8FC8FRs878Ubl-qlykSahb7JHWHEyXc5nXiZOQjA,75830 +vllm/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/__pycache__/kv_cache_interface.cpython-311.pyc,, +vllm/v1/__pycache__/outputs.cpython-311.pyc,, +vllm/v1/__pycache__/request.cpython-311.pyc,, +vllm/v1/__pycache__/serial_utils.cpython-311.pyc,, +vllm/v1/__pycache__/utils.cpython-311.pyc,, +vllm/v1/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/attention/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/attention/backends/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/attention/backends/__pycache__/flash_attn.cpython-311.pyc,, +vllm/v1/attention/backends/flash_attn.py,sha256=GUSuwNWJF3SVlvAysNXca_qAWId-OWHjn2_CJKUsEeg,17304 +vllm/v1/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/core/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/core/__pycache__/encoder_cache_manager.cpython-311.pyc,, +vllm/v1/core/__pycache__/kv_cache_manager.cpython-311.pyc,, +vllm/v1/core/__pycache__/kv_cache_utils.cpython-311.pyc,, +vllm/v1/core/__pycache__/scheduler.cpython-311.pyc,, +vllm/v1/core/encoder_cache_manager.py,sha256=OFTBwqzbGCUFFj6A4zptePajUAW0EGnIGLl3R7PD64U,4624 +vllm/v1/core/kv_cache_manager.py,sha256=o7c8wg3S5FlNx3LMWnng24dXmiZ6KZYu44x_sGwQCkA,21371 +vllm/v1/core/kv_cache_utils.py,sha256=hNS-7Uk13dFjH6U6T3_qca3TwxFO7o0oQL7SM25qnNQ,17248 +vllm/v1/core/scheduler.py,sha256=8HHIGBkanR7HLqwut7_VdJH796e8U1SSLfiHaqpty8U,27167 
+vllm/v1/engine/__init__.py,sha256=eIUx6-vw8oJHeWN94t5Bb0aAFGcuJjCHVDYq75hOrSE,3024 +vllm/v1/engine/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/engine/__pycache__/async_llm.cpython-311.pyc,, +vllm/v1/engine/__pycache__/core.cpython-311.pyc,, +vllm/v1/engine/__pycache__/core_client.cpython-311.pyc,, +vllm/v1/engine/__pycache__/detokenizer.cpython-311.pyc,, +vllm/v1/engine/__pycache__/llm_engine.cpython-311.pyc,, +vllm/v1/engine/__pycache__/mm_input_mapper.cpython-311.pyc,, +vllm/v1/engine/__pycache__/output_processor.cpython-311.pyc,, +vllm/v1/engine/__pycache__/processor.cpython-311.pyc,, +vllm/v1/engine/async_llm.py,sha256=UyBKtkjUome0Vr-bu4nDPQ5zF3r7JdP3OgssAtX98sM,14239 +vllm/v1/engine/core.py,sha256=rFvfX7yzOkxdiy07iIx8GFM8OjyEyBTT8znzikVHvQg,11527 +vllm/v1/engine/core_client.py,sha256=uksWk6uoT0pC0E2QucXHfq7h0ZIL5DRXzFCS1kXTT8s,10829 +vllm/v1/engine/detokenizer.py,sha256=w5d3ipzDdg8OoWySfbNRUiT56UCW74hX7wZr0CNIylU,6500 +vllm/v1/engine/llm_engine.py,sha256=-vAz89iFujkAnj83SPjE1W4U4eAPVBazDLqrbJRIdtU,7092 +vllm/v1/engine/mm_input_mapper.py,sha256=7Oc89E9scGXKlqa92bXT8sf9kqgjMSzDCQo2y97M4Ok,5128 +vllm/v1/engine/output_processor.py,sha256=Xn6Nx554mFBUrzk2EE7eFsriSdAoYv8-nREnKFZ099Q,7767 +vllm/v1/engine/processor.py,sha256=rGfj0-zy_OvepX8WzEpni-tQKiR1GvFahatqhVm5Ja0,9801 +vllm/v1/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/executor/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/executor/__pycache__/abstract.cpython-311.pyc,, +vllm/v1/executor/__pycache__/multiproc_executor.cpython-311.pyc,, +vllm/v1/executor/abstract.py,sha256=PMxiTgz9aFSMXhg5RvJ3dJI3NYXz4LyrhA1sChoL5L0,3624 +vllm/v1/executor/multiproc_executor.py,sha256=tVOolD_D3i5i9rg6uoZreu766WAopVghvTQ_aQqunlo,14125 +vllm/v1/kv_cache_interface.py,sha256=0-n-04vufyNiDB6RSHAR3QYUbB8UlBWT0Yln1TS9Xco,3412 +vllm/v1/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/metrics/__pycache__/__init__.cpython-311.pyc,, 
+vllm/v1/metrics/__pycache__/loggers.cpython-311.pyc,, +vllm/v1/metrics/__pycache__/stats.cpython-311.pyc,, +vllm/v1/metrics/loggers.py,sha256=AdcLH_19T-rMWE3Nzy9gvuosoy2tJtF7Qxkvl8cc7Pg,8451 +vllm/v1/metrics/stats.py,sha256=OFkm1fypbjqfjp7QrP66wnb8rxwf36HqWBJsxxoGexE,2847 +vllm/v1/outputs.py,sha256=GdsZ06OLJdYeZma5ojv8suUoIibGKDsqPKIgjWVBtIA,1007 +vllm/v1/request.py,sha256=yUzy_DzOFybElnpNdTc7GLSJd9Dch2n-bEZtTx21tWM,6009 +vllm/v1/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/sample/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/sample/__pycache__/metadata.cpython-311.pyc,, +vllm/v1/sample/__pycache__/sampler.cpython-311.pyc,, +vllm/v1/sample/metadata.py,sha256=e76Oop4JBSEn1P7MQ7UrQRkn2uaNvjo6SeG1iRJT8Zs,683 +vllm/v1/sample/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/sample/ops/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/sample/ops/__pycache__/penalties.cpython-311.pyc,, +vllm/v1/sample/ops/__pycache__/topk_topp_sampler.cpython-311.pyc,, +vllm/v1/sample/ops/penalties.py,sha256=dFk7rV5xKo9Aq0vhKa41FXOn0EYT8aLlqwR2gMgWWzs,2261 +vllm/v1/sample/ops/topk_topp_sampler.py,sha256=WYuJV4RL-FJfqHzPG4B7tT2fMZRiA1YHeRRrbpInySk,7692 +vllm/v1/sample/sampler.py,sha256=lZnzSzCINYH0BuP-9ZjE5UD2b26JjZCiIqO9CVQTgjY,5069 +vllm/v1/serial_utils.py,sha256=Kzrt_ObsfIbLJC-0jzlvijPtD1sCkMvPerJUXGVH8JA,200 +vllm/v1/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/stats/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/stats/__pycache__/common.cpython-311.pyc,, +vllm/v1/stats/common.py,sha256=4IZaANJVZZtNAngonteN9erA4wP4WirBLj0D8CKY1Aw,17243 +vllm/v1/utils.py,sha256=jSi9kVGceXsqPMyTiHhWwmSP0K6qoivzC0rIUvks_bA,5939 +vllm/v1/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/v1/worker/__pycache__/__init__.cpython-311.pyc,, +vllm/v1/worker/__pycache__/block_table.cpython-311.pyc,, +vllm/v1/worker/__pycache__/gpu_input_batch.cpython-311.pyc,, 
+vllm/v1/worker/__pycache__/gpu_model_runner.cpython-311.pyc,, +vllm/v1/worker/__pycache__/gpu_worker.cpython-311.pyc,, +vllm/v1/worker/block_table.py,sha256=VkT1mhAkgWH-lzsRYqrXR3FCDM2p2xDtCnt-iHqjzQk,2492 +vllm/v1/worker/gpu_input_batch.py,sha256=fWlovFiHVduCfnfUaqyOHHYrndJmEHzM0L8vefEvsEQ,18285 +vllm/v1/worker/gpu_model_runner.py,sha256=xAEnV87iFAWRn7nOWFUBwRXLRpbB57XK4ZChQ3kA2Do,52931 +vllm/v1/worker/gpu_worker.py,sha256=QSMmIANmkc7YwPBpgsh6L_99eyT6fhWasbp6GDS2O5Y,12029 +vllm/version.py,sha256=FezMCgn-HcEBLdlhPHeLde8CpiHzYM-h0vERNcjoo4g,335 +vllm/vllm_flash_attn/__init__.py,sha256=TmGCpoLn69L1Yvx5uvJ2CEEQdwgPsoHT5kUa1tI4Qs4,309 +vllm/vllm_flash_attn/__pycache__/__init__.cpython-311.pyc,, +vllm/vllm_flash_attn/__pycache__/flash_attn_interface.cpython-311.pyc,, +vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so,sha256=HQRmcexu55HoazXbE7Dh8OYp_uzC1JvoLu2TgX8paSE,220721128 +vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so,sha256=YyUAc9Mt0xrz3YLKaCWq269FYsnEzZM2rbhvkfPU4UM,275990736 +vllm/vllm_flash_attn/flash_attn_interface.py,sha256=3H1Ret-yAcom5BAUF2fRbqEcwMA3O3Jto51p7fSzBOA,24016 +vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +vllm/worker/__pycache__/__init__.cpython-311.pyc,, +vllm/worker/__pycache__/cache_engine.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_enc_dec_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_pooling_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/cpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/enc_dec_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/hpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/hpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/model_runner_base.cpython-311.pyc,, +vllm/worker/__pycache__/multi_step_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/multi_step_tpu_worker.cpython-311.pyc,, 
+vllm/worker/__pycache__/multi_step_worker.cpython-311.pyc,, +vllm/worker/__pycache__/neuron_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/neuron_worker.cpython-311.pyc,, +vllm/worker/__pycache__/openvino_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/openvino_worker.cpython-311.pyc,, +vllm/worker/__pycache__/pooling_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/tpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/tpu_worker.cpython-311.pyc,, +vllm/worker/__pycache__/utils.cpython-311.pyc,, +vllm/worker/__pycache__/worker.cpython-311.pyc,, +vllm/worker/__pycache__/worker_base.cpython-311.pyc,, +vllm/worker/__pycache__/xpu_model_runner.cpython-311.pyc,, +vllm/worker/__pycache__/xpu_worker.cpython-311.pyc,, +vllm/worker/cache_engine.py,sha256=cBpnZouy9kTiGWzg_WWLGaa9KMchC8UwyXkmvomUrc8,7050 +vllm/worker/cpu_enc_dec_model_runner.py,sha256=2OYtwp1Y3SZo13KZvSP7V7PjYn40i2ChR9tlx7-mW54,13135 +vllm/worker/cpu_model_runner.py,sha256=pwnqGJQgbAh8VEDXWI50PvuspTgtTbsWaDblkkp1BfY,28815 +vllm/worker/cpu_pooling_model_runner.py,sha256=yNxunKCqCIsbhHk-OSz3HxiBI85lE5vFFEkQ7BeTynM,5363 +vllm/worker/cpu_worker.py,sha256=acb-D-uEBwnwVfUTkHotXAs7EhdgV8Sd4b-dL6PO9NM,15547 +vllm/worker/enc_dec_model_runner.py,sha256=02qM0PJeZfMetmWJy8orlSX4TO4CFZHRRZFBIP9SkUU,22836 +vllm/worker/hpu_model_runner.py,sha256=eS8UYJK_J4_yR7U8YVuyEOexvwVGOAxdvTE_ivN9Z0Y,89277 +vllm/worker/hpu_worker.py,sha256=zi5AvpShrl7_rnHBwDTFqpkLhn6fszze-bDU3ScKa5s,21449 +vllm/worker/model_runner.py,sha256=x9KNwneFLCZo7wSrquMSSnG85WmInJKoJXtmRzPx3b4,89960 +vllm/worker/model_runner_base.py,sha256=FbU_tMPxO6u2AEgdupiE2q99FcxMpVYffDyuN0GHwzE,8558 +vllm/worker/multi_step_model_runner.py,sha256=F1XIpv7RcD_ZpF0jY0VvIpE_HUsl1dlMofEity8UjeA,39221 +vllm/worker/multi_step_tpu_worker.py,sha256=c6A1A6Aagis6VyskYP6jP35Mt5paZwDV2jn750Sn5LA,4450 +vllm/worker/multi_step_worker.py,sha256=90a_O9w3bmGHjs5rGVsNEbYzMMhNVZSqLLyOVAHCxk0,9416 
+vllm/worker/neuron_model_runner.py,sha256=aY3Ch_9QlkdG564MmahfGr2HlQgSLWC7XHGQnpAsdVY,14883 +vllm/worker/neuron_worker.py,sha256=ecWBUuUjWcd8iLI-zW_pqJMqNhsYlaReZ3zdllUkfFQ,4983 +vllm/worker/openvino_model_runner.py,sha256=_rTyHFWZCIyhbMfBavoqfnjhBR_5si8JCNWFqPr1nEs,14612 +vllm/worker/openvino_worker.py,sha256=qziTiMKhiawK4-hiHBl9u3oFnlGZhWuJdj42pPxyzW0,24760 +vllm/worker/pooling_model_runner.py,sha256=HKIJcRjY-kkju4CQdT_j4Tc_-l4WjGr1O4zNwEF0-Qo,8739 +vllm/worker/tpu_model_runner.py,sha256=fT9HI5s-VPxT36zbUevyELbN4Ypqdj6-lb85-0cE-FY,40930 +vllm/worker/tpu_worker.py,sha256=w5a8-dHwdFScDGYxPR-etJPVyhatQSe_N5LJhmjS-m8,13206 +vllm/worker/utils.py,sha256=qDJF2qtK6Pyfa6JV_ZTpIUDYl-rwn3imoshuS7yXr_A,1918 +vllm/worker/worker.py,sha256=TkWZKnwV1ERL3iVgvSCYFu75BpphrTwmwnDI06BXCzk,24401 +vllm/worker/worker_base.py,sha256=XQE4XtlD5yQHuhJ6_GrXCJA7Ouq1THz76K9MRoXP_5s,23399 +vllm/worker/xpu_model_runner.py,sha256=znuq7NtPXsdwHNbhu0FmM0qAsezdKvp8ahaea5eRRO4,25580 +vllm/worker/xpu_worker.py,sha256=ocSjanfjalPQy91LN2hX_SDbjQKxVxTr-PH8mJqIn6M,7929 diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/REQUESTED b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..da2ac557c89f110fa0e5a2f5ad6c3795e352a695 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.8.0) +Root-Is-Purelib: false +Tag: cp38-abi3-linux_x86_64 + diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt new file mode 100644 index 
0000000000000000000000000000000000000000..6e42a03cf3892aa1d87aa4bf82d1b6a951aaa232 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +vllm = vllm.scripts:main diff --git a/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7a6c7781dce0db526824db3fe81e3675526d398 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/vllm-0.7.2.dist-info/top_level.txt @@ -0,0 +1 @@ +vllm diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f77e36f074136980ff68bff42f2f30201f2788ec --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/__init__.py @@ -0,0 +1,13 @@ +from . import testing +from .compiler import CompiledGrammar, GrammarCompiler +from .contrib import hf +from .grammar import Grammar, StructuralTagItem +from .matcher import ( + GrammarMatcher, + allocate_token_bitmask, + apply_token_bitmask_inplace, + bitmask_dtype, + get_bitmask_shape, + reset_token_bitmask, +) +from .tokenizer_info import TokenizerInfo, VocabType diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3792c1041f77e23d183839bcdf80ec54ff5c10bf Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05ad733b20d4a43db9edb2c0849d0a9a6a17b479 
Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/base.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff930e72fa605a3445a64567f1a92be5beda1de3 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/compiler.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b082a1e30bc37420cba3424991796407607b7e16 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/grammar.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c5ebfe36508055b5652512a8f03d42c1e4d57a5 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/matcher.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84536f71f943f2656f277fb89a9edaaf2fa60f4e Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/testing.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..dc2baf6ae1ab2215951a5ef63a003fbb2d9a1142 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/tokenizer_info.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b922a08382e2ec077337018dabbf44fe2b40ecd7 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/__pycache__/version.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/base.py b/.venv/lib/python3.11/site-packages/xgrammar/base.py new file mode 100644 index 0000000000000000000000000000000000000000..13a824c9791110cc4b77c46824628b594adcec17 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/base.py @@ -0,0 +1,82 @@ +"""This module provides classes to handle C++ objects from pybind11.""" + +import os + +if os.environ.get("XGRAMMAR_BUILD_DOCS") != "1": + from . import xgrammar_bindings as _core +else: + _core = "dummy namespace" + + +class XGRObject: + """The base class for all objects in XGrammar. This class provides methods to handle the + C++ handle from pybind11. + + In subclasses, the handle should be initialized via the the _create_from_handle, or via + the _init_handle method called within the __init__ method, and should not be modified + afterwards. Subclasses should use the _handle property to access the handle. When comparing + two objects, the equality is checked by comparing the C++ handles. + + For performance considerations, objects in XGrammar should be lightweight and only maintain + a handle to the C++ objects. Heavy operations should be performed on the C++ side. + """ + + @classmethod + def _create_from_handle(cls, handle) -> "XGRObject": + """Construct an object of the class from a C++ handle. 
+ + Parameters + ---------- + cls + The class of the object. + + handle + The C++ handle. + + Returns + ------- + obj : XGRObject + An object of type cls. + """ + obj = cls.__new__(cls) + obj.__handle = handle + return obj + + def _init_handle(self, handle): + """Initialize an object with a handle. This method should be called in the __init__ + method of the subclasses of XGRObject to initialize the C++ handle. + + Parameters + ---------- + handle + The C++ handle. + """ + self.__handle = handle + + @property + def _handle(self): + """Get the C++ handle of the object. + + Returns + ------- + handle + The C++ handle. + """ + return self.__handle + + def __eq__(self, other: object) -> bool: + """Compare two XGrammar objects by comparing their C++ handles. + + Parameters + ---------- + other : object + The other object to compare with. + + Returns + ------- + equal : bool + Whether the two objects have the same C++ handle. + """ + if not isinstance(other, XGRObject): + return NotImplemented + return self._handle == other._handle diff --git a/.venv/lib/python3.11/site-packages/xgrammar/compiler.py b/.venv/lib/python3.11/site-packages/xgrammar/compiler.py new file mode 100644 index 0000000000000000000000000000000000000000..9fac5bb0e193529b1742a934524b5516ca53d779 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/compiler.py @@ -0,0 +1,185 @@ +"""Compiling grammar for efficient token mask generation.""" + +import json +from typing import List, Optional, Tuple, Type, Union, overload + +from pydantic import BaseModel + +from .base import XGRObject, _core +from .grammar import Grammar, StructuralTagItem, _handle_pydantic_schema +from .tokenizer_info import TokenizerInfo + + +class CompiledGrammar(XGRObject): + """This is the primary object to store compiled grammar. + + A CompiledGrammar can be used to construct GrammarMatcher + to generate token masks efficiently. 
+ + Note + ---- + Do not construct this class directly, instead + use :class:`GrammarCompiler` to construct the object. + """ + + @property + def grammar(self) -> Grammar: + """The original grammar.""" + return Grammar._create_from_handle(self._handle.grammar) + + @property + def tokenizer_info(self) -> TokenizerInfo: + """The tokenizer info associated with the compiled grammar.""" + return TokenizerInfo._create_from_handle(self._handle.tokenizer_info) + + +class GrammarCompiler(XGRObject): + """The compiler for grammars. It is associated with a certain tokenizer info, and compiles + grammars into CompiledGrammar with the tokenizer info. It allows parallel compilation with + multiple threads, and has a cache to store the compilation result, avoiding compiling the + same grammar multiple times. + + Parameters + ---------- + tokenizer_info : TokenizerInfo + The tokenizer info. + + max_threads : int, default: 8 + The maximum number of threads used to compile the grammar. + + cache_enabled : bool, default: True + Whether to enable the cache. + """ + + def __init__( + self, + tokenizer_info: TokenizerInfo, + *, + max_threads: int = 8, + cache_enabled: bool = True, + ): + if not isinstance(tokenizer_info, TokenizerInfo): + raise ValueError( + "Please convert the tokenizer to TokenizerInfo before passing it " + "to GrammarCompiler." + ) + + self._init_handle(_core.GrammarCompiler(tokenizer_info._handle, max_threads, cache_enabled)) + + def compile_json_schema( + self, + schema: Union[str, Type[BaseModel]], + *, + any_whitespace: bool = True, + indent: Optional[int] = None, + separators: Optional[Tuple[str, str]] = None, + strict_mode: bool = True, + ) -> CompiledGrammar: + """Get CompiledGrammar from the specified JSON schema and format. The indent + and separators parameters follow the same convention as in json.dumps(). + + Parameters + ---------- + schema : Union[str, Type[BaseModel]] + The schema string or Pydantic model. 
+ + indent : Optional[int], default: None + The number of spaces for indentation. If None, the output will be in one line. + + separators : Optional[Tuple[str, str]], default: None + Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": "). + If None, the default separators will be used: (",", ": ") when the indent is not None, + and (", ", ": ") otherwise. + + strict_mode : bool, default: True + Whether to use strict mode. In strict mode, the generated grammar will not allow + properties and items that is not specified in the schema. This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + schema_str = _handle_pydantic_schema(schema) + return CompiledGrammar._create_from_handle( + self._handle.compile_json_schema( + schema_str, any_whitespace, indent, separators, strict_mode + ) + ) + + def compile_builtin_json_grammar(self) -> CompiledGrammar: + """Get CompiledGrammar from the standard JSON. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + return CompiledGrammar._create_from_handle(self._handle.compile_builtin_json_grammar()) + + def compile_structural_tag( + self, tags: List[StructuralTagItem], triggers: List[str] + ) -> CompiledGrammar: + """Compile a grammar from structural tags. See Grammar.from_structural_tag() for more + details. + + Parameters + ---------- + tags : List[StructuralTagItem] + The structural tags. + + triggers : List[str] + The triggers. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. 
+ """ + tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + return CompiledGrammar._create_from_handle( + self._handle.compile_structural_tag(tags_tuple, triggers) + ) + + @overload + def compile_grammar(self, ebnf_string: str, *, root_rule_name: str = "root") -> CompiledGrammar: + """Compile a grammar from EBNF string. The EBNF string should follow the format + in https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + Parameters + ---------- + ebnf_string : str + The grammar string in EBNF format. + + root_rule_name : str, default: "root" + The name of the root rule in the grammar. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + ... + + @overload + def compile_grammar(self, grammar: Grammar) -> CompiledGrammar: + """Compile a grammar object. + + Returns + ------- + compiled_grammar : CompiledGrammar + The compiled grammar. + """ + ... + + def compile_grammar( + self, grammar: Union[str, Grammar], *, root_rule_name: str = "root" + ) -> CompiledGrammar: + if isinstance(grammar, str): + grammar = Grammar.from_ebnf(grammar, root_rule_name=root_rule_name) + return CompiledGrammar._create_from_handle(self._handle.compile_grammar(grammar._handle)) + + def clear_cache(self) -> None: + """Clear all cached compiled grammars.""" + self._handle.clear_cache() diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfd860c8bd4caa90e7a0e746dcf27e5d6ba19388 Binary files /dev/null and 
b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd832142f367546c47d7ec6e5385c91baa8f8261 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/contrib/__pycache__/hf.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py b/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py new file mode 100644 index 0000000000000000000000000000000000000000..75c546012a9bafaaeecccff6401f65a6ef0cf999 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/contrib/hf.py @@ -0,0 +1,104 @@ +""" +This file helps integrate xgrammar in HF transformers package by extending +transformers.LogitsProcessor, which is to be fed to `model.generate()`. +""" + +from typing import List + +import torch +import transformers + +import xgrammar as xgr + + +class LogitsProcessor(transformers.LogitsProcessor): + """ + LogitsProcessor for processing logits in transformers' generate() method. + + Example usage + ------------- + .. code:: python + + model_name = "Qwen/Qwen2.5-0.5B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name) + config = AutoConfig.from_pretrained(model_name) + # This can be larger than tokenizer.vocab_size due to paddings + full_vocab_size = config.vocab_size + tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=full_vocab_size) + + grammar_compiler = xgr.GrammarCompiler(tokenizer_info) + compiled_grammar = grammar_compiler.compile_builtin_json_grammar() + xgr_logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar) + model.generate(prompt, logits_processor=[xgr_logits_processor]) + + For an end-to-end example, see folder `examples/hf_transformers/`. 
+ + Notes + ----- + - Note that this LogitsProcessor can only be used once. For each `generate()` call, + instantiate a new one. + - Note that this implementation may contain extra overhead. + """ + + def __init__(self, compiled_grammar: xgr.CompiledGrammar): + """Initialize the LogitsProcessor. + + Parameters + ---------- + compiled_grammar : xgr.CompiledGrammar + A grammar compiled according to the given grammar and the model's tokenizer_info. + """ + self.matchers: List[xgr.GrammarMatcher] = [] + self.compiled_grammar = compiled_grammar + self.full_vocab_size = self.compiled_grammar.tokenizer_info.vocab_size + self.token_bitmask = None + self.prefilled = False + self.batch_size = 0 + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + """ + Accept token sampled in the last iteration, fill in bitmask, and apply bitmask to logits. + + Returns: + scores: Logits modified with bitmask. + """ + # Lazily initialize GrammarMatchers and bitmask + if len(self.matchers) == 0: + self.batch_size = input_ids.shape[0] + self.matchers = [ + xgr.GrammarMatcher(self.compiled_grammar) for _ in range(self.batch_size) + ] + self.token_bitmask = xgr.allocate_token_bitmask(self.batch_size, self.full_vocab_size) + + if input_ids.shape[0] != self.batch_size: + raise RuntimeError( + "Expect input_ids.shape[0] to be LogitsProcessor.batch_size." + + f"Got {input_ids.shape[0]} for the former, and {self.batch_size} for the latter." 
+ ) + + if not self.prefilled: + # Have not sampled a token yet + self.prefilled = True + else: + for i in range(self.batch_size): + if not self.matchers[i].is_terminated(): + sampled_token = input_ids[i][-1] + assert self.matchers[i].accept_token(sampled_token) + + for i in range(self.batch_size): + if not self.matchers[i].is_terminated(): + self.matchers[i].fill_next_token_bitmask(self.token_bitmask, i) + + # We only support masking logits on CUDA or CPU + device_type = scores.device.type + if device_type != "cuda": + scores = scores.to("cpu") + xgr.apply_token_bitmask_inplace(scores, self.token_bitmask.to(scores.device)) + if device_type != "cuda": + scores = scores.to(device_type) + + # NOTE: Cannot reset here because __call__ is not invoked when stop token + # is sampled. This is why each `generate()` call needs to instantiate an + # LogitsProcessor + + return scores diff --git a/.venv/lib/python3.11/site-packages/xgrammar/grammar.py b/.venv/lib/python3.11/site-packages/xgrammar/grammar.py new file mode 100644 index 0000000000000000000000000000000000000000..1c1fbb17a338fae787f99bd6dc55737f8799fce0 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/grammar.py @@ -0,0 +1,271 @@ +"""This module provides classes representing grammars.""" + +import json +from typing import List, Optional, Tuple, Type, Union + +from pydantic import BaseModel, Field + +from .base import XGRObject, _core + + +class StructuralTagItem(BaseModel): + """A structural tag item. See Grammar.from_structural_tag() for more details. + + Attributes + ---------- + start : str + The start tag. + + schema_ : Union[str, Type[BaseModel]] + The schema. + + end : str + The end tag. 
+ """ + + start: str + schema_: Union[str, Type[BaseModel]] = Field(alias="schema") + end: str + + +def _handle_pydantic_schema(schema: Union[str, Type[BaseModel]]) -> str: + if isinstance(schema, type) and issubclass(schema, BaseModel): + if hasattr(schema, "model_json_schema"): + # pydantic 2.x + return json.dumps(schema.model_json_schema()) + elif hasattr(schema, "schema_json"): + # pydantic 1.x + return json.dumps(schema.schema_json()) + else: + raise ValueError("The schema should have a model_json_schema or json_schema method.") + elif isinstance(schema, str): + return schema + else: + raise ValueError("The schema should be a string or a Pydantic model.") + + +class Grammar(XGRObject): + """This class represents a grammar object in XGrammar, and can be used later in the + grammar-guided generation. + + The Grammar object supports context-free grammar (CFG). EBNF (extended Backus-Naur Form) is + used as the format of the grammar. There are many specifications for EBNF in the literature, + and we follow the specification of GBNF (GGML BNF) in + https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + When printed, the grammar will be converted to GBNF format. + """ + + def __str__(self) -> str: + """Print the BNF grammar to a string, in EBNF format. + + Returns + ------- + grammar_string : str + The BNF grammar string. + """ + return self._handle.to_string() + + @staticmethod + def from_ebnf(ebnf_string: str, *, root_rule_name: str = "root") -> "Grammar": + """Construct a grammar from EBNF string. The EBNF string should follow the format + in https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md. + + Parameters + ---------- + ebnf_string : str + The grammar string in EBNF format. + + root_rule_name : str, default: "root" + The name of the root rule in the grammar. + + Raises + ------ + RuntimeError + When converting the regex pattern fails, with details about the parsing error. 
+ """ + return Grammar._create_from_handle(_core.Grammar.from_ebnf(ebnf_string, root_rule_name)) + + @staticmethod + def from_json_schema( + schema: Union[str, Type[BaseModel]], + *, + any_whitespace: bool = True, + indent: Optional[int] = None, + separators: Optional[Tuple[str, str]] = None, + strict_mode: bool = True, + ) -> "Grammar": + """Construct a grammar from JSON schema. Pydantic model or JSON schema string can be + used to specify the schema. + + It allows any whitespace by default. If user want to specify the format of the JSON, + set `any_whitespace` to False and use the `indent` and `separators` parameters. The + meaning and the default values of the parameters follows the convention in json.dumps(). + + It internally converts the JSON schema to a EBNF grammar. + + Parameters + ---------- + schema : Union[str, Type[BaseModel]] + The schema string or Pydantic model. + + any_whitespace : bool, default: True + Whether to use any whitespace. If True, the generated grammar will ignore the + indent and separators parameters, and allow any whitespace. + + indent : Optional[int], default: None + The number of spaces for indentation. If None, the output will be in one line. + + Note that specifying the indentation means forcing the LLM to generate JSON strings + strictly formatted. However, some models may tend to generate JSON strings that + are not strictly formatted. In this case, forcing the LLM to generate strictly + formatted JSON strings may degrade the generation quality. See + for more + details. + + separators : Optional[Tuple[str, str]], default: None + Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": "). + If None, the default separators will be used: (",", ": ") when the indent is not None, + and (", ", ": ") otherwise. + + strict_mode : bool, default: True + Whether to use strict mode. In strict mode, the generated grammar will not allow + properties and items that is not specified in the schema. 
This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. It also disallows empty + JSON objects and arrays. + + This helps LLM to generate accurate output in the grammar-guided generation with JSON + schema. + + Returns + ------- + grammar : Grammar + The constructed grammar. + + Raises + ------ + RuntimeError + When converting the json schema fails, with details about the parsing error. + """ + schema_str = _handle_pydantic_schema(schema) + return Grammar._create_from_handle( + _core.Grammar.from_json_schema( + schema_str, any_whitespace, indent, separators, strict_mode + ), + ) + + @staticmethod + def from_regex(regex_string: str) -> "Grammar": + """Create a grammar from a regular expression string. + + Parameters + ---------- + regex_string : str + The regular expression pattern to create the grammar from. + + Returns + ------- + grammar : Grammar + The constructed grammar from the regex pattern. + + Raises + ------ + RuntimeError + When parsing the regex pattern fails, with details about the parsing error. + """ + return Grammar._create_from_handle(_core.Grammar.from_regex(regex_string)) + + @staticmethod + def from_structural_tag(tags: List[StructuralTagItem], triggers: List[str]) -> "Grammar": + """Create a grammar from structural tags. The structural tag handles the dispatching + of different grammars based on the tags and triggers: it initially allows any output, + until a trigger is encountered, then dispatch to the corresponding tag; when the end tag + is encountered, the grammar will allow any following output, until the next trigger is + encountered. + + The tags parameter is used to specify the output pattern. It is especially useful for LLM + function calling, where the pattern is: + {"arg1": ..., "arg2": ...}. + This pattern consists of three parts: a start tag (), a parameter list + according to some schema ({"arg1": ..., "arg2": ...}), and an end tag (). 
This
+    pattern can be described in a StructuralTagItem with a start tag, a schema, and an end tag.
+    The structural tag is able to handle multiple such patterns by passing them into multiple
+    tags.
+
+    The triggers parameter is used to trigger the dispatching of different grammars. The trigger
+    should be a prefix of a provided start tag. When the trigger is encountered, the
+    corresponding tag should be used to constrain the following output. There can be multiple
+    tags matching the same trigger. Then if the trigger is encountered, the following output
+    should match one of the tags. For example, in function calling, the triggers can be
+    [").
+
+    The correspondence of tags and triggers is automatically determined: all tags with the
+    same trigger will be grouped together. User should make sure any trigger is not a prefix
+    of another trigger: then the correspondence of tags and triggers will be ambiguous.
+
+    To use this grammar in grammar-guided generation, the GrammarMatcher constructed from
+    structural tag will generate a mask for each token. When the trigger is not encountered,
+    the mask will likely be all-1 and not have to be used (fill_next_token_bitmask returns
+    False, meaning no token is masked). When a trigger is encountered, the mask should be
+    enforced (fill_next_token_bitmask will return True, meaning some token is masked) to the
+    output logits.
+
+    The benefit of this method is the token boundary between tags and triggers is automatically
+    handled. The user does not need to worry about the token boundary.
+
+    Parameters
+    ----------
+    tags : List[StructuralTagItem]
+        The structural tags.
+
+    triggers : List[str]
+        The triggers.
+ + Examples + -------- + >>> class Schema1(BaseModel): + >>> arg1: str + >>> arg2: int + >>> class Schema2(BaseModel): + >>> arg3: float + >>> arg4: List[str] + >>> tags = [ + >>> StructuralTagItem(start="", schema=Schema1, end=""), + >>> StructuralTagItem(start="", schema=Schema2, end=""), + >>> ] + >>> triggers = [">> grammar = Grammar.from_structural_tag(tags, triggers) + """ + tags_tuple = [(tag.start, _handle_pydantic_schema(tag.schema_), tag.end) for tag in tags] + return Grammar._create_from_handle(_core.Grammar.from_structural_tag(tags_tuple, triggers)) + + @staticmethod + def builtin_json_grammar() -> "Grammar": + """Get the grammar of standard JSON. This is compatible with the official JSON grammar + specification in https://www.json.org/json-en.html. + + Returns + ------- + grammar : Grammar + The JSON grammar. + """ + return Grammar._create_from_handle(_core.Grammar.builtin_json_grammar()) + + @staticmethod + def concat(*grammars: "Grammar") -> "Grammar": + """Create a grammar that matches the concatenation of the grammars in the list. That is + equivalent to using the `+` operator to concatenate the grammars in the list. + + Parameters + ---------- + grammars : List[Grammar] + The grammars to create the concatenation of. + + Returns + ------- + grammar : Grammar + The concatenation of the grammars. 
+ """ + grammar_handles = [grammar._handle for grammar in grammars] + return Grammar._create_from_handle(_core.Grammar.concat(grammar_handles)) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f99dfba4ad1870a8470602deee76ce94199c5b86 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__init__.py @@ -0,0 +1,4 @@ +"""The kernels for XGrammar.""" + +from .apply_token_bitmask_inplace_cpu import apply_token_bitmask_inplace_cpu +from .apply_token_bitmask_inplace_triton import apply_token_bitmask_inplace_triton diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f670a1aaf362af0f2ff6261b4630cf97f32ab4b9 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c64165ca6a1b79022df590af0cfb9ff8fd0362b8 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_cpu.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..387e6a72adab3d4cf931b5a9ea29b961b8c2a8ce Binary files /dev/null 
and b/.venv/lib/python3.11/site-packages/xgrammar/kernels/__pycache__/apply_token_bitmask_inplace_triton.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py new file mode 100644 index 0000000000000000000000000000000000000000..dab190af36f33ab9b785d92670dc8b0305c2305d --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cpu.py @@ -0,0 +1,41 @@ +"""CPU implementation for in-place applying token mask.""" + +import time +from typing import List, Optional, Union + +import torch + +from ..base import _core + + +def apply_token_bitmask_inplace_cpu( + logits: torch.Tensor, + bitmask: torch.Tensor, + indices: Optional[Union[List[int], torch.Tensor]] = None, +) -> None: + """Apply token bitmask in-place on CPU.""" + if logits.device.type != "cpu": + raise ValueError("logits must be on CPU") + if bitmask.device.type != "cpu": + raise ValueError("bitmask must be on CPU") + if logits.dtype != torch.float32: + raise ValueError("logits must be of type float32") + if bitmask.dtype != torch.int32: + raise ValueError("bitmask must be of type int32") + if logits.dim() != 1 and logits.dim() != 2: + raise ValueError("logits should be 1D or 2D, but got {}D".format(logits.dim())) + if bitmask.dim() != 1 and bitmask.dim() != 2: + raise ValueError("bitmask should be 1D or 2D, but got {}D".format(bitmask.dim())) + + logits_shape = (1, logits.shape[0]) if logits.dim() == 1 else (logits.shape[0], logits.shape[1]) + bitmask_shape = ( + (1, bitmask.shape[0]) if bitmask.dim() == 1 else (bitmask.shape[0], bitmask.shape[1]) + ) + + _core.kernels.apply_token_bitmask_inplace_cpu( + logits.data_ptr(), + logits_shape, + bitmask.data_ptr(), + bitmask_shape, + indices, + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py 
b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py new file mode 100644 index 0000000000000000000000000000000000000000..f91956da8de352ef3035d684d1db63aff723f893 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_triton.py @@ -0,0 +1,75 @@ +from typing import List, Optional, Union + +import torch +import triton +import triton.language as tl + + +@triton.jit +def apply_token_bitmask_inplace_kernel( + logits_ptr, + bitmask_ptr, + indices_ptr, + num_rows, + vocab_size, + bitmask_size, + NUM_SMS: tl.constexpr, + BLOCK_SIZE: tl.constexpr, +): + pid = tl.program_id(0) + num_blocks = tl.cdiv(vocab_size, BLOCK_SIZE) + for work_id in tl.range(pid, num_rows * num_blocks, NUM_SMS): + block_offset = (work_id % num_blocks) * BLOCK_SIZE + row_id = work_id // num_blocks + batch_id = tl.load(indices_ptr + row_id) + offsets = block_offset + tl.arange(0, BLOCK_SIZE) + bitmask_offsets = block_offset // 32 + tl.arange(0, BLOCK_SIZE // 32) + vocab_mask = offsets < vocab_size + packed_bitmask_mask = bitmask_offsets < bitmask_size + packed_bitmask = tl.load( + bitmask_ptr + batch_id * bitmask_size + bitmask_offsets, packed_bitmask_mask + ) + bitmask = ((packed_bitmask[:, None] >> (tl.arange(0, 32)[None, :])) & 1) == 0 + bitmask = bitmask.reshape(BLOCK_SIZE) + + tl.store(logits_ptr + batch_id * vocab_size + offsets, -float("inf"), vocab_mask & bitmask) + + +def apply_token_bitmask_inplace_triton( + logits: torch.Tensor, + bitmask: torch.Tensor, + indices: Optional[Union[List[int], torch.Tensor]] = None, +): + def ceil_div(a, b): + return (a + b - 1) // b + + NUM_SMS = torch.cuda.get_device_properties("cuda").multi_processor_count + BLOCK_SIZE = 4096 + # Check input tensor shapes. 
+ if logits.ndim == 2: + batch_size, vocab_size = logits.shape + elif logits.ndim == 1: + batch_size = 1 + (vocab_size,) = logits.shape + else: + raise ValueError(f"Invalid logits tensor shape {logits.shape}") + + if indices is None: + indices = torch.arange(batch_size, dtype=torch.int32, device=logits.device) + elif isinstance(indices, list): + indices = torch.tensor(indices, dtype=torch.int32, device=logits.device) + + grid = lambda meta: (NUM_SMS,) + + apply_token_bitmask_inplace_kernel[grid]( + logits, + bitmask, + indices, + indices.shape[0], + vocab_size, + ceil_div(vocab_size, 32), + NUM_SMS, + BLOCK_SIZE, + num_warps=BLOCK_SIZE // 32 // (16 // logits.element_size()), + num_stages=3, + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/matcher.py b/.venv/lib/python3.11/site-packages/xgrammar/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..64a52895c00eebfa1a512f613ff1ee6da3687c14 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/matcher.py @@ -0,0 +1,325 @@ +"""Match the output of the LLM to the specified grammar, then generate the mask for the next +token.""" + +import math +from typing import List, Optional, Tuple, Union + +import torch + +from .base import XGRObject, _core +from .compiler import CompiledGrammar +from .kernels import apply_token_bitmask_inplace_cpu, apply_token_bitmask_inplace_triton + +"""The dtype of the bitmask: int32.""" +bitmask_dtype = torch.int32 + + +_is_cuda_available = torch.cuda.is_available() + + +def get_bitmask_shape(batch_size: int, vocab_size: int) -> Tuple[int, int]: + """Return the shape of the bitmask: (batch_size, ceil(vocab_size / 32))""" + return (batch_size, math.ceil(vocab_size / 32)) + + +_FULL_MASK = torch.tensor(-1, dtype=bitmask_dtype) + + +def allocate_token_bitmask(batch_size: int, vocab_size: int) -> torch.Tensor: + """Allocate the bitmask for the next token prediction. The bitmask is an int32 tensor on + CPU with shape (batch_size, ceil(vocab_size / 32)). 
Users who have their own needs to + manage CUDA memory can construct the tensor with get_bitmask_shape and bitmask_dtype + themselves. + + The reason why we use int32 instead of uint32 is that old versions of PyTorch do not support + uint32. + + Parameters + ---------- + batch_size : int + The batch size of the bitmask. + + vocab_size : int + The size of the vocabulary. + + Returns + ------- + bitmask : torch.Tensor + The shape of the bitmask. + """ + # In CUDA, use pinned memory to speed up data transfer from CPU to GPU + return torch.full( + get_bitmask_shape(batch_size, vocab_size), + _FULL_MASK, + dtype=bitmask_dtype, + pin_memory=_is_cuda_available, + ) + + +def reset_token_bitmask(bitmask: torch.Tensor) -> None: + """Reset the bitmask to the full mask.""" + bitmask.fill_(_FULL_MASK) + + +def apply_token_bitmask_inplace( + logits: torch.Tensor, + bitmask: torch.Tensor, + *, + indices: Optional[List[int]] = None, +) -> None: + """Apply the bitmask to the logits in-place. The bitmask is a 01 bitwise compressed tensor, + where 0 means the token is masked and 1 means the token is not masked. It can be generated by + allocate_token_bitmask and filled by fill_next_token_bitmask. After applying the bitmask, the + masked logits will be set to -inf. + + The shape of logits and bitmask should be (batch_size, vocab_size) and + (batch_size, bitmask_size) respectively. bitmask_size = ceil(vocab_size / 32). The operation is: + + .. code:: python + + for i in range(batch_size): + for j in range(vocab_size): + if get_bitmask_value(bitmask, i, j) == 0: + logits[i, j] = -inf + + get_bitmask_value(bitmask, i, j) gets the j-th bit of the i-th row of the bitmask. + + Indices can be used to specify which logits in the batch to apply the bitmask to. It is + especially useful when there are structured requests and unstructured requests mixed in the + same batch by skipping masking the logits in the unstructured requests. When specified, the + operation will be + + .. 
code:: python + + for batch_id in indices: + for j in range(vocab_size): + if get_bitmask_value(bitmask, batch_id, j) == 0: + logits[batch_id, j] = -inf + + The logits and bitmask should be on the same device. If both them are on CUDA, we launch a CUDA + kernel to apply bitmask. If both them are on CPU, we use a CPU implementation. The CUDA kernel + is optimized and should be preferred. + + In practice, the bitmask is allocated on CPU, and the logits is usually on GPU, so users should + manually copy the bitmask to GPU before calling this function. + + Parameters + ---------- + logits : torch.Tensor + The tensor to apply the bitmask to. + + bitmask : torch.Tensor + The bitmask to apply. + + indices : Optional[List[int]], default: None + A list of indices to specify which logits in the batch to apply the bitmask to. If None, + apply the bitmask to all logits in the batch. + """ + if bitmask.device != logits.device: + raise ValueError( + "logits and bitmask should be on the same device. " + + f"But got logits.device: {logits.device}, bitmask.device: {bitmask.device}" + ) + + if logits.device.type == "cuda": + apply_token_bitmask_inplace_triton(logits, bitmask, indices) + elif logits.device.type == "cpu": + apply_token_bitmask_inplace_cpu(logits, bitmask, indices) + else: + raise ValueError("Currently, logit masking is only supported on CUDA or CPU.") + + +class GrammarMatcher(XGRObject): + """Match the output of the LLM to the specified grammar, then generate the mask for the next + token. This is the core class in the grammar-guided generation. + + This class maintains a stateful matcher that can accept tokens and strings, then match them + to the specified grammar. The matcher can provide a bitmask for the next token prediction, + so that the output of the LLM follows the specified grammar. Its state can be reset and + rolled back by tokens. It also provides utilities for jump-forward decoding. 
+ + After matching the whole grammar, the matcher will accept a stop token. The token mask at + this time will only allow stop tokens. After accepting the stop token, the matcher will + terminate, then it cannot accept any new token or generate a new token mask, meaning the + generation is finished. + + Under the hood, it utilizes a pushdown automaton with backtracking to match the grammar, + with optimizations specific to LLM token mask generation. + + Parameters + ---------- + compiled_grammar : CompiledGrammar + The initialization context for the grammar matcher. + + override_stop_tokens : Optional[Union[int, List[int]]], default: None + If not None, the stop tokens to override the ones in the grammar. + + terminate_without_stop_token : bool, default: False + Whether to terminate the matcher without accepting a stop token. + + max_rollback_tokens : int, default: 0 + The maximum number of rollback tokens allowed. The rollback operation is useful for + jump-forward decoding and speculative decoding. + """ + + def __init__( + self, + compiled_grammar: CompiledGrammar, + *, + override_stop_tokens: Optional[Union[int, List[int]]] = None, + terminate_without_stop_token: bool = False, + max_rollback_tokens: int = 0, + ) -> None: + if not isinstance(compiled_grammar, CompiledGrammar): + raise ValueError("The grammar should be compiled before passing it to GrammarMatcher.") + + if isinstance(override_stop_tokens, int): + override_stop_tokens = [override_stop_tokens] + + self._init_handle( + _core.GrammarMatcher( + compiled_grammar._handle, + override_stop_tokens, + terminate_without_stop_token, + max_rollback_tokens, + ) + ) + + def accept_token(self, token_id: int, *, debug_print: bool = False) -> bool: + """Accept one token and update the state of the matcher. + + Parameters + ---------- + token_id : int + The id of the token to accept. + + debug_print : bool, default: False + Whether to print information about the internal state of the matcher. 
Helpful + for debugging. + + Returns + ------- + accepted : bool + Whether the token is accepted. + """ + return self._handle.accept_token(token_id, debug_print) + + def fill_next_token_bitmask( + self, bitmask: torch.Tensor, index: int = 0, *, debug_print: bool = False + ) -> bool: + """Fill the bitmask for the next token prediction. The input bitmask can be generated + by allocate_token_bitmask, and must be on CPU. bitmask[index] will be filled with the + next token bitmask. + + This method does not change the matcher state. + + Parameters + ---------- + bitmask : torch.Tensor + The bitmask for the next token prediction. + + index : int, default: 0 + The batch id of the bitmask. + + debug_print : bool, default: False + Whether to print information about generated bitmask. Helpful for debugging. + + Returns + ------- + need_apply : bool + Whether the bitmask need to be applied (not all-true). An optimization: if False, + this means the bitmask is already all-true, so no need to apply it. + """ + if bitmask.device.type != "cpu": + raise ValueError("bitmask should be on CPU.") + if bitmask.dtype != bitmask_dtype: + raise ValueError(f"bitmask should be of type {bitmask_dtype}.") + return self._handle.fill_next_token_bitmask( + bitmask.data_ptr(), list(bitmask.shape), index, debug_print + ) + + def find_jump_forward_string(self) -> str: + """Find the jump-forward string for jump-forward decoding. This is the longest string that + certainly conforms with the current grammar from the current matcher state. This string + can become the output of the LLM without requiring LLM decoding. + + This method does not change the matcher state. + + Returns + ------- + jump_forward_string : str + The jump-forward string. + """ + return self._handle.find_jump_forward_string() + + def rollback(self, num_tokens: int = 1) -> None: + """Rollback the matcher to a previous state by several tokens. 
+ + Parameters + ---------- + num_tokens : int, default: 1 + The number of tokens to rollback. It cannot exceed the current number of steps, nor can + it exceed the specified maximum number of rollback tokens. + """ + self._handle.rollback(num_tokens) + + def is_terminated(self) -> bool: + """Check if the matcher has terminated. If terminate_without_stop_token is False, the + matcher will terminate if it has accepted the stop token. Otherwise, the matcher will + terminate after matching the whole grammar. + + Returns + ------- + terminated : bool + Whether the matcher has terminated. + """ + return self._handle.is_terminated() + + def reset(self) -> None: + """Reset the matcher to the initial state.""" + return self._handle.reset() + + @property + def max_rollback_tokens(self) -> int: + """Get the maximum number of rollback tokens allowed. + + Returns + ------- + max_rollback_tokens : int + The maximum number of rollback tokens. + """ + return self._handle.max_rollback_tokens + + @property + def stop_token_ids(self) -> List[int]: + """The ids of the stop tokens used in the matcher. If specified, the provided stop tokens + will be used. Otherwise, the stop tokens will be detected from the vocabulary. + + Returns + ------- + stop_token_ids : List[int] + The ids of the stop tokens. + """ + return self._handle.stop_token_ids + + def _debug_accept_string( + self, input_str: Union[str, bytes], *, debug_print: bool = False + ) -> bool: + """Accept a string and update the state of the matcher. The whole string is considered + as one step in rollback. It is only used to complement the functionality of accept_token. + + Parameters + ---------- + input_str : Union[str, bytes] + The string to be accepted. + + debug_print : bool, default: False + Whether to print information about the internal state of the matcher. Helpful for + debugging. + + Returns + ------- + accepted : bool + Whether the string is accepted. 
+ """ + return self._handle._debug_accept_string(input_str, debug_print) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__init__.py b/.venv/lib/python3.11/site-packages/xgrammar/support/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00a5f9682ebf767c69cd0d2af86f356fe2a09d7d Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/__init__.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1212aabfad7c05df7bc2082e24ac434a3b04f3d6 Binary files /dev/null and b/.venv/lib/python3.11/site-packages/xgrammar/support/__pycache__/logging.cpython-311.pyc differ diff --git a/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py b/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..7059b122c05f0dc1bc2d185cdc01eb2daebda744 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/support/logging.py @@ -0,0 +1,21 @@ +""" +Logging support for XGrammar. It derives from Python's logging module, and in the future, +it can be easily replaced by other logging modules such as structlog. 
+"""
+
+import logging
+
+
+def enable_logging():
+    """Enable XGrammar's default logging format"""
+    logging.basicConfig(
+        level=logging.INFO,
+        style="{",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        # `{`-style format using LogRecord attributes; `{filename}:{lineno}` points at
+        # the emitting source line (the previous literal "(unknown)" printed verbatim).
+        format="[{asctime}] {levelname} {filename}:{lineno}: {message}",
+    )
+
+
+def getLogger(name: str):  # pylint: disable=invalid-name
+    """Get a logger according to the given name"""
+    return logging.getLogger(name)
diff --git a/.venv/lib/python3.11/site-packages/xgrammar/testing.py b/.venv/lib/python3.11/site-packages/xgrammar/testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..e451f74f97403abdfbc6d1806af6bcee4e692e7e
--- /dev/null
+++ b/.venv/lib/python3.11/site-packages/xgrammar/testing.py
@@ -0,0 +1,208 @@
+"""Testing utilities."""
+
+import time
+from typing import List, Optional, Tuple, Union
+
+import torch
+
+from .base import _core
+from .compiler import CompiledGrammar, GrammarCompiler
+from .grammar import Grammar
+from .matcher import GrammarMatcher, bitmask_dtype, get_bitmask_shape
+from .tokenizer_info import TokenizerInfo
+
+
+def _json_schema_to_ebnf(
+    schema: str,
+    *,
+    any_whitespace: bool = True,
+    indent: Optional[int] = None,
+    separators: Optional[Tuple[str, str]] = None,
+    strict_mode: bool = True,
+) -> str:
+    """Convert JSON schema string to BNF grammar string. For test purposes.
+
+    Parameters
+    ----------
+    schema : str
+        The schema string.
+
+    indent : Optional[int], default: None
+        The number of spaces for indentation. If None, the output will be in one line.
+
+    separators : Optional[Tuple[str, str]], default: None
+        Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": ").
+        If None, the default separators will be used: (",", ": ") when the indent is not None,
+        and (", ", ": ") otherwise.
+
+    strict_mode : bool, default: True
+        Whether to use strict mode. In strict mode, the generated grammar will not allow
+        properties and items that is not specified in the schema.
This is equivalent to + setting unevaluatedProperties and unevaluatedItems to false. + + This helps LLM to generate accurate output in the grammar-guided generation with JSON + schema. + + Returns + ------- + bnf_string : str + The BNF grammar string. + """ + return _core.testing._json_schema_to_ebnf( + schema, + any_whitespace, + indent, + separators, + strict_mode, + ) + + +def _regex_to_ebnf(regex: str, with_rule_name: bool = True) -> str: + r"""Convert a regex string to BNF grammar string. For test purposes. The regex grammar + follows the syntax in JavaScript (ECMA 262). Check + https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions + for a tutorial. Currently the following features are not supported: + 1. Backreference (\1) + 2. non-capturing group, naming capture groups and assertions ((?...)) + 3. Unicode character class escape (\p{...}) + 4. Word boundary (\b) + 5. Unicode property escapes (\p{...}) + 6. Quantifier with range {x,y}. Now user can just repeat the element as a workaround. + + This method is primarily intended for testing and debugging purposes. + + Parameters + ---------- + regex : str + The regex string to be converted. + + Returns + ------- + bnf_string : str + The BNF grammar string converted from the input regex. + """ + return _core.testing._regex_to_ebnf(regex, with_rule_name) + + +def _is_grammar_accept_string( + grammar: Union[Grammar, str], + input_str: str, + *, + debug_print: bool = False, + print_time: bool = False, +) -> bool: + """Check if a grammar accepts a string. For test purposes. + + Parameters + ---------- + grammar : Union[Grammar, str] + The grammar to check. Can be either a Grammar object or a BNF grammar string. + input_str : str + The input string to check. + debug_print : bool, default: False + Whether to print debug information during matching. + print_time : bool, default: False + Whether to print timing information. 
+ + Returns + ------- + bool + True if the grammar accepts the string, False otherwise. + """ + + if isinstance(grammar, str): + grammar = Grammar.from_ebnf(grammar) + grammar_compiler = GrammarCompiler(TokenizerInfo([]), cache_enabled=False) + compiled_grammar = grammar_compiler.compile_grammar(grammar) + matcher = GrammarMatcher(compiled_grammar, terminate_without_stop_token=True) + + if print_time: + start = time.monotonic_ns() + accepted = matcher._debug_accept_string(input_str, debug_print=debug_print) + + if print_time: + end = time.monotonic_ns() + print(f"Accepting {input_str}, result: {accepted}, time: {(end - start) / 1e3} us") + + if not accepted: + return False + return matcher.is_terminated() + + +def _get_masked_tokens_from_bitmask( + bitmask: torch.Tensor, vocab_size: int, index: int = 0 +) -> List[int]: + """Get the ids of the rejected tokens from the bitmask. Mainly for debug purposes. + + Parameters + ---------- + bitmask : torch.Tensor + The rejected token bitmask. Should be generated by allocate_token_bitmask and + filled by fill_next_token_bitmask. Should be on CPU. + + index : int, default: 0 + The batch index of the bitmask. For batch inference, bitmask[index] will be used. + Otherwise is ignored. + + Returns + ------- + rejected_token_ids : List[int] + A list of rejected token ids. + """ + if bitmask.device.type != "cpu": + raise ValueError("bitmask should be on CPU.") + if bitmask.dtype != bitmask_dtype: + raise ValueError(f"bitmask should be of type {bitmask_dtype}.") + return _core.testing._get_masked_tokens_from_bitmask( + bitmask.data_ptr(), list(bitmask.shape), vocab_size, index + ) + + +def _get_matcher_from_grammar_and_tokenizer_info( + grammar: Union[Grammar, str], tokenizer_info: Optional[TokenizerInfo] = None, **kwargs +) -> GrammarMatcher: + """Create a GrammarMatcher from a grammar and tokenizer info. + + Parameters + ---------- + grammar : Union[Grammar, str] + The grammar to create the matcher from. 
Can be either a Grammar object or a string + containing EBNF grammar. + tokenizer_info : Optional[TokenizerInfo], default: None + Information about the tokenizer to use with this grammar. If None, an empty + TokenizerInfo will be created. + **kwargs + Additional keyword arguments to pass to the GrammarMatcher constructor. + + Returns + ------- + matcher : GrammarMatcher + The created grammar matcher. + """ + if tokenizer_info is None: + tokenizer_info = TokenizerInfo([]) + grammar_compiler = GrammarCompiler(tokenizer_info, cache_enabled=False) + compiled_grammar = grammar_compiler.compile_grammar(grammar) + return GrammarMatcher(compiled_grammar, **kwargs) + + +def _get_grammar_union(*grammars: "Grammar") -> "Grammar": + """Create a grammar that matches any of the grammars in the list. That is equivalent to + using the `|` operator to concatenate the grammars in the list. + + Parameters + ---------- + grammars : List[Grammar] + The grammars to create the union of. + + Returns + ------- + grammar : Grammar + The union of the grammars. 
+ """ + grammar_handles = [grammar._handle for grammar in grammars] + return Grammar._create_from_handle(_core.Grammar.union(grammar_handles)) + + +def _get_allow_empty_rule_ids(compiled_grammar: CompiledGrammar) -> List[int]: + return _core.testing._get_allow_empty_rule_ids(compiled_grammar._handle) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py b/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py new file mode 100644 index 0000000000000000000000000000000000000000..75a00d085080cc7d31086ffff2293eb440e020cf --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/tokenizer_info.py @@ -0,0 +1,333 @@ +"""This module provides the tokenizer info class to handle the tokenizer information.""" + +from enum import Enum +from typing import List, Optional, Union + +import sentencepiece +import tiktoken +from transformers import PreTrainedTokenizerBase, PreTrainedTokenizerFast + +from .base import XGRObject, _core +from .support import logging + +logging.enable_logging() +logger = logging.getLogger(__name__) + + +class VocabType(Enum): + """The type of the vocabulary. Used in TokenizerInfo. XGrammar supports three types of + vocabularies: + + RAW + The vocabulary is in the raw format. The tokens in the vocabulary are kept in their + original form without any processing. This kind of tokenizer includes the tiktoken + tokenizer, e.g. microsoft/Phi-3-small-8k-instruct, Qwen/Qwen-7B-Chat, etc. + + BYTE_FALLBACK + The vocabulary used in the byte fallback BPE tokenizer. The tokens are encoded through + the byte-fallback conversion. E.g. "\u001B" -> "<0x1B>", " apple" -> "▁apple". This kind of + tokenizer includes meta-llama/Llama-2-7b-chat, microsoft/Phi-3.5-mini-instruct, etc. + + BYTE_LEVEL + The vocabulary used in the byte level BPE tokenizer. 
The tokens are encoded through + the byte-to-unicode conversion, as in + https://github.com/huggingface/transformers/blob/87be06ca77166e6a6215eee5a990ab9f07238a18/src/transformers/models/gpt2/tokenization_gpt2.py#L38-L59 + + This kind of tokenizer includes meta-llama/Meta-Llama-3-8B-Instruct, + meta-llama/Meta-Llama-3.1-8B-Instruct, etc. + """ + + RAW = "RAW" + BYTE_FALLBACK = "BYTE_FALLBACK" + BYTE_LEVEL = "BYTE_LEVEL" + + +class TokenizerInfo(XGRObject): + """The tokenizer info contains the vocabulary, the type of the vocabulary, and necessary + information for the grammar-guided generation. + + Note that although some tokenizers will encode the tokens in a special format, e.g. + "<0x1B>" for "\u001B" in the ByteFallback tokenizer, and "Ġ" for " " in the Byte-Level BPE + tokenizer, TokenizerInfo always decodes the vocabulary to the original format (e.g. "\u001B" + and " "). + + Also note that some models (e.g. Phi-3 and Deepseek-V2) may pad the vocabulary to a multiple + of 32. In this case, the model's vocab_size is larger than the tokenizer's vocabulary size. + Please pass the model's vocab_size to the vocab_size parameter in the constructor, because + this information is used to determine the size of the token mask. + + Parameters + ---------- + encoded_vocab : Union[List[bytes], List[str]] + The encoded vocabulary of the tokenizer. + + vocab_type : VocabType, default: VocabType.RAW + The type of the vocabulary. See also VocabType. + + vocab_size : Optional[int], default: None + The size of the vocabulary. If not provided, the vocabulary size will be len(encoded_vocab). + + stop_token_ids : Optional[List[int]], default: None + The stop token ids. If not provided, the stop token ids will be auto detected (but may not + be correct). + + prepend_space_in_tokenization : bool, default: False + Whether the tokenizer will prepend a space before the text in the tokenization process. 
+ """ + + def __init__( + self, + encoded_vocab: Union[List[bytes], List[str]], + vocab_type: VocabType = VocabType.RAW, + *, + vocab_size: Optional[int] = None, + stop_token_ids: Optional[Union[List[int], int]] = None, + prepend_space_in_tokenization: bool = False, + ) -> None: + if isinstance(stop_token_ids, int): + stop_token_ids = [stop_token_ids] + self._init_handle( + _core.TokenizerInfo( + encoded_vocab, + vocab_type.value, + vocab_size, + stop_token_ids, + prepend_space_in_tokenization, + ) + ) + + @staticmethod + def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool: + # helper to check if tokenizer is a tiktoken tokenizer + has_tiktoken_encoding = hasattr(tokenizer, "tokenizer") and isinstance( + tokenizer.tokenizer, tiktoken.Encoding + ) + + filename_pattern = ( + "vocab_file" in tokenizer.vocab_files_names + and "tiktoken" in tokenizer.vocab_files_names["vocab_file"] + ) + + return has_tiktoken_encoding or filename_pattern + + @staticmethod + def _is_sentencepiece_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool: + # helper to check if tokenizer is a sentence piece tokenizer + has_sp_model_attr = hasattr(tokenizer, "sp_model") and isinstance( + tokenizer.sp_model, sentencepiece.SentencePieceProcessor + ) + + has_nested_sp_model_attr = ( + hasattr(tokenizer, "tokenizer") + and hasattr(tokenizer.tokenizer, "sp_model") + and isinstance(tokenizer.tokenizer.sp_model, sentencepiece.SentencePieceProcessor) + ) + + return has_sp_model_attr or has_nested_sp_model_attr + + @staticmethod + def from_huggingface( + tokenizer: PreTrainedTokenizerBase, + *, + vocab_size: Optional[int] = None, + stop_token_ids: Optional[Union[List[int], int]] = None, + ) -> "TokenizerInfo": + """Construct the tokenizer info from the huggingface tokenizer. This constructor supports + various tokenizer backends, including the huggingface fast tokenizer and tiktoken tokenizer. + Necessary information is automatically detected from the tokenizer. 
+ + Note that some models (e.g. Phi-3 and Deepseek-V2) may pad the vocabulary to a multiple + of 32. In this case, the model's vocab_size is larger than the tokenizer's vocabulary + size. Please pass the model's vocab_size (this should be defined in the model config) + to the vocab_size parameter in the constructor, because this information is used to + determine the size of the token mask. + + Some models can have more than one stop token ids, and auto detection may not find all + of them. In this case, you can specify the stop token ids manually. + + Parameters + ---------- + tokenizer : PreTrainedTokenizerBase + The huggingface tokenizer. + + vocab_size : Optional[int], default: None + The size of the vocabulary. If not provided, the vocabulary size will be + len(encoded_vocab). + + stop_token_ids : Optional[List[int]], default: None + The stop token ids. If not provided, the stop token ids will be auto detected + (but may not be correct). + + Returns + ------- + tokenizer_info : TokenizerInfo + The tokenizer info. + """ + + if isinstance(stop_token_ids, int): + stop_token_ids = [stop_token_ids] + if isinstance(stop_token_ids, list) and len(stop_token_ids) == 0: + raise ValueError("stop_token_ids cannot be empty") + + try: + vocab_dict = tokenizer.get_vocab() + except AttributeError as e: + msg = ( + f"Cannot get the vocabulary of the tokenizer {type(tokenizer)}. The tokenizer " + "should have a get_vocab method." + ) + raise ValueError(msg) from e + + max_id = max(vocab_dict.values()) if vocab_dict else -1 + detected_vocab_size = max(len(vocab_dict), max_id + 1) + if vocab_size is None: + vocab_size = detected_vocab_size + else: + if vocab_size < detected_vocab_size: + msg = f"Input vocab_size less than minimum viable vocab size for tokenizer {type(tokenizer)}." 
+ raise ValueError(msg) + + # maintain tokenizer's indexing + encoded_vocab = ["" for _ in range(vocab_size)] + for token, idx in vocab_dict.items(): + encoded_vocab[idx] = token + + if isinstance(tokenizer, PreTrainedTokenizerFast): + # huggingface fast tokenizer + # - the vocabulary is directly obtained from tokenizer.get_vocab() + # (tokenizer.backend_tokenizer.to_str() may not contain the full vocab, special + # tokens may be omitted) + # - the vocab size is obtained from len(tokenizer.get_vocab()) or provided by user + # - the vocab type and prepend_space_in_tokenization are obtained from + # tokenizer.backend_tokenizer.to_str() + # - stop token id is provided by user, or auto detected. + backend_str = tokenizer.backend_tokenizer.to_str() + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." + ) + return TokenizerInfo._create_from_handle( + _core.TokenizerInfo.from_huggingface( + encoded_vocab, backend_str, vocab_size, stop_token_ids + ) + ) + elif TokenizerInfo._is_tiktoken_tokenizer(tokenizer): + # tiktoken tokenizer + # e.g. Phi-3-small-8k-instruct, Qwen-7B-Chat, stablelm-2-12b-chat (previously) + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." 
+ ) + return TokenizerInfo( + encoded_vocab, + VocabType.RAW, + vocab_size=vocab_size, + stop_token_ids=stop_token_ids, + prepend_space_in_tokenization=False, + ) + elif TokenizerInfo._is_sentencepiece_tokenizer(tokenizer): + # sentencepiece tokenizer + # e.g. Chatglm3-6b + if hasattr(tokenizer, "sp_model"): + sp_model = tokenizer.sp_model + elif hasattr(tokenizer, "tokenizer") and hasattr(tokenizer.tokenizer, "sp_model"): + sp_model = tokenizer.tokenizer.sp_model + + if stop_token_ids is None: + if hasattr(tokenizer, "eos_token_id") and tokenizer.eos_token_id is not None: + stop_token_ids = [tokenizer.eos_token_id] + else: + eos_id = sp_model.eos_id() + if eos_id != -1: + stop_token_ids = [eos_id] + else: + logger.warning( + "When constructing TokenizerInfo from a huggingface tokenizer, " + "stop_token_ids is neither provided by user nor found from the tokenizer. " + "It will be automatically detected." + ) + # detect vocab_type of tokenizer + if "<0x0A>" in vocab_dict: + vocab_type = VocabType.BYTE_FALLBACK + else: + vocab_type = VocabType.RAW + + return TokenizerInfo( + encoded_vocab, + vocab_type=vocab_type, + vocab_size=vocab_size, + stop_token_ids=stop_token_ids, + prepend_space_in_tokenization=True, + ) + else: + # TODO(yixin): unsupported tokenizer + raise ValueError(f"Unsupported tokenizer type: {type(tokenizer)}") + + @property + def vocab_type(self) -> VocabType: + """The type of the vocabulary.""" + return VocabType(self._handle.vocab_type) + + @property + def vocab_size(self) -> int: + """The size of the vocabulary.""" + return self._handle.vocab_size + + @property + def prepend_space_in_tokenization(self) -> bool: + """Whether the tokenizer will prepend a space before the text in the tokenization + process.""" + return self._handle.prepend_space_in_tokenization + + @property + def decoded_vocab(self) -> List[bytes]: + """The decoded vocabulary of the tokenizer. 
This converts the tokens in the LLM's + vocabulary back to the original format of the input text. E.g. for type ByteFallback, + the token <0x1B> is converted back to "\u001B". + """ + return self._handle.decoded_vocab + + @property + def stop_token_ids(self) -> List[int]: + """The stop token ids.""" + return self._handle.stop_token_ids + + @property + def special_token_ids(self) -> List[int]: + """The special token ids. Special tokens include control tokens, reserved tokens, + padded tokens, etc. Now it is automatically detected from the vocabulary.""" + return self._handle.special_token_ids + + def dump_metadata(self) -> str: + """Dump the metadata of the tokenizer to a json string. It can be used to construct the + tokenizer info from the vocabulary and the metadata string.""" + return self._handle.dump_metadata() + + @staticmethod + def from_vocab_and_metadata( + encoded_vocab: List[Union[bytes, str]], metadata: str + ) -> "TokenizerInfo": + """Construct the tokenizer info from the vocabulary and the metadata string in json + format. + + Parameters + ---------- + encoded_vocab : List[Union[bytes, str]] + The encoded vocabulary of the tokenizer. + + metadata : str + The metadata string in json format. + """ + return TokenizerInfo._create_from_handle( + _core.TokenizerInfo.from_vocab_and_metadata(encoded_vocab, metadata), + ) diff --git a/.venv/lib/python3.11/site-packages/xgrammar/version.py b/.venv/lib/python3.11/site-packages/xgrammar/version.py new file mode 100644 index 0000000000000000000000000000000000000000..3f015a954279f39f1ea0670cc32493297e311359 --- /dev/null +++ b/.venv/lib/python3.11/site-packages/xgrammar/version.py @@ -0,0 +1,144 @@ +# pylint: disable=missing-docstring +import argparse +import logging +import os +import subprocess + +# Modify the following value during release +# --------------------------------------------------- +# Current version: +# We use the version of the incoming release for code +# that is under development. 
+# +# It is also fallback version to be used when --git-describe +# is not invoked, or when the repository does not present the +# git tags in a format that this script can use. +# +# Two tag formats are supported: +# - vMAJ.MIN.PATCH (e.g. v0.8.0) or +# - vMAJ.MIN.devN (e.g. v0.8.dev0) + +# --------------------------------------------------- + +__version__ = "0.1.11" +PROJ_ROOT = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) + + +def py_str(cstr): + return cstr.decode("utf-8") + + +def git_describe_version(): + """Get PEP-440 compatible public and local version using git describe. + + Returns + ------- + pub_ver: str + Public version. + + local_ver: str + Local version (with additional label appended to pub_ver). + + Notes + ----- + - We follow PEP 440's convention of public version + and local versions. + - Only tags conforming to vMAJOR.MINOR.REV (e.g. "v0.7.0") + are considered in order to generate the version string. + See the use of `--match` in the `git` command below. + + Here are some examples: + + - pub_ver = '0.7.0', local_ver = '0.7.0': + We are at the 0.7.0 release. + - pub_ver = '0.8.dev94', local_ver = '0.8.dev94+g0d07a329e': + We are at the 0.8 development cycle. + The current source contains 94 additional commits + after the most recent tag(v0.7.0), + the git short hash tag of the current commit is 0d07a329e. + """ + cmd = [ + "git", + "describe", + "--tags", + "--match", + "v[0-9]*.[0-9]*.[0-9]*", + "--match", + "v[0-9]*.[0-9]*.dev[0-9]*", + ] + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + cwd=PROJ_ROOT, + ) as proc: + (out, _) = proc.communicate() + + if proc.returncode != 0: + msg = py_str(out) + logging.warning("git describe: %s", msg) + return None, None + describe = py_str(out).strip() + arr_info = describe.split("-") + + # Remove the v prefix, mainly to be robust + # to the case where v is not presented as well. 
+ if arr_info[0].startswith("v"): + arr_info[0] = arr_info[0][1:] + + # hit the exact tag + if len(arr_info) == 1: + return arr_info[0], arr_info[0] + + if len(arr_info) != 3: + logging.warning("Invalid output from git describe %s", describe) + return None, None + + dev_pos = arr_info[0].find(".dev") + + # Development versions: + # The code will reach this point in case it can't match a full release version, such as v0.7.0. + # + # 1. in case the last known label looks like vMAJ.MIN.devN e.g. v0.8.dev0, we use + # the current behavior of just using vMAJ.MIN.devNNNN+gGIT_REV + if dev_pos != -1: + dev_version = arr_info[0][: arr_info[0].find(".dev")] + # 2. in case the last known label looks like vMAJ.MIN.PATCH e.g. v0.8.0 + # then we just carry on with a similar version to what git describe provides, which is + # vMAJ.MIN.PATCH.devNNNN+gGIT_REV + else: + dev_version = arr_info[0] + + pub_ver = f"{dev_version}.dev{arr_info[1]}" + local_ver = f"{pub_ver}+{arr_info[2]}" + return pub_ver, local_ver + + +def main(): + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Detect and synchronize version.") + parser.add_argument( + "--print-version", + action="store_true", + help="Print version to the command line. No changes is applied to files.", + ) + parser.add_argument( + "--git-describe", + action="store_true", + help="Use git describe to generate development version.", + ) + parser.add_argument("--dry-run", action="store_true") + opt = parser.parse_args() + pub_ver, local_ver = None, None + if opt.git_describe: + pub_ver, local_ver = git_describe_version() + if pub_ver is None: + pub_ver = __version__ + if local_ver is None: + local_ver = __version__ + if opt.print_version: + print(local_ver) + + +if __name__ == "__main__": + main()