wwydmanski committed on
Commit
03d6533
·
verified ·
1 Parent(s): 48606e1

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
TabularModel.py ADDED
@@ -0,0 +1,701 @@
1
+ from transformers import (
2
+ AutoConfig,
3
+ AutoProcessor,
4
+ ProcessorMixin,
5
+ Qwen2TokenizerFast,
6
+ BaseImageProcessor,
7
+ Qwen2_5_VLForConditionalGeneration,
8
+ )
9
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
10
+ Qwen2_5_VLCausalLMOutputWithPast,
11
+ Qwen2RMSNorm,
12
+ )
13
+ from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
14
+ from transformers.processing_utils import Unpack
15
+ from transformers.feature_extraction_sequence_utils import BatchFeature
16
+
17
+ from typing import List, Optional, TypedDict
18
+
19
+ # from tabpfn_extensions import TabPFNRegressor
20
+ # from tabpfn_extensions.embedding import TabPFNEmbedding
21
+ import numpy as np
22
+
23
+ import torch
24
+ from torch import nn
25
+ from torch.nn import CrossEntropyLoss
26
+
27
+ from pprint import pprint
28
+
29
+
30
+ class TabularProcessorKwargs(TypedDict):
31
+ """
32
+ Keyword arguments for tabular processing.
33
+ """
34
+
35
+ pass
36
+
37
+
38
+ class TabularPreprocessor(BaseImageProcessor):
39
+ def __call__(self, X: list | np.ndarray | torch.Tensor) -> torch.Tensor:
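+ # Stack one or more (rows, cols) tables into a single float32 batch and expose it as the "tabular_values" feature.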
40
+ if not isinstance(X, list):
41
+ X = [X]
42
+
43
+ res = []
44
+ for X_sample in X:
45
+ if isinstance(X_sample, torch.Tensor):
46
+ X_sample = X_sample.cpu().numpy()
47
+
48
+ res.append(X_sample)
49
+ res = np.array(res)
50
+ return BatchFeature(data={"tabular_values": torch.from_numpy(res).to(torch.float32)})
51
+
52
+ AutoProcessor.register("TabularPreprocessor", TabularPreprocessor)
53
+
54
+ class TabularProcessor(nn.Module):
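+ # NOTE: this TabPFN-based processor relies on the commented-out tabpfn_extensions imports above; Qwen2_5_TabularModel below uses TabularLearnableProcessor instead.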
55
+ def __init__(self, **kwargs: Unpack[TabularProcessorKwargs]):
56
+ super().__init__(**kwargs)
57
+ self.tabpfn = TabPFNRegressor(
58
+ n_estimators=1,
59
+ model_path="./tabpfn-v2-regressor.ckpt", device="cuda:1"
60
+ )
61
+
62
+ def __call__(self, X: np.ndarray | torch.Tensor) -> torch.Tensor:
63
+ # Will convert specified categorical indices to category dtype, as well
64
+ # as handle `np.object` arrays or otherwise `object` dtype pandas columns.
65
+ if len(X.shape) == 2:
66
+ X = [X]
67
+ res = []
68
+ for X_sample in X:
69
+ if isinstance(X_sample, torch.Tensor):
70
+ X_sample = X_sample.cpu().numpy()
71
+
72
+ X_sample = X_sample[0]
73
+ self.tabpfn.fit(X_sample, np.random.random(X_sample.shape[0]))
74
+
75
+ embs = self.tabpfn.get_embeddings(X_sample)
76
+ embs_t = torch.from_numpy(embs).to(self.tabpfn.device)
77
+ embs_t = embs_t.mean(dim=0)
78
+ res.append(embs_t)
79
+
80
+ res = torch.stack(res)
81
+ res = res.view(-1, 192)
82
+ return res
83
+
84
+ class TabularBlock(nn.Module):
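+ # Residual MLP block: Linear(input_dim -> hidden_dim) -> GELU -> Linear(hidden_dim -> input_dim), with a skip connection.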
85
+ def __init__(self, input_dim: int, hidden_dim: int = 192):
86
+ super().__init__()
87
+ self.linear1 = nn.Linear(input_dim, hidden_dim)
88
+ self.activation = nn.GELU()
89
+ self.linear2 = nn.Linear(hidden_dim, input_dim)
90
+
91
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
92
+ residual = x
93
+ x = self.linear1(x)
94
+ x = self.activation(x)
95
+ x = self.linear2(x)
96
+ return x + residual
97
+
98
+ class TabularLearnableProcessor(nn.Module):
99
+ def __init__(self, num_features: int = 1):
100
+ super().__init__()
101
+ # Each cell is processed individually as a scalar
102
+ self.input_proj = nn.Linear(num_features, 192)
103
+ self.nodes = nn.Sequential(
104
+ nn.GELU(),
105
+ TabularBlock(192, 64),
106
+ nn.GELU(),
107
+ TabularBlock(192, 64),
108
+ nn.GELU(),
109
+ TabularBlock(192, 64),
110
+ nn.GELU(),
111
+ TabularBlock(192, 64),
112
+ nn.GELU(),
113
+ TabularBlock(192, 64),
114
+ nn.GELU(),
115
+ TabularBlock(192, 64),
116
+ nn.GELU(),
117
+ TabularBlock(192, 64),
118
+ )
119
+
120
+ def forward(self, X: np.ndarray | torch.Tensor) -> torch.Tensor:
121
+ if isinstance(X, np.ndarray):
122
+ X = torch.from_numpy(X)
123
+
124
+ param_dtype = self.input_proj.weight.dtype
125
+ X = X.to(param_dtype)
126
+
127
+ # Flatten the table - each cell becomes a separate token
128
+ # X shape: (batch_size, rows, cols) -> (batch_size * rows * cols, 1)
129
+ batch_size = X.shape[0]
130
+ X_flat = X.reshape(-1, 1) # Flatten to individual cells
131
+
132
+ # RMS normalization per cell for stability
133
+ # X_normalized = X_flat * torch.rsqrt(X_flat.pow(2) + 1e-5)
134
+
135
+ projected = self.input_proj(X_flat)
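+ # projected: (batch_size * rows * cols, 192) -- one learned embedding per table cell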
136
+ # res = self.nodes(projected)
137
+ return projected
138
+
139
+ class Qwen_2_5_TabularProcessor(ProcessorMixin):
140
+ r"""
141
+ Constructs a Qwen2.5 tabular processor which wraps a tabular preprocessor and a Qwen2 tokenizer into a single processor.
142
+ [`Qwen_2_5_TabularProcessor`] offers the functionalities of [`TabularPreprocessor`] and [`Qwen2TokenizerFast`]. See the
143
+ [`~Qwen_2_5_TabularProcessor.__call__`] and [`~Qwen_2_5_TabularProcessor.decode`] for more information.
144
+ Args:
145
+ tabular_processor ([`TabularPreprocessor`], *optional*):
146
+ The tabular preprocessor is a required input.
147
+ tokenizer ([`Qwen2TokenizerFast`], *optional*):
148
+ The tokenizer is a required input.
149
+ chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
150
+ in a chat into a tokenizable string.
151
+ """
152
+
153
+ attributes = ["tokenizer"]
154
+ valid_kwargs = ["chat_template"]
155
+
156
+ tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
157
+
158
+ def __init__(
159
+ self,
160
+ tabular_processor: TabularPreprocessor | None = None,
161
+ tokenizer=None,
162
+ chat_template=None,
163
+ **kwargs,
164
+ ):
165
+ self.tabular_token = (
166
+ "<|tabular_pad|>"
167
+ if not hasattr(tokenizer, "tabular_token")
168
+ else tokenizer.tabular_token
169
+ )
170
+ self.tabular_processor = tabular_processor
171
+ super().__init__(tokenizer, chat_template=chat_template)
172
+
173
+ def __call__(
174
+ self,
175
+ tabular_values: np.ndarray | torch.Tensor | None = None,
176
+ text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
177
+ **kwargs: Unpack[TabularProcessorKwargs],
178
+ ) -> BatchFeature:
179
+ """
180
+ Main method to prepare one or several sequence(s) and table(s) for the model. This method forwards the `text`
181
+ and `kwargs` arguments to Qwen2TokenizerFast's [`~Qwen2TokenizerFast.__call__`] if `text` is not `None` to encode
182
+ the text. To prepare the tabular inputs, this method forwards the `tabular_values` argument to the tabular
183
+ preprocessor's `__call__` if `tabular_values` is not `None`.
184
+
185
+ Args:
186
+ tabular_values (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`):
187
+ The table or batch of tables to be prepared. Each table can be a 2D NumPy array or PyTorch
188
+ tensor of shape `(rows, cols)`.
189
+ text (`str`, `List[str]`, `List[List[str]]`):
190
+ The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
191
+ (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
192
+ `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
196
+ return_tensors (`str` or [`~utils.TensorType`], *optional*):
197
+ If set, will return tensors of a particular framework. Acceptable values are:
198
+ - `'tf'`: Return TensorFlow `tf.constant` objects.
199
+ - `'pt'`: Return PyTorch `torch.Tensor` objects.
200
+ - `'np'`: Return NumPy `np.ndarray` objects.
201
+ - `'jax'`: Return JAX `jnp.ndarray` objects.
202
+
203
+ Returns:
204
+ [`BatchFeature`]: A [`BatchFeature`] with the following fields:
205
+
206
+ - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
207
+ - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
208
+ `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
209
+ `None`).
210
+ - **tabular_values** -- Tabular values to be fed to a model. Returned when `tabular_values` is not `None`.
215
+ """
216
+ # print("Tabular values: ", tabular_values)
217
+ if tabular_values is not None:
218
+ tabular_inputs = self.tabular_processor(tabular_values)
219
+ else:
220
+ print("Warning! No tabular values provided!")
221
+ tabular_inputs = {}
222
+
223
+ if not isinstance(text, list):
224
+ text = [text]
225
+
226
+ if tabular_values is not None:
227
+ index = 0
228
+ for i in range(len(text)):
229
+ while self.tabular_token in text[i]:
230
+ # Each cell becomes a token: num_tokens = rows * cols
231
+ table_shape = tabular_inputs["tabular_values"][index].shape
232
+ rows, cols = table_shape[0], table_shape[1]
233
+ # Build pattern: for each row, add col tokens + row separator
234
+ row_pattern = "<|placeholder|>" * cols + "<|tabular_row|>"
235
+ replacement = row_pattern * rows
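+ # e.g. a 4x6 table expands to ("<|placeholder|>" * 6 + "<|tabular_row|>") * 4; the placeholders are swapped back to the tabular token below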
236
+ text[i] = text[i].replace(
237
+ self.tabular_token,
238
+ replacement,
239
+ 1,
240
+ )
241
+ index += 1
242
+ text[i] = text[i].replace("<|placeholder|>", self.tabular_token)
243
+
244
+ text_inputs = self.tokenizer(text, **kwargs)
245
+ return BatchFeature(data={**text_inputs, **tabular_inputs})
246
+
247
+ def batch_decode(self, *args, **kwargs):
248
+ """
249
+ This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
250
+ refer to the docstring of this method for more information.
251
+ """
252
+ return self.tokenizer.batch_decode(*args, **kwargs)
253
+
254
+ def decode(self, *args, **kwargs):
255
+ """
256
+ This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
257
+ the docstring of this method for more information.
258
+ """
259
+ return self.tokenizer.decode(*args, **kwargs)
260
+
261
+ def post_process_image_text_to_text(
262
+ self,
263
+ generated_outputs,
264
+ skip_special_tokens=True,
265
+ clean_up_tokenization_spaces=False,
266
+ **kwargs,
267
+ ):
268
+ """
269
+ Post-process the output of the model to decode the text.
270
+
271
+ Args:
272
+ generated_outputs (`torch.Tensor` or `np.ndarray`):
273
+ The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
274
+ or `(sequence_length,)`.
275
+ skip_special_tokens (`bool`, *optional*, defaults to `True`):
276
+ Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
277
+ clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
278
+ Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method.
279
+ **kwargs:
280
+ Additional arguments to be passed to the tokenizer's `batch_decode method`.
281
+
282
+ Returns:
283
+ `List[str]`: The decoded text.
284
+ """
285
+ return self.tokenizer.batch_decode(
286
+ generated_outputs,
287
+ skip_special_tokens=skip_special_tokens,
288
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
289
+ **kwargs,
290
+ )
291
+
292
+ @property
293
+ def model_input_names(self):
294
+ tokenizer_input_names = self.tokenizer.model_input_names
295
+ tabular_processor_input_names = self.tabular_processor.model_input_names if hasattr(self.tabular_processor, 'model_input_names') else []
296
+ names_from_processor = list(
297
+ dict.fromkeys(tokenizer_input_names + tabular_processor_input_names)
298
+ )
299
+ return names_from_processor + ["tabular_values"]
300
+
301
+
302
+ class Qwen2_5_TabularModel(Qwen2_5_VLForConditionalGeneration):
303
+ def __init__(self, *args, **kwargs):
304
+ super().__init__(*args, **kwargs)
305
+ self.tabular_processor = TabularLearnableProcessor(num_features=1)
306
+
307
+ self.tabular_projection = nn.Sequential(
308
+ nn.Linear(192, self.config.hidden_size),
309
+ nn.ReLU(),
310
+ TabularBlock(self.config.hidden_size, self.config.hidden_size),
311
+ nn.ReLU(),
312
+ TabularBlock(self.config.hidden_size, self.config.hidden_size),
313
+ nn.ReLU(),
314
+ TabularBlock(self.config.hidden_size, self.config.hidden_size),
315
+ )
316
+
317
+ def forward(
318
+ self,
319
+ input_ids: Optional[torch.LongTensor] = None,
320
+ attention_mask: Optional[torch.Tensor] = None,
321
+ position_ids: Optional[torch.LongTensor] = None,
322
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
323
+ inputs_embeds: Optional[torch.FloatTensor] = None,
324
+ labels: Optional[torch.LongTensor] = None,
325
+ use_cache: Optional[bool] = None,
326
+ output_attentions: Optional[bool] = None,
327
+ output_hidden_states: Optional[bool] = None,
328
+ return_dict: Optional[bool] = None,
329
+ pixel_values: Optional[torch.Tensor] = None,
330
+ pixel_values_videos: Optional[torch.FloatTensor] = None,
331
+ tabular_values: Optional[torch.Tensor] = None,
332
+ image_grid_thw: Optional[torch.LongTensor] = None,
333
+ video_grid_thw: Optional[torch.LongTensor] = None,
334
+ rope_deltas: Optional[torch.LongTensor] = None,
335
+ cache_position: Optional[torch.LongTensor] = None,
336
+ second_per_grid_ts: Optional[torch.Tensor] = None,
337
+ ):
338
+ r"""
339
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
340
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
341
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
342
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
343
+
344
+ Returns:
345
+
346
+ Example:
347
+
348
+ ```python
349
+ >>> from PIL import Image
350
+ >>> import requests
351
+ >>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
352
+
353
+ >>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
354
+ >>> processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
355
+
356
+ >>> messages = [
357
+ {
358
+ "role": "user",
359
+ "content": [
360
+ {"type": "image"},
361
+ {"type": "text", "text": "What is shown in this image?"},
362
+ ],
363
+ },
364
+ ]
365
+ >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
366
+ >>> image = Image.open(requests.get(url, stream=True).raw)
367
+
368
+ >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
369
+ >>> inputs = processor(text=[text], images=[image])
370
+
371
+ >>> # Generate
372
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
373
+ >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
374
+ "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
375
+ ```"""
376
+
377
+ output_attentions = (
378
+ output_attentions
379
+ if output_attentions is not None
380
+ else self.config.output_attentions
381
+ )
382
+ output_hidden_states = (
383
+ output_hidden_states
384
+ if output_hidden_states is not None
385
+ else self.config.output_hidden_states
386
+ )
387
+ return_dict = (
388
+ return_dict if return_dict is not None else self.config.use_return_dict
389
+ )
390
+
391
+ if inputs_embeds is None:
392
+ inputs_embeds = self.language_model.embed_tokens(input_ids)
393
+ if pixel_values is not None:
394
+ pixel_values = pixel_values.type(self.visual.dtype)
395
+ image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
396
+ n_image_tokens = (input_ids == self.config.image_token_id).sum().item()
397
+ n_image_features = image_embeds.shape[0]
398
+ if n_image_tokens != n_image_features:
399
+ raise ValueError(
400
+ f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
401
+ )
402
+
403
+ mask = input_ids == self.config.image_token_id
404
+ mask_unsqueezed = mask.unsqueeze(-1)
405
+ mask_expanded = mask_unsqueezed.expand_as(inputs_embeds)
406
+ image_mask = mask_expanded.to(inputs_embeds.device)
407
+
408
+ image_embeds = image_embeds.to(
409
+ inputs_embeds.device, inputs_embeds.dtype
410
+ )
411
+ inputs_embeds = inputs_embeds.masked_scatter(image_mask, image_embeds)
412
+
413
+ if pixel_values_videos is not None:
414
+ pixel_values_videos = pixel_values_videos.type(self.visual.dtype)
415
+ video_embeds = self.visual(pixel_values_videos, grid_thw=video_grid_thw)
416
+ n_video_tokens = (input_ids == self.config.video_token_id).sum().item()
417
+ n_video_features = video_embeds.shape[0]
418
+ if n_video_tokens != n_video_features:
419
+ raise ValueError(
420
+ f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {n_video_features}"
421
+ )
422
+
423
+ mask = input_ids == self.config.video_token_id
424
+ mask_unsqueezed = mask.unsqueeze(-1)
425
+ mask_expanded = mask_unsqueezed.expand_as(inputs_embeds)
426
+ video_mask = mask_expanded.to(inputs_embeds.device)
427
+
428
+ video_embeds = video_embeds.to(
429
+ inputs_embeds.device, inputs_embeds.dtype
430
+ )
431
+ inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)
432
+
433
+ if tabular_values is not None:
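+ # Mirror the image/video paths above: embed each table cell, project it to the LM hidden size, and scatter the embeddings into the tabular token positions.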
434
+ proc_feats = self.tabular_processor(tabular_values.to(self.device, torch.float32))
435
+ proc_feats = proc_feats.to(inputs_embeds.dtype).to(self.device)
436
+ tabular_embeds = self.tabular_projection(proc_feats)
437
+
438
+ tabular_token_id = getattr(self.config, "tabular_token_id", None)
439
+ if tabular_token_id is None:
440
+ raise ValueError("Tabular token id (config.tabular_token_id) is not set.")
441
+ mask = (input_ids == int(tabular_token_id))
442
+
443
+ tabular_no_mask = mask.sum().item()
444
+ if tabular_no_mask != tabular_embeds.shape[0]:
445
+ raise ValueError(
446
+ f"Tabular features and tabular tokens do not match: tokens: {tabular_no_mask}, features {tabular_embeds.shape[0]}"
447
+ )
448
+
449
+ mask_unsqueezed = mask.unsqueeze(-1)
450
+ mask_expanded = mask_unsqueezed.expand_as(inputs_embeds)
451
+ tabular_mask = mask_expanded.to(inputs_embeds.device)
452
+ tabular_embeds = tabular_embeds.to(
453
+ inputs_embeds.device, inputs_embeds.dtype
454
+ )
455
+ inputs_embeds = inputs_embeds.masked_scatter(
456
+ tabular_mask, tabular_embeds
457
+ )
458
+
459
+ if attention_mask is not None:
460
+ attention_mask = attention_mask.to(inputs_embeds.device)
461
+
462
+ # if we get 4D attention mask we cannot calculate rope deltas anymore. TODO @raushan fixme
463
+ if position_ids is None and (
464
+ attention_mask is None or attention_mask.ndim == 2
465
+ ):
466
+ # calculate RoPE index once per generation in the pre-fill stage only
467
+ if (
468
+ (cache_position is not None and cache_position[0] == 0)
469
+ or self.rope_deltas is None
470
+ or (past_key_values is None or past_key_values.get_seq_length() == 0)
471
+ ):
472
+ position_ids, rope_deltas = self.model.get_rope_index(
473
+ input_ids,
474
+ image_grid_thw,
475
+ video_grid_thw,
476
+ second_per_grid_ts,
477
+ attention_mask,
478
+ )
479
+ self.rope_deltas = rope_deltas
480
+ # then use the prev pre-calculated rope-deltas to get the correct position ids
481
+ else:
482
+ batch_size, seq_length, _ = inputs_embeds.shape
483
+ delta = (
484
+ (cache_position[0] + self.rope_deltas).to(inputs_embeds.device)
485
+ if cache_position is not None
486
+ else 0
487
+ )
488
+ position_ids = torch.arange(seq_length, device=inputs_embeds.device)
489
+ position_ids = position_ids.view(1, -1).expand(batch_size, -1)
490
+ if cache_position is not None: # otherwise `deltas` is an int `0`
491
+ delta = delta.repeat_interleave(batch_size // delta.shape[0], dim=0)
492
+ position_ids = position_ids.add(delta)
493
+ position_ids = position_ids.unsqueeze(0).expand(3, -1, -1)
494
+
495
+ outputs = self.model(
496
+ input_ids=None,
497
+ position_ids=position_ids,
498
+ attention_mask=attention_mask,
499
+ past_key_values=past_key_values,
500
+ inputs_embeds=inputs_embeds,
501
+ use_cache=use_cache,
502
+ output_attentions=output_attentions,
503
+ output_hidden_states=output_hidden_states,
504
+ return_dict=return_dict,
505
+ cache_position=cache_position,
506
+ )
507
+
508
+ hidden_states = outputs[0]
509
+ logits = self.lm_head(hidden_states)
510
+
511
+ loss = None
512
+ if labels is not None:
513
+ # Upcast to float if we need to compute the loss to avoid potential precision issues
514
+ logits = logits.float()
515
+ # Shift so that tokens < n predict n
516
+ shift_logits = logits[..., :-1, :].contiguous()
517
+ shift_labels = labels[..., 1:].contiguous()
518
+ # Flatten the tokens
519
+ loss_fct = CrossEntropyLoss()
520
+ shift_logits = shift_logits.view(-1, self.config.vocab_size)
521
+ shift_labels = shift_labels.view(-1)
522
+ # Enable model parallelism
523
+ shift_labels = shift_labels.to(shift_logits.device)
524
+ loss = loss_fct(shift_logits, shift_labels)
525
+
526
+ if not return_dict:
527
+ output = (logits,) + outputs[1:]
528
+ return (loss,) + output if loss is not None else output
529
+
530
+ return Qwen2_5_VLCausalLMOutputWithPast(
531
+ loss=loss,
532
+ logits=logits,
533
+ past_key_values=outputs.past_key_values,
534
+ hidden_states=outputs.hidden_states,
535
+ attentions=outputs.attentions,
536
+ rope_deltas=self.rope_deltas,
537
+ )
538
+
539
+ def prepare_inputs_for_generation(
540
+ self,
541
+ input_ids,
542
+ past_key_values=None,
543
+ attention_mask=None,
544
+ inputs_embeds=None,
545
+ cache_position=None,
546
+ position_ids=None,
547
+ use_cache=True,
548
+ pixel_values=None,
549
+ pixel_values_videos=None,
550
+ image_grid_thw=None,
551
+ video_grid_thw=None,
552
+ second_per_grid_ts=None,
553
+ **kwargs,
554
+ ):
555
+ # Overwritten -- in specific circumstances we don't want to forward image inputs to the model
556
+
557
+ model_inputs = super().prepare_inputs_for_generation(
558
+ input_ids,
559
+ past_key_values=past_key_values,
560
+ attention_mask=attention_mask,
561
+ inputs_embeds=inputs_embeds,
562
+ cache_position=cache_position,
563
+ position_ids=position_ids,
564
+ pixel_values=pixel_values,
565
+ pixel_values_videos=pixel_values_videos,
566
+ image_grid_thw=image_grid_thw,
567
+ video_grid_thw=video_grid_thw,
568
+ second_per_grid_ts=second_per_grid_ts,
569
+ use_cache=use_cache,
570
+ **kwargs,
571
+ )
572
+
573
+ # Qwen2-5-VL position_ids are prepared with rope_deltas in forward
574
+ model_inputs["position_ids"] = None
575
+
576
+ if cache_position[0] != 0:
577
+ model_inputs["pixel_values"] = None
578
+ model_inputs["pixel_values_videos"] = None
579
+ model_inputs["tabular_values"] = None
580
+
581
+ return model_inputs
582
+
583
+ if __name__ == "__main__":
584
+ template = """{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% set tabular_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'tabular' or 'tabular' in content %}{% set tabular_count.value = tabular_count.value + 1 %}{% if add_vision_id %}Table {{ tabular_count.value }}: {% endif %}<|vision_start|><|tabular_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"""
585
+
586
+ MODE = "reconstruction_variable"
587
+
588
+ model_name_trained = f"./models/Tabular-LM-v0.1-{MODE}"
589
+ # model_name_trained = "Qwen/Qwen2.5-VL-3B-Instruct"
590
+ # model_name_trained = "./models/checkpoints/checkpoint-1000"
591
+
592
+ tabular_processor = TabularPreprocessor()
593
+ qwen_tabular_processor = Qwen_2_5_TabularProcessor(
594
+ tabular_processor=tabular_processor,
595
+ tokenizer=Qwen2TokenizerFast.from_pretrained(model_name_trained),
596
+ )
597
+
598
+ qwen_tabular_processor.tabular_token = "<|tabular_pad|>"
599
+ qwen_tabular_processor.tokenizer.add_tokens([qwen_tabular_processor.tabular_token, "<|tabular_row|>"])
600
+ qwen_tabular_processor.tokenizer.chat_template = template
601
+
602
+ tabular_data = np.random.randn(4,6).round(2)
603
+
604
+ messages = [
605
+ {
606
+ "role": "user",
607
+ "content": [
608
+ {"type": "text", "text": "This is a table."},
609
+ {"index": 0, "type": "tabular"},
610
+ {"type": "text", "text": "Give me its content in csv format."},
611
+ # {"type": "text", "text": "Give me a statistical summary."},
612
+ # {"type": "text", "text": "Give me the correlation matrix in csv format"},
613
+ # {"type": "text", "text": "Give me the content of the table"},
614
+ ],
615
+ }
616
+ ]
617
+
618
+ preprocessed = qwen_tabular_processor.tokenizer.apply_chat_template(
619
+ messages, tokenize=False
620
+ )
621
+
622
+ processed = qwen_tabular_processor(
623
+ [tabular_data], text=preprocessed, return_tensors="pt"
624
+ )
625
+
626
+ model = Qwen2_5_TabularModel.from_pretrained(model_name_trained).to("cuda:1")
627
+ model.config.tabular_token_id = (
628
+ qwen_tabular_processor.tokenizer.convert_tokens_to_ids("<|tabular_pad|>")
629
+ )
630
+ model.config.tabular_row_token_id = (
631
+ qwen_tabular_processor.tokenizer.convert_tokens_to_ids("<|tabular_row|>")
632
+ )
633
+
634
+ processed = {key: value.to("cuda:1") for key, value in processed.items()}
635
+
636
+ res = model.generate(**processed, max_new_tokens=512, do_sample=False)
637
+ generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(processed["input_ids"], res, strict=True)]
638
+ output_text = qwen_tabular_processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
639
+
640
+ print("="*80)
641
+ print("Original table:")
642
+ print(tabular_data)
643
+ print("\nModel output:")
644
+ print(output_text[0])
645
+ print("="*80)
646
+
647
+ if MODE in ["reconstruction", "reconstruction_variable"]:
648
+ # Try to evaluate reconstruction quality
649
+ from utils import text_to_array
650
+ generated_array = text_to_array(output_text[0])
651
+
652
+ # Round original to match expected precision
653
+ tabular_data_rounded = tabular_data.round(1)
654
+
655
+ print("\nReconstruction evaluation:")
656
+ print(f"Original shape: {tabular_data_rounded.shape}")
657
+ print(f"Generated shape: {generated_array.shape}")
658
+
659
+ if generated_array.shape == tabular_data_rounded.shape:
660
+ mse = np.mean((generated_array - tabular_data_rounded) ** 2)
661
+ mae = np.mean(np.abs(generated_array - tabular_data_rounded))
662
+ print(f"MSE: {mse:.4f}")
663
+ print(f"MAE: {mae:.4f}")
664
+ else:
665
+ print("Shape mismatch - cannot compute metrics")
666
+
667
+ if MODE == "summary":
668
+ summary_parts = []
669
+
670
+ # Basic statistics
671
+ summary_parts.append(f"Mean: {tabular_data.mean():.2f}")
672
+ summary_parts.append(f"Median: {np.median(tabular_data):.2f}")
673
+ summary_parts.append(f"Std: {tabular_data.std():.2f}")
674
+ summary_parts.append(f"Min: {tabular_data.min():.2f}")
675
+ summary_parts.append(f"Max: {tabular_data.max():.2f}")
676
+
677
+ # Means per row
678
+ row_means = tabular_data.mean(axis=1)
679
+ row_means_str = ", ".join([f"{m:.2f}" for m in row_means])
680
+ summary_parts.append(f"Row means: [{row_means_str}]")
681
+
682
+ # Means per column
683
+ col_means = tabular_data.mean(axis=0)
684
+ col_means_str = ", ".join([f"{m:.2f}" for m in col_means])
685
+ summary_parts.append(f"Column means: [{col_means_str}]")
686
+
687
+ # Correlation matrix (if there is more than one column)
688
+ if tabular_data.shape[1] > 1:
689
+ try:
690
+ corrcoef = np.corrcoef(tabular_data.T)
691
+ corr_str = "Correlation matrix:\n"
692
+ for i in range(corrcoef.shape[0]):
693
+ corr_row = ", ".join([f"{corrcoef[i, j]:.2f}" for j in range(corrcoef.shape[1])])
694
+ corr_str += f" [{corr_row}]\n"
695
+ summary_parts.append(corr_str.strip())
696
+ except Exception:
697
+ pass
698
+
699
+ summary_text = "\n".join(summary_parts)
700
+ print("True summary:")
701
+ print(summary_text)
added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
chat_template.jinja ADDED
@@ -0,0 +1,7 @@
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
config.json ADDED
@@ -0,0 +1,134 @@
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "float32",
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "image_token_id": 151655,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 18944,
14
+ "max_position_embeddings": 128000,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen2_5_vl",
17
+ "num_attention_heads": 28,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 4,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": {
22
+ "mrope_section": [
23
+ 16,
24
+ 24,
25
+ 24
26
+ ],
27
+ "rope_type": "default",
28
+ "type": "default"
29
+ },
30
+ "rope_theta": 1000000.0,
31
+ "sliding_window": 32768,
32
+ "tabular_row_token_id": 151666,
33
+ "tabular_token_id": 151665,
34
+ "text_config": {
35
+ "_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct",
36
+ "architectures": [
37
+ "Qwen2_5_TabularModel"
38
+ ],
39
+ "attention_dropout": 0.0,
40
+ "dtype": "float32",
41
+ "eos_token_id": 151645,
42
+ "hidden_act": "silu",
43
+ "hidden_size": 3584,
44
+ "initializer_range": 0.02,
45
+ "intermediate_size": 18944,
46
+ "layer_types": [
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention"
75
+ ],
76
+ "max_position_embeddings": 128000,
77
+ "max_window_layers": 28,
78
+ "model_type": "qwen2_5_vl_text",
79
+ "num_attention_heads": 28,
80
+ "num_hidden_layers": 28,
81
+ "num_key_value_heads": 4,
82
+ "pad_token_id": 151643,
83
+ "rms_norm_eps": 1e-06,
84
+ "rope_scaling": {
85
+ "mrope_section": [
86
+ 16,
87
+ 24,
88
+ 24
89
+ ],
90
+ "rope_type": "default",
91
+ "type": "default"
92
+ },
93
+ "rope_theta": 1000000.0,
94
+ "sliding_window": null,
95
+ "use_cache": false,
96
+ "use_sliding_window": false,
97
+ "vision_token_id": 151654,
98
+ "vocab_size": 152064
99
+ },
100
+ "tie_word_embeddings": false,
101
+ "transformers_version": "4.57.1",
102
+ "use_cache": true,
103
+ "use_sliding_window": false,
104
+ "video_token_id": 151656,
105
+ "vision_config": {
106
+ "depth": 32,
107
+ "dtype": "float32",
108
+ "fullatt_block_indexes": [
109
+ 7,
110
+ 15,
111
+ 23,
112
+ 31
113
+ ],
114
+ "hidden_act": "silu",
115
+ "hidden_size": 1280,
116
+ "in_channels": 3,
117
+ "in_chans": 3,
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 3420,
120
+ "model_type": "qwen2_5_vl",
121
+ "num_heads": 16,
122
+ "out_hidden_size": 3584,
123
+ "patch_size": 14,
124
+ "spatial_merge_size": 2,
125
+ "spatial_patch_size": 14,
126
+ "temporal_patch_size": 2,
127
+ "tokens_per_second": 2,
128
+ "window_size": 112
129
+ },
130
+ "vision_end_token_id": 151653,
131
+ "vision_start_token_id": 151652,
132
+ "vision_token_id": 151654,
133
+ "vocab_size": 152064
134
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "pad_token_id": 151643,
8
+ "repetition_penalty": 1.05,
9
+ "temperature": 1e-06,
10
+ "transformers_version": "4.57.1"
11
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765c46bcc1cefe87737ef64b0ba4516f5d4edff19feda16ff05cdcf99f1da101
3
+ size 4952311608
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f93970df45d64d405983e24e8d8b8a32d968b07cb5ee343f15bed20334179b4
3
+ size 4984124272
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cc9536831500b8cbc21fbcf45d3fa8a53af99eb4c8c8031fa6efc908803084
3
+ size 4932743936
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d4d96e7d03aa6616f5d5e6f5cca339733f3b8aa3c33362d5a992b42d0bbd74
3
+ size 4998852296
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77afdeb3d99e17978c49c2eaf75a53a61fe4f926660eb54fb79676db9499c4d
3
+ size 4984124336
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b0a8e9dba356744aa0df4504fb99ca1a2221373aed91ed727511ea8e4a4e16
3
+ size 4932743992
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f682716b0a35062921038bf22590e08ac02559742f3dc21e1f55b89f893c2f5
3
+ size 3695682720
model.safetensors.index.json ADDED
@@ -0,0 +1,781 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 8370124416,
4
+ "total_size": 33480497664
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00007-of-00007.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00007.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00002-of-00007.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
14
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00007.safetensors",
15
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
17
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00007.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
19
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00007.safetensors",
20
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
21
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00007.safetensors",
22
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
23
+ "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
24
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
25
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
26
+ "model.layers.1.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
27
+ "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
28
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
29
+ "model.layers.1.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
30
+ "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
31
+ "model.layers.1.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
32
+ "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
33
+ "model.layers.10.input_layernorm.weight": "model-00004-of-00007.safetensors",
34
+ "model.layers.10.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
35
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
36
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
37
+ "model.layers.10.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
38
+ "model.layers.10.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
39
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
40
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
41
+ "model.layers.10.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
42
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
43
+ "model.layers.10.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
44
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
45
+ "model.layers.11.input_layernorm.weight": "model-00004-of-00007.safetensors",
46
+ "model.layers.11.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
47
+ "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
48
+ "model.layers.11.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
49
+ "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
50
+ "model.layers.11.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
51
+ "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
52
+ "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
53
+ "model.layers.11.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
54
+ "model.layers.11.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
55
+ "model.layers.11.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
56
+ "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
57
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00007.safetensors",
58
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
59
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
60
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
61
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
62
+ "model.layers.12.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
63
+ "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
64
+ "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
65
+ "model.layers.12.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
66
+ "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
67
+ "model.layers.12.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
68
+ "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
69
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00007.safetensors",
70
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
71
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
72
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
73
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
74
+ "model.layers.13.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
75
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
76
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
77
+ "model.layers.13.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
78
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
79
+ "model.layers.13.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
80
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
81
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
82
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
83
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
84
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
85
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
86
+ "model.layers.14.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
87
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
88
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
89
+ "model.layers.14.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
90
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
91
+ "model.layers.14.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
92
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
93
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
94
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
95
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
96
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
97
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
98
+ "model.layers.15.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
99
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
100
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
101
+ "model.layers.15.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
102
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
103
+ "model.layers.15.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
104
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
105
+ "model.layers.16.input_layernorm.weight": "model-00005-of-00007.safetensors",
106
+ "model.layers.16.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
107
+ "model.layers.16.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
108
+ "model.layers.16.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
109
+ "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
110
+ "model.layers.16.self_attn.k_proj.bias": "model-00004-of-00007.safetensors",
111
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
112
+ "model.layers.16.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
113
+ "model.layers.16.self_attn.q_proj.bias": "model-00004-of-00007.safetensors",
114
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
115
+ "model.layers.16.self_attn.v_proj.bias": "model-00004-of-00007.safetensors",
116
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
117
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00007.safetensors",
118
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
119
+ "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
120
+ "model.layers.17.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
121
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
122
+ "model.layers.17.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
123
+ "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
124
+ "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
125
+ "model.layers.17.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
126
+ "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
127
+ "model.layers.17.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
128
+ "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
129
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00007.safetensors",
130
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
131
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
132
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
133
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
134
+ "model.layers.18.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
135
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
136
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
137
+ "model.layers.18.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
138
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
139
+ "model.layers.18.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
140
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
141
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00007.safetensors",
142
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
143
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
144
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
145
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
146
+ "model.layers.19.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
147
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
148
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
149
+ "model.layers.19.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
150
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
151
+ "model.layers.19.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
152
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
153
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00007.safetensors",
154
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
155
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
156
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
157
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
158
+ "model.layers.2.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
159
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
160
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
161
+ "model.layers.2.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
162
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
163
+ "model.layers.2.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
164
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
165
+ "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
166
+ "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
167
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
168
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
169
+ "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
170
+ "model.layers.20.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
171
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
172
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
173
+ "model.layers.20.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
174
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
175
+ "model.layers.20.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
176
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
177
+ "model.layers.21.input_layernorm.weight": "model-00006-of-00007.safetensors",
178
+ "model.layers.21.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
179
+ "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
180
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
181
+ "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
182
+ "model.layers.21.self_attn.k_proj.bias": "model-00005-of-00007.safetensors",
183
+ "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
184
+ "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
185
+ "model.layers.21.self_attn.q_proj.bias": "model-00005-of-00007.safetensors",
186
+ "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
187
+ "model.layers.21.self_attn.v_proj.bias": "model-00005-of-00007.safetensors",
188
+ "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
189
+ "model.layers.22.input_layernorm.weight": "model-00006-of-00007.safetensors",
190
+ "model.layers.22.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
191
+ "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
192
+ "model.layers.22.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
193
+ "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
194
+ "model.layers.22.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
195
+ "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
196
+ "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
197
+ "model.layers.22.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
198
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
199
+ "model.layers.22.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
200
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
201
+ "model.layers.23.input_layernorm.weight": "model-00006-of-00007.safetensors",
202
+ "model.layers.23.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
203
+ "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
204
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
205
+ "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
206
+ "model.layers.23.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
207
+ "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
208
+ "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
209
+ "model.layers.23.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
210
+ "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
211
+ "model.layers.23.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
212
+ "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
213
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00007.safetensors",
214
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
215
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
216
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
217
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
218
+ "model.layers.24.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
219
+ "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
220
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
221
+ "model.layers.24.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
222
+ "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
223
+ "model.layers.24.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
224
+ "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
225
+ "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
226
+ "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
227
+ "model.layers.25.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
228
+ "model.layers.25.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
229
+ "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
230
+ "model.layers.25.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
231
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
232
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
233
+ "model.layers.25.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
234
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
235
+ "model.layers.25.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
236
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
237
+ "model.layers.26.input_layernorm.weight": "model-00007-of-00007.safetensors",
238
+ "model.layers.26.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
239
+ "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
240
+ "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
241
+ "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
242
+ "model.layers.26.self_attn.k_proj.bias": "model-00006-of-00007.safetensors",
243
+ "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
244
+ "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
245
+ "model.layers.26.self_attn.q_proj.bias": "model-00006-of-00007.safetensors",
246
+ "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
247
+ "model.layers.26.self_attn.v_proj.bias": "model-00006-of-00007.safetensors",
248
+ "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
249
+ "model.layers.27.input_layernorm.weight": "model-00007-of-00007.safetensors",
250
+ "model.layers.27.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
251
+ "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
252
+ "model.layers.27.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
253
+ "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
254
+ "model.layers.27.self_attn.k_proj.bias": "model-00007-of-00007.safetensors",
255
+ "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
256
+ "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
257
+ "model.layers.27.self_attn.q_proj.bias": "model-00007-of-00007.safetensors",
258
+ "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
259
+ "model.layers.27.self_attn.v_proj.bias": "model-00007-of-00007.safetensors",
260
+ "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
261
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors",
262
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
263
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
264
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
265
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
266
+ "model.layers.3.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
267
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
268
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
269
+ "model.layers.3.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
270
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
271
+ "model.layers.3.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
272
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
273
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
274
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
275
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
276
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
277
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
278
+ "model.layers.4.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
279
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
280
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
281
+ "model.layers.4.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
282
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
283
+ "model.layers.4.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
284
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
285
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00007.safetensors",
286
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
287
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
288
+ "model.layers.5.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
289
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
290
+ "model.layers.5.self_attn.k_proj.bias": "model-00002-of-00007.safetensors",
291
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
292
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
293
+ "model.layers.5.self_attn.q_proj.bias": "model-00002-of-00007.safetensors",
294
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
295
+ "model.layers.5.self_attn.v_proj.bias": "model-00002-of-00007.safetensors",
296
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
297
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00007.safetensors",
298
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
299
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
300
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
301
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
302
+ "model.layers.6.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
303
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
304
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
305
+ "model.layers.6.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
306
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
307
+ "model.layers.6.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
308
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
309
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00007.safetensors",
310
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
311
+ "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
312
+ "model.layers.7.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
313
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
314
+ "model.layers.7.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
315
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
316
+ "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
317
+ "model.layers.7.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
318
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
319
+ "model.layers.7.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
320
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
321
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors",
322
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
323
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
324
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
325
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
326
+ "model.layers.8.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
327
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
328
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
329
+ "model.layers.8.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
330
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
331
+ "model.layers.8.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
332
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
333
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
334
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
335
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
336
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
337
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
338
+ "model.layers.9.self_attn.k_proj.bias": "model-00003-of-00007.safetensors",
339
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
340
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
341
+ "model.layers.9.self_attn.q_proj.bias": "model-00003-of-00007.safetensors",
342
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
343
+ "model.layers.9.self_attn.v_proj.bias": "model-00003-of-00007.safetensors",
344
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
345
+ "model.norm.weight": "model-00007-of-00007.safetensors",
346
+ "tabular_processor.input_proj.bias": "model-00007-of-00007.safetensors",
347
+ "tabular_processor.input_proj.weight": "model-00007-of-00007.safetensors",
348
+ "tabular_processor.nodes.1.linear1.bias": "model-00007-of-00007.safetensors",
349
+ "tabular_processor.nodes.1.linear1.weight": "model-00007-of-00007.safetensors",
350
+ "tabular_processor.nodes.1.linear2.bias": "model-00007-of-00007.safetensors",
351
+ "tabular_processor.nodes.1.linear2.weight": "model-00007-of-00007.safetensors",
352
+ "tabular_processor.nodes.11.linear1.bias": "model-00007-of-00007.safetensors",
353
+ "tabular_processor.nodes.11.linear1.weight": "model-00007-of-00007.safetensors",
354
+ "tabular_processor.nodes.11.linear2.bias": "model-00007-of-00007.safetensors",
355
+ "tabular_processor.nodes.11.linear2.weight": "model-00007-of-00007.safetensors",
356
+ "tabular_processor.nodes.13.linear1.bias": "model-00007-of-00007.safetensors",
357
+ "tabular_processor.nodes.13.linear1.weight": "model-00007-of-00007.safetensors",
358
+ "tabular_processor.nodes.13.linear2.bias": "model-00007-of-00007.safetensors",
359
+ "tabular_processor.nodes.13.linear2.weight": "model-00007-of-00007.safetensors",
360
+ "tabular_processor.nodes.3.linear1.bias": "model-00007-of-00007.safetensors",
361
+ "tabular_processor.nodes.3.linear1.weight": "model-00007-of-00007.safetensors",
362
+ "tabular_processor.nodes.3.linear2.bias": "model-00007-of-00007.safetensors",
363
+ "tabular_processor.nodes.3.linear2.weight": "model-00007-of-00007.safetensors",
364
+ "tabular_processor.nodes.5.linear1.bias": "model-00007-of-00007.safetensors",
365
+ "tabular_processor.nodes.5.linear1.weight": "model-00007-of-00007.safetensors",
366
+ "tabular_processor.nodes.5.linear2.bias": "model-00007-of-00007.safetensors",
367
+ "tabular_processor.nodes.5.linear2.weight": "model-00007-of-00007.safetensors",
368
+ "tabular_processor.nodes.7.linear1.bias": "model-00007-of-00007.safetensors",
369
+ "tabular_processor.nodes.7.linear1.weight": "model-00007-of-00007.safetensors",
370
+ "tabular_processor.nodes.7.linear2.bias": "model-00007-of-00007.safetensors",
371
+ "tabular_processor.nodes.7.linear2.weight": "model-00007-of-00007.safetensors",
372
+ "tabular_processor.nodes.9.linear1.bias": "model-00007-of-00007.safetensors",
373
+ "tabular_processor.nodes.9.linear1.weight": "model-00007-of-00007.safetensors",
374
+ "tabular_processor.nodes.9.linear2.bias": "model-00007-of-00007.safetensors",
375
+ "tabular_processor.nodes.9.linear2.weight": "model-00007-of-00007.safetensors",
376
+ "tabular_projection.0.bias": "model-00007-of-00007.safetensors",
377
+ "tabular_projection.0.weight": "model-00007-of-00007.safetensors",
378
+ "tabular_projection.2.linear1.bias": "model-00007-of-00007.safetensors",
379
+ "tabular_projection.2.linear1.weight": "model-00007-of-00007.safetensors",
380
+ "tabular_projection.2.linear2.bias": "model-00007-of-00007.safetensors",
381
+ "tabular_projection.2.linear2.weight": "model-00007-of-00007.safetensors",
382
+ "tabular_projection.4.linear1.bias": "model-00007-of-00007.safetensors",
383
+ "tabular_projection.4.linear1.weight": "model-00007-of-00007.safetensors",
384
+ "tabular_projection.4.linear2.bias": "model-00007-of-00007.safetensors",
385
+ "tabular_projection.4.linear2.weight": "model-00007-of-00007.safetensors",
386
+ "tabular_projection.6.linear1.bias": "model-00007-of-00007.safetensors",
387
+ "tabular_projection.6.linear1.weight": "model-00007-of-00007.safetensors",
388
+ "tabular_projection.6.linear2.bias": "model-00007-of-00007.safetensors",
389
+ "tabular_projection.6.linear2.weight": "model-00007-of-00007.safetensors",
390
+ "visual.blocks.0.attn.proj.bias": "model-00001-of-00007.safetensors",
391
+ "visual.blocks.0.attn.proj.weight": "model-00001-of-00007.safetensors",
392
+ "visual.blocks.0.attn.qkv.bias": "model-00001-of-00007.safetensors",
393
+ "visual.blocks.0.attn.qkv.weight": "model-00001-of-00007.safetensors",
394
+ "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
395
+ "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
396
+ "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
397
+ "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
398
+ "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
399
+ "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
400
+ "visual.blocks.0.norm1.weight": "model-00001-of-00007.safetensors",
401
+ "visual.blocks.0.norm2.weight": "model-00001-of-00007.safetensors",
402
+ "visual.blocks.1.attn.proj.bias": "model-00001-of-00007.safetensors",
403
+ "visual.blocks.1.attn.proj.weight": "model-00001-of-00007.safetensors",
404
+ "visual.blocks.1.attn.qkv.bias": "model-00001-of-00007.safetensors",
405
+ "visual.blocks.1.attn.qkv.weight": "model-00001-of-00007.safetensors",
406
+ "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
407
+ "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
408
+ "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
409
+ "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
410
+ "visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
411
+ "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
412
+ "visual.blocks.1.norm1.weight": "model-00001-of-00007.safetensors",
413
+ "visual.blocks.1.norm2.weight": "model-00001-of-00007.safetensors",
414
+ "visual.blocks.10.attn.proj.bias": "model-00001-of-00007.safetensors",
415
+ "visual.blocks.10.attn.proj.weight": "model-00001-of-00007.safetensors",
416
+ "visual.blocks.10.attn.qkv.bias": "model-00001-of-00007.safetensors",
417
+ "visual.blocks.10.attn.qkv.weight": "model-00001-of-00007.safetensors",
418
+ "visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
419
+ "visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
420
+ "visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
421
+ "visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
422
+ "visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
423
+ "visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
424
+ "visual.blocks.10.norm1.weight": "model-00001-of-00007.safetensors",
425
+ "visual.blocks.10.norm2.weight": "model-00001-of-00007.safetensors",
426
+ "visual.blocks.11.attn.proj.bias": "model-00001-of-00007.safetensors",
427
+ "visual.blocks.11.attn.proj.weight": "model-00001-of-00007.safetensors",
428
+ "visual.blocks.11.attn.qkv.bias": "model-00001-of-00007.safetensors",
429
+ "visual.blocks.11.attn.qkv.weight": "model-00001-of-00007.safetensors",
430
+ "visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
431
+ "visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
432
+ "visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
433
+ "visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
434
+ "visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
435
+ "visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
436
+ "visual.blocks.11.norm1.weight": "model-00001-of-00007.safetensors",
437
+ "visual.blocks.11.norm2.weight": "model-00001-of-00007.safetensors",
438
+ "visual.blocks.12.attn.proj.bias": "model-00001-of-00007.safetensors",
439
+ "visual.blocks.12.attn.proj.weight": "model-00001-of-00007.safetensors",
440
+ "visual.blocks.12.attn.qkv.bias": "model-00001-of-00007.safetensors",
441
+ "visual.blocks.12.attn.qkv.weight": "model-00001-of-00007.safetensors",
442
+ "visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
443
+ "visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
444
+ "visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
445
+ "visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
446
+ "visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
447
+ "visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
448
+ "visual.blocks.12.norm1.weight": "model-00001-of-00007.safetensors",
449
+ "visual.blocks.12.norm2.weight": "model-00001-of-00007.safetensors",
450
+ "visual.blocks.13.attn.proj.bias": "model-00001-of-00007.safetensors",
451
+ "visual.blocks.13.attn.proj.weight": "model-00001-of-00007.safetensors",
452
+ "visual.blocks.13.attn.qkv.bias": "model-00001-of-00007.safetensors",
453
+ "visual.blocks.13.attn.qkv.weight": "model-00001-of-00007.safetensors",
454
+ "visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
455
+ "visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
456
+ "visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
457
+ "visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
458
+ "visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
459
+ "visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
460
+ "visual.blocks.13.norm1.weight": "model-00001-of-00007.safetensors",
461
+ "visual.blocks.13.norm2.weight": "model-00001-of-00007.safetensors",
462
+ "visual.blocks.14.attn.proj.bias": "model-00001-of-00007.safetensors",
463
+ "visual.blocks.14.attn.proj.weight": "model-00001-of-00007.safetensors",
464
+ "visual.blocks.14.attn.qkv.bias": "model-00001-of-00007.safetensors",
465
+ "visual.blocks.14.attn.qkv.weight": "model-00001-of-00007.safetensors",
466
+ "visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
467
+ "visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
468
+ "visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
469
+ "visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
470
+ "visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
471
+ "visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
472
+ "visual.blocks.14.norm1.weight": "model-00001-of-00007.safetensors",
473
+ "visual.blocks.14.norm2.weight": "model-00001-of-00007.safetensors",
474
+ "visual.blocks.15.attn.proj.bias": "model-00001-of-00007.safetensors",
475
+ "visual.blocks.15.attn.proj.weight": "model-00001-of-00007.safetensors",
476
+ "visual.blocks.15.attn.qkv.bias": "model-00001-of-00007.safetensors",
477
+ "visual.blocks.15.attn.qkv.weight": "model-00001-of-00007.safetensors",
478
+ "visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
479
+ "visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
480
+ "visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
481
+ "visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
482
+ "visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
483
+ "visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
484
+ "visual.blocks.15.norm1.weight": "model-00001-of-00007.safetensors",
485
+ "visual.blocks.15.norm2.weight": "model-00001-of-00007.safetensors",
486
+ "visual.blocks.16.attn.proj.bias": "model-00001-of-00007.safetensors",
487
+ "visual.blocks.16.attn.proj.weight": "model-00001-of-00007.safetensors",
488
+ "visual.blocks.16.attn.qkv.bias": "model-00001-of-00007.safetensors",
489
+ "visual.blocks.16.attn.qkv.weight": "model-00001-of-00007.safetensors",
490
+ "visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
491
+ "visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
492
+ "visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
493
+ "visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
494
+ "visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
495
+ "visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
496
+ "visual.blocks.16.norm1.weight": "model-00001-of-00007.safetensors",
497
+ "visual.blocks.16.norm2.weight": "model-00001-of-00007.safetensors",
498
+ "visual.blocks.17.attn.proj.bias": "model-00001-of-00007.safetensors",
499
+ "visual.blocks.17.attn.proj.weight": "model-00001-of-00007.safetensors",
500
+ "visual.blocks.17.attn.qkv.bias": "model-00001-of-00007.safetensors",
501
+ "visual.blocks.17.attn.qkv.weight": "model-00001-of-00007.safetensors",
502
+ "visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
503
+ "visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
504
+ "visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
505
+ "visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
506
+ "visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
507
+ "visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
508
+ "visual.blocks.17.norm1.weight": "model-00001-of-00007.safetensors",
509
+ "visual.blocks.17.norm2.weight": "model-00001-of-00007.safetensors",
510
+ "visual.blocks.18.attn.proj.bias": "model-00001-of-00007.safetensors",
511
+ "visual.blocks.18.attn.proj.weight": "model-00001-of-00007.safetensors",
512
+ "visual.blocks.18.attn.qkv.bias": "model-00001-of-00007.safetensors",
513
+ "visual.blocks.18.attn.qkv.weight": "model-00001-of-00007.safetensors",
514
+ "visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
515
+ "visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
516
+ "visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
517
+ "visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
518
+ "visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
519
+ "visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
520
+ "visual.blocks.18.norm1.weight": "model-00001-of-00007.safetensors",
521
+ "visual.blocks.18.norm2.weight": "model-00001-of-00007.safetensors",
522
+ "visual.blocks.19.attn.proj.bias": "model-00001-of-00007.safetensors",
523
+ "visual.blocks.19.attn.proj.weight": "model-00001-of-00007.safetensors",
524
+ "visual.blocks.19.attn.qkv.bias": "model-00001-of-00007.safetensors",
525
+ "visual.blocks.19.attn.qkv.weight": "model-00001-of-00007.safetensors",
526
+ "visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
527
+ "visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
528
+ "visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
529
+ "visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
530
+ "visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
531
+ "visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
532
+ "visual.blocks.19.norm1.weight": "model-00001-of-00007.safetensors",
533
+ "visual.blocks.19.norm2.weight": "model-00001-of-00007.safetensors",
534
+ "visual.blocks.2.attn.proj.bias": "model-00001-of-00007.safetensors",
535
+ "visual.blocks.2.attn.proj.weight": "model-00001-of-00007.safetensors",
536
+ "visual.blocks.2.attn.qkv.bias": "model-00001-of-00007.safetensors",
537
+ "visual.blocks.2.attn.qkv.weight": "model-00001-of-00007.safetensors",
538
+ "visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
539
+ "visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
540
+ "visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
541
+ "visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
542
+ "visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
543
+ "visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
544
+ "visual.blocks.2.norm1.weight": "model-00001-of-00007.safetensors",
545
+ "visual.blocks.2.norm2.weight": "model-00001-of-00007.safetensors",
546
+ "visual.blocks.20.attn.proj.bias": "model-00001-of-00007.safetensors",
547
+ "visual.blocks.20.attn.proj.weight": "model-00001-of-00007.safetensors",
548
+ "visual.blocks.20.attn.qkv.bias": "model-00001-of-00007.safetensors",
549
+ "visual.blocks.20.attn.qkv.weight": "model-00001-of-00007.safetensors",
550
+ "visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
551
+ "visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
552
+ "visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
553
+ "visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
554
+ "visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
555
+ "visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
556
+ "visual.blocks.20.norm1.weight": "model-00001-of-00007.safetensors",
557
+ "visual.blocks.20.norm2.weight": "model-00001-of-00007.safetensors",
558
+ "visual.blocks.21.attn.proj.bias": "model-00001-of-00007.safetensors",
559
+ "visual.blocks.21.attn.proj.weight": "model-00001-of-00007.safetensors",
560
+ "visual.blocks.21.attn.qkv.bias": "model-00001-of-00007.safetensors",
561
+ "visual.blocks.21.attn.qkv.weight": "model-00001-of-00007.safetensors",
562
+ "visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
563
+ "visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
564
+ "visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
565
+ "visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
566
+ "visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
567
+ "visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
568
+ "visual.blocks.21.norm1.weight": "model-00001-of-00007.safetensors",
569
+ "visual.blocks.21.norm2.weight": "model-00001-of-00007.safetensors",
570
+ "visual.blocks.22.attn.proj.bias": "model-00001-of-00007.safetensors",
571
+ "visual.blocks.22.attn.proj.weight": "model-00001-of-00007.safetensors",
572
+ "visual.blocks.22.attn.qkv.bias": "model-00001-of-00007.safetensors",
573
+ "visual.blocks.22.attn.qkv.weight": "model-00001-of-00007.safetensors",
574
+ "visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
575
+ "visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
576
+ "visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
577
+ "visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
578
+ "visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
579
+ "visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
580
+ "visual.blocks.22.norm1.weight": "model-00001-of-00007.safetensors",
581
+ "visual.blocks.22.norm2.weight": "model-00001-of-00007.safetensors",
582
+ "visual.blocks.23.attn.proj.bias": "model-00001-of-00007.safetensors",
583
+ "visual.blocks.23.attn.proj.weight": "model-00001-of-00007.safetensors",
584
+ "visual.blocks.23.attn.qkv.bias": "model-00001-of-00007.safetensors",
585
+ "visual.blocks.23.attn.qkv.weight": "model-00001-of-00007.safetensors",
586
+ "visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
587
+ "visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
588
+ "visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
589
+ "visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
590
+ "visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
591
+ "visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
592
+ "visual.blocks.23.norm1.weight": "model-00001-of-00007.safetensors",
593
+ "visual.blocks.23.norm2.weight": "model-00001-of-00007.safetensors",
594
+ "visual.blocks.24.attn.proj.bias": "model-00001-of-00007.safetensors",
595
+ "visual.blocks.24.attn.proj.weight": "model-00001-of-00007.safetensors",
596
+ "visual.blocks.24.attn.qkv.bias": "model-00001-of-00007.safetensors",
597
+ "visual.blocks.24.attn.qkv.weight": "model-00001-of-00007.safetensors",
598
+ "visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
599
+ "visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
600
+ "visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
601
+ "visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
602
+ "visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
603
+ "visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
604
+ "visual.blocks.24.norm1.weight": "model-00001-of-00007.safetensors",
605
+ "visual.blocks.24.norm2.weight": "model-00001-of-00007.safetensors",
606
+ "visual.blocks.25.attn.proj.bias": "model-00001-of-00007.safetensors",
607
+ "visual.blocks.25.attn.proj.weight": "model-00001-of-00007.safetensors",
608
+ "visual.blocks.25.attn.qkv.bias": "model-00001-of-00007.safetensors",
609
+ "visual.blocks.25.attn.qkv.weight": "model-00001-of-00007.safetensors",
610
+ "visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
611
+ "visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
612
+ "visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
613
+ "visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
614
+ "visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
615
+ "visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
616
+ "visual.blocks.25.norm1.weight": "model-00001-of-00007.safetensors",
617
+ "visual.blocks.25.norm2.weight": "model-00001-of-00007.safetensors",
618
+ "visual.blocks.26.attn.proj.bias": "model-00001-of-00007.safetensors",
619
+ "visual.blocks.26.attn.proj.weight": "model-00001-of-00007.safetensors",
620
+ "visual.blocks.26.attn.qkv.bias": "model-00001-of-00007.safetensors",
621
+ "visual.blocks.26.attn.qkv.weight": "model-00001-of-00007.safetensors",
622
+ "visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
623
+ "visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
624
+ "visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
625
+ "visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
626
+ "visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
627
+ "visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
628
+ "visual.blocks.26.norm1.weight": "model-00001-of-00007.safetensors",
629
+ "visual.blocks.26.norm2.weight": "model-00001-of-00007.safetensors",
630
+ "visual.blocks.27.attn.proj.bias": "model-00001-of-00007.safetensors",
631
+ "visual.blocks.27.attn.proj.weight": "model-00001-of-00007.safetensors",
632
+ "visual.blocks.27.attn.qkv.bias": "model-00001-of-00007.safetensors",
633
+ "visual.blocks.27.attn.qkv.weight": "model-00001-of-00007.safetensors",
634
+ "visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
635
+ "visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
636
+ "visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
637
+ "visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
638
+ "visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
639
+ "visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
640
+ "visual.blocks.27.norm1.weight": "model-00001-of-00007.safetensors",
641
+ "visual.blocks.27.norm2.weight": "model-00001-of-00007.safetensors",
642
+ "visual.blocks.28.attn.proj.bias": "model-00001-of-00007.safetensors",
643
+ "visual.blocks.28.attn.proj.weight": "model-00001-of-00007.safetensors",
644
+ "visual.blocks.28.attn.qkv.bias": "model-00001-of-00007.safetensors",
645
+ "visual.blocks.28.attn.qkv.weight": "model-00001-of-00007.safetensors",
646
+ "visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
647
+ "visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
648
+ "visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
649
+ "visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
650
+ "visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
651
+ "visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
652
+ "visual.blocks.28.norm1.weight": "model-00001-of-00007.safetensors",
653
+ "visual.blocks.28.norm2.weight": "model-00001-of-00007.safetensors",
654
+ "visual.blocks.29.attn.proj.bias": "model-00001-of-00007.safetensors",
655
+ "visual.blocks.29.attn.proj.weight": "model-00001-of-00007.safetensors",
656
+ "visual.blocks.29.attn.qkv.bias": "model-00001-of-00007.safetensors",
657
+ "visual.blocks.29.attn.qkv.weight": "model-00001-of-00007.safetensors",
658
+ "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
659
+ "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
660
+ "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
661
+ "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
662
+ "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
663
+ "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
664
+ "visual.blocks.29.norm1.weight": "model-00001-of-00007.safetensors",
665
+ "visual.blocks.29.norm2.weight": "model-00001-of-00007.safetensors",
666
+ "visual.blocks.3.attn.proj.bias": "model-00001-of-00007.safetensors",
667
+ "visual.blocks.3.attn.proj.weight": "model-00001-of-00007.safetensors",
668
+ "visual.blocks.3.attn.qkv.bias": "model-00001-of-00007.safetensors",
669
+ "visual.blocks.3.attn.qkv.weight": "model-00001-of-00007.safetensors",
670
+ "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
671
+ "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
672
+ "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
673
+ "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
674
+ "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
675
+ "visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
676
+ "visual.blocks.3.norm1.weight": "model-00001-of-00007.safetensors",
677
+ "visual.blocks.3.norm2.weight": "model-00001-of-00007.safetensors",
678
+ "visual.blocks.30.attn.proj.bias": "model-00001-of-00007.safetensors",
679
+ "visual.blocks.30.attn.proj.weight": "model-00001-of-00007.safetensors",
680
+ "visual.blocks.30.attn.qkv.bias": "model-00001-of-00007.safetensors",
681
+ "visual.blocks.30.attn.qkv.weight": "model-00001-of-00007.safetensors",
682
+ "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
683
+ "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
684
+ "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
685
+ "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
686
+ "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
687
+ "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
688
+ "visual.blocks.30.norm1.weight": "model-00001-of-00007.safetensors",
689
+ "visual.blocks.30.norm2.weight": "model-00001-of-00007.safetensors",
690
+ "visual.blocks.31.attn.proj.bias": "model-00001-of-00007.safetensors",
691
+ "visual.blocks.31.attn.proj.weight": "model-00001-of-00007.safetensors",
692
+ "visual.blocks.31.attn.qkv.bias": "model-00001-of-00007.safetensors",
693
+ "visual.blocks.31.attn.qkv.weight": "model-00001-of-00007.safetensors",
694
+ "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
695
+ "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
696
+ "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
697
+ "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
698
+ "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
699
+ "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
700
+ "visual.blocks.31.norm1.weight": "model-00001-of-00007.safetensors",
701
+ "visual.blocks.31.norm2.weight": "model-00001-of-00007.safetensors",
702
+ "visual.blocks.4.attn.proj.bias": "model-00001-of-00007.safetensors",
703
+ "visual.blocks.4.attn.proj.weight": "model-00001-of-00007.safetensors",
704
+ "visual.blocks.4.attn.qkv.bias": "model-00001-of-00007.safetensors",
705
+ "visual.blocks.4.attn.qkv.weight": "model-00001-of-00007.safetensors",
706
+ "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
707
+ "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
708
+ "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
709
+ "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
710
+ "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
711
+ "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
712
+ "visual.blocks.4.norm1.weight": "model-00001-of-00007.safetensors",
713
+ "visual.blocks.4.norm2.weight": "model-00001-of-00007.safetensors",
714
+ "visual.blocks.5.attn.proj.bias": "model-00001-of-00007.safetensors",
715
+ "visual.blocks.5.attn.proj.weight": "model-00001-of-00007.safetensors",
716
+ "visual.blocks.5.attn.qkv.bias": "model-00001-of-00007.safetensors",
717
+ "visual.blocks.5.attn.qkv.weight": "model-00001-of-00007.safetensors",
718
+ "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
719
+ "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
720
+ "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
721
+ "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
722
+ "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
723
+ "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
724
+ "visual.blocks.5.norm1.weight": "model-00001-of-00007.safetensors",
725
+ "visual.blocks.5.norm2.weight": "model-00001-of-00007.safetensors",
726
+ "visual.blocks.6.attn.proj.bias": "model-00001-of-00007.safetensors",
727
+ "visual.blocks.6.attn.proj.weight": "model-00001-of-00007.safetensors",
728
+ "visual.blocks.6.attn.qkv.bias": "model-00001-of-00007.safetensors",
729
+ "visual.blocks.6.attn.qkv.weight": "model-00001-of-00007.safetensors",
730
+ "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
731
+ "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
732
+ "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
733
+ "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
734
+ "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
735
+ "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
736
+ "visual.blocks.6.norm1.weight": "model-00001-of-00007.safetensors",
737
+ "visual.blocks.6.norm2.weight": "model-00001-of-00007.safetensors",
738
+ "visual.blocks.7.attn.proj.bias": "model-00001-of-00007.safetensors",
739
+ "visual.blocks.7.attn.proj.weight": "model-00001-of-00007.safetensors",
740
+ "visual.blocks.7.attn.qkv.bias": "model-00001-of-00007.safetensors",
741
+ "visual.blocks.7.attn.qkv.weight": "model-00001-of-00007.safetensors",
742
+ "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
743
+ "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
744
+ "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
745
+ "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
746
+ "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
747
+ "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
748
+ "visual.blocks.7.norm1.weight": "model-00001-of-00007.safetensors",
749
+ "visual.blocks.7.norm2.weight": "model-00001-of-00007.safetensors",
750
+ "visual.blocks.8.attn.proj.bias": "model-00001-of-00007.safetensors",
751
+ "visual.blocks.8.attn.proj.weight": "model-00001-of-00007.safetensors",
752
+ "visual.blocks.8.attn.qkv.bias": "model-00001-of-00007.safetensors",
753
+ "visual.blocks.8.attn.qkv.weight": "model-00001-of-00007.safetensors",
754
+ "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
755
+ "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
756
+ "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
757
+ "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
758
+ "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
759
+ "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
760
+ "visual.blocks.8.norm1.weight": "model-00001-of-00007.safetensors",
761
+ "visual.blocks.8.norm2.weight": "model-00001-of-00007.safetensors",
762
+ "visual.blocks.9.attn.proj.bias": "model-00001-of-00007.safetensors",
763
+ "visual.blocks.9.attn.proj.weight": "model-00001-of-00007.safetensors",
764
+ "visual.blocks.9.attn.qkv.bias": "model-00001-of-00007.safetensors",
765
+ "visual.blocks.9.attn.qkv.weight": "model-00001-of-00007.safetensors",
766
+ "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00007.safetensors",
767
+ "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
768
+ "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00007.safetensors",
769
+ "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
770
+ "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00007.safetensors",
771
+ "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
772
+ "visual.blocks.9.norm1.weight": "model-00001-of-00007.safetensors",
773
+ "visual.blocks.9.norm2.weight": "model-00001-of-00007.safetensors",
774
+ "visual.merger.ln_q.weight": "model-00001-of-00007.safetensors",
775
+ "visual.merger.mlp.0.bias": "model-00001-of-00007.safetensors",
776
+ "visual.merger.mlp.0.weight": "model-00001-of-00007.safetensors",
777
+ "visual.merger.mlp.2.bias": "model-00001-of-00007.safetensors",
778
+ "visual.merger.mlp.2.weight": "model-00001-of-00007.safetensors",
779
+ "visual.patch_embed.proj.weight": "model-00001-of-00007.safetensors"
780
+ }
781
+ }
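The closing braces above end the weight_map of model.safetensors.index.json: every parameter name, including the new tabular_processor and tabular_projection modules, points at one of the seven safetensors shards. As a minimal sketch (the checkpoint directory is a placeholder; safetensors is already a dependency of transformers), a single tensor can be resolved through that index like this:

import json
from safetensors import safe_open

CHECKPOINT_DIR = "path/to/local/checkout"  # hypothetical local path of this repository

with open(f"{CHECKPOINT_DIR}/model.safetensors.index.json") as f:
    index = json.load(f)

name = "tabular_projection.0.weight"            # any key listed in weight_map above
shard = index["weight_map"][name]               # e.g. "model-00007-of-00007.safetensors"

with safe_open(f"{CHECKPOINT_DIR}/{shard}", framework="pt") as shard_file:
    tensor = shard_file.get_tensor(name)        # loads only this tensor, not the whole shard

print(name, tuple(tensor.shape))

from_pretrained performs the same shard resolution automatically; the manual route is mainly useful for spot-checking that the tabular weights landed in the final shard.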
preprocessor_config.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "disable_grouping": null,
7
+ "do_center_crop": null,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": null,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_mean": [
14
+ 0.48145466,
15
+ 0.4578275,
16
+ 0.40821073
17
+ ],
18
+ "image_processor_type": "Qwen2VLImageProcessorFast",
19
+ "image_std": [
20
+ 0.26862954,
21
+ 0.26130258,
22
+ 0.27577711
23
+ ],
24
+ "input_data_format": null,
25
+ "max_pixels": 12845056,
26
+ "merge_size": 2,
27
+ "min_pixels": 3136,
28
+ "pad_size": null,
29
+ "patch_size": 14,
30
+ "processor_class": "Qwen2_5_VLProcessor",
31
+ "resample": 3,
32
+ "rescale_factor": 0.00392156862745098,
33
+ "return_tensors": null,
34
+ "size": {
35
+ "longest_edge": 12845056,
36
+ "shortest_edge": 3136
37
+ },
38
+ "temporal_patch_size": 2
39
+ }
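This preprocessor_config.json is the stock Qwen2.5-VL fast image-processor configuration: 14-pixel patches, 2×2 patch merging, and a per-image pixel budget between min_pixels (3136) and max_pixels (12845056). A short sketch of exercising it in isolation, assuming a local checkout of this repository:

import numpy as np
from PIL import Image
from transformers import AutoImageProcessor

CHECKPOINT_DIR = "path/to/local/checkout"  # hypothetical local path

processor = AutoImageProcessor.from_pretrained(CHECKPOINT_DIR)  # resolves to Qwen2VLImageProcessorFast
image = Image.fromarray(np.random.randint(0, 255, (336, 336, 3), dtype=np.uint8))

out = processor(images=image, return_tensors="pt")
# pixel_values holds the flattened image patches; image_grid_thw gives the (t, h, w) patch grid
print(out["pixel_values"].shape, out["image_grid_thw"])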
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
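tokenizer.json is committed as a Git LFS pointer (only the object hash and size live in git history, as declared in .gitattributes), so a clone without LFS support ends up with this three-line stub instead of the ~11 MB tokenizer file. One way to obtain the resolved file, sketched with a placeholder repository id:

from huggingface_hub import hf_hub_download

# repo_id is hypothetical; substitute the Hub repository this folder was uploaded to
path = hf_hub_download(repo_id="user/this-repo", filename="tokenizer.json")
print(path)  # local cache path of the real tokenizer.json, not the LFS pointer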
tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
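tokenizer_config.json keeps the standard Qwen2.5-VL token inventory: <|im_end|> as the EOS token, <|endoftext|> as padding, and the vision and tool tokens registered in added_tokens_decoder. Note that no chat_template is stored here; utils.py below installs its own tabular-aware template (TABULAR_CHAT_TEMPLATE) when the processor is built. A quick sanity check, assuming a local checkout:

from transformers import AutoTokenizer

CHECKPOINT_DIR = "path/to/local/checkout"  # hypothetical local path

tok = AutoTokenizer.from_pretrained(CHECKPOINT_DIR)
print(tok.eos_token, tok.pad_token)                   # <|im_end|> <|endoftext|>
print(tok.convert_tokens_to_ids("<|vision_start|>"))  # 151652, per added_tokens_decoder above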
utils.py ADDED
@@ -0,0 +1,385 @@
1
+
2
+ from trl.models.utils import unwrap_model_for_generation
3
+ # %%
4
+ import re
5
+
6
+ import openai
7
+ import torch
8
+ from transformers import (
9
+ GenerationConfig,
10
+ TrainerCallback,
11
+ Qwen2TokenizerFast,
12
+ )
13
+
14
+ import wandb
15
+
16
+ import tqdm
17
+ from accelerate.utils import gather_object
18
+ import pandas as pd
19
+ import io
20
+ import numpy as np
21
+
22
+ # Chat template for tabular models
23
+ TABULAR_CHAT_TEMPLATE = """{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% set tabular_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif content['type'] == 'tabular' or 'tabular' in content %}{% set tabular_count.value = tabular_count.value + 1 %}{% if add_vision_id %}Table {{ tabular_count.value }}: {% endif %}<|vision_start|><|tabular_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"""
24
+
def load_model_and_processor(
    model_path: str,
    device: str = "cuda:0",
    torch_dtype=torch.bfloat16,
) -> tuple:
    """
    Load a Qwen2_5_TabularModel and its processor.

    Args:
        model_path: Path to the model checkpoint or HuggingFace model name
        device: Device to load the model on (e.g., "cuda:0", "cuda:1", "cpu")
        torch_dtype: Torch dtype for the model (default: torch.bfloat16)

    Returns:
        tuple: (model, processor) ready to use
    """
    from TabularModel import (
        TabularPreprocessor,
        Qwen_2_5_TabularProcessor,
        Qwen2_5_TabularModel,
    )

    # Create tabular preprocessor
    tabular_processor = TabularPreprocessor()

    # Create Qwen tabular processor
    qwen_tabular_processor = Qwen_2_5_TabularProcessor(
        tabular_processor=tabular_processor,
        tokenizer=Qwen2TokenizerFast.from_pretrained(model_path),
    )

    # Add special tokens
    qwen_tabular_processor.tabular_token = "<|tabular_pad|>"
    qwen_tabular_processor.tokenizer.add_tokens([
        qwen_tabular_processor.tabular_token,
        "<|tabular_row|>",
        "<|tabular_cell|>",
    ])
    qwen_tabular_processor.tokenizer.chat_template = TABULAR_CHAT_TEMPLATE

    # Load model
    model = Qwen2_5_TabularModel.from_pretrained(
        model_path,
        torch_dtype=torch_dtype,
    ).to(device)

    # Set token IDs in config
    model.config.tabular_token_id = (
        qwen_tabular_processor.tokenizer.convert_tokens_to_ids("<|tabular_pad|>")
    )
    model.config.tabular_row_token_id = (
        qwen_tabular_processor.tokenizer.convert_tokens_to_ids("<|tabular_row|>")
    )
    model.config.tabular_cell_token_id = (
        qwen_tabular_processor.tokenizer.convert_tokens_to_ids("<|tabular_cell|>")
    )

    return model, qwen_tabular_processor

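# Usage sketch (the checkpoint path is a placeholder, not a real artifact):
#
#     model, processor = load_model_and_processor("path/to/checkpoint", device="cuda:0")
#
# The returned processor already carries the tabular special tokens and chat template,
# and model.config holds the matching token ids, so the pair can be passed directly to
# generate_answer defined further below.
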
def get_role_by_idx(convo: list[dict[str, str]], role: str, idx: int) -> str:
    found = 0
    for message in convo:
        if message["role"] == role:
            if found == idx:
                return message["content"]
            found += 1
    raise ValueError(f"Role {role} not found {idx} times")

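# Example: get_role_by_idx(messages, "assistant", 0) returns the first assistant turn,
# which samples_filtering_table below uses as the reference answer.
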
class LLMSampleCB(TrainerCallback):
    def __init__(
        self,
        trainer,
        test_dataset,
        num_samples=10,
        max_new_tokens=256,
        log_model="checkpoint",
    ):
        "A callback that logs sample generations to a wandb.Table during training"
        super().__init__()
        self._log_model = log_model
        self.trainer = trainer

        # Get unique tasks from the dataset
        tasks = set([i["task"] for i in test_dataset])

        # Get num_samples examples from each task
        task_samples = []
        for task in tasks:
            task_dataset = [i for i in test_dataset if i["task"] == task][:num_samples]
            task_samples.extend(task_dataset)

        # Combine samples from all tasks
        self.sample_dataset = task_samples

        self.model, self.tokenizer = trainer.model_wrapped, trainer.tokenizer

        self.tokenizer.padding_side = "left"

        self.gen_config = GenerationConfig.from_pretrained(
            trainer.model.name_or_path, temperature=0.001, max_new_tokens=max_new_tokens
        )
        self.idx = 0

    def generate(self, conversations: list[list[dict[str, str]]]) -> list[str]:
        accelerator = self.trainer.accelerator

        # Create original prompts before distribution to use as keys
        original_prompts = self.tokenizer.apply_chat_template(conversations, tokenize=False)
        original_prompt_to_idx = {self._normalize_string(prompt): idx for idx, prompt in enumerate(original_prompts)}

        completions = [None] * len(conversations)  # Pre-allocate result array

        with accelerator.split_between_processes(conversations) as conversation_subset:
            model = self.trainer.model_wrapped
            with unwrap_model_for_generation(model, accelerator) as unwrapped_model:
                prompts = self.tokenizer.apply_chat_template(conversation_subset, tokenize=False)

                tokenized_prompts = self.tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
                with torch.inference_mode():
                    print("Generating...")
                    generations = unwrapped_model.generate(**tokenized_prompts, generation_config=self.gen_config).cpu()
                    print("Generated!")

                results = []
                for prompt_str, prompt_tokens, generation in zip(prompts, tokenized_prompts.input_ids, generations):
                    # Remove prompt from generation
                    generation = generation[len(prompt_tokens):]
                    completion = self.tokenizer.decode(generation, skip_special_tokens=True)
                    results.append((prompt_str, completion))

        # Gather results from all processes
        all_results = gather_object(results)

        # Place completions in their original positions
        for prompt_str, completion in all_results:
            norm_prompt = self._normalize_string(prompt_str)
            if norm_prompt in original_prompt_to_idx:
                idx = original_prompt_to_idx[norm_prompt]
                completions[idx] = completion

        return completions

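    # Note on generate above: prompts are sharded across processes with
    # split_between_processes, generations are collected with gather_object, and each
    # completion is matched back to its original slot via the whitespace-normalized
    # prompt string, so the returned list preserves the input conversation order.
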
    def samples_filtering_table(self, examples):
        "Create a wandb.Table to store the generations"
        records_table = wandb.Table(columns=["full_prompt", "question", "generation", "real_answer", "points"])
        max_num = [0]
        summary = [0]

        batch_size = 32
        all_data = []

        for i in tqdm.trange(0, len(examples), batch_size):
            batch = examples[i : i + batch_size]
            batch_data = []

            # Prepare batch inputs
            batch_inputs = []
            for row in batch:
                row = row["messages"]
                user = get_role_by_idx(row, "user", 0)
                real_answer = get_role_by_idx(row, "assistant", 0)

                # Extract the question from the user prompt
                # ("Zapytanie brzmi:" is Polish for "The query is:")
                question = user.split("Zapytanie brzmi:")[1].strip() if "Zapytanie brzmi:" in user else user
                prompt = user

                batch_inputs.append(row[:-1])
                batch_data.append((prompt, question, real_answer))

            # Generate all responses in a single pass
            generations = self.generate(batch_inputs)

            # Process results
            if self.trainer.accelerator.is_main_process:
                for idx, (prompt, question, real_answer) in enumerate(batch_data):
                    generation = generations[idx]

                    # Get points for this example
                    try:
                        _, points = self.compare_filtering_answer(question, generation, real_answer)
                        max_num[0] += 1
                        summary[0] += points
                    except Exception:
                        points = 0

                    records_table.add_data(prompt, question, generation, real_answer, points)
                    batch_data[idx] = (prompt, question, generation, real_answer)

            all_data.extend(batch_data)

        return records_table, (summary[0] / max_num[0] if max_num[0] > 0 else 0)

    def compare_filtering_answer(self, question, answer, expected):
        client = openai.Client()
        # Polish judge prompt: "You are an AI for grading answers to legal-document filtering tasks."
        system = "Jesteś sztuczną inteligencją do oceniania odpowiedzi na zadania filtrowania dokumentów prawniczych."
        # Polish: query / correct answer / model answer, then: "Judge whether the model answer
        # correctly identifies the relation and gives adequate reasoning, as in the correct answer.
        # Reply in the format 'Argumentacja: (...)\nOcena: 0 lub 1', where 0 is incorrect and 1 is correct."
        user = f"Zapytanie: '{question}'.\nPoprawna odpowiedź: '{expected}'\nOdpowiedź modelu: '{answer}'."
        user += "\nOceń, czy odpowiedź modelu poprawnie identyfikuje powiązanie i zawiera odpowiednią argumentację, podobnie jak w poprawnej odpowiedzi."  # noqa: E501
        user += "\nOdpowiedz w formacie 'Argumentacja: (...)\nOcena: 0 lub 1', gdzie 0 to niepoprawna odpowiedź, a 1 to poprawna odpowiedź."  # noqa: E501

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            temperature=0.0,
            max_tokens=512,
        )
        resp = response.choices[0].message.content.rstrip(".").strip()
        print(resp)
        try:
            return resp, int(resp.split(":")[-1].split()[0].strip())
        except Exception:
            print("Error: ", resp)
            # Look for either 0 or 1 in the response
            score = 1 if "ocena: 1" in resp.lower() else 0
            return resp, score

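    # The judge is expected to reply in the form (illustrative example, not a real API response):
    #
    #     Argumentacja: (...)
    #     Ocena: 1
    #
    # so the parser above casts the token after the last ':' to int and falls back to
    # searching for "ocena: 1" when that cast fails.
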
    def on_evaluate(self, *args, **kwargs):
        "Log the wandb.Table after calling trainer.evaluate"
        filtering_dataset = [i for i in self.sample_dataset if i["task"] == "filtering"]
        records_table, recall = self.samples_filtering_table(filtering_dataset)

        if self.trainer.accelerator.is_main_process:
            try:
                wandb.log({"filtering_predictions_" + str(self.idx): records_table})
                wandb.log({"filtering_recall": recall})
            except Exception:
                pass

        self.idx += 1

    def compare_answer(self, question, answer, expected):
        client = openai.Client()
        # Polish judge prompt: "You are an AI for grading exam answers. You grade answers as
        # correct (1 point) or incorrect (0 points)."
        system = "Jesteś sztuczną inteligencją do oceniania odpowiedzi na egzaminie. Oceniasz odpowiedzi jako poprawne (1 punkt) lub niepoprawne (0 punktów)."  # noqa: E501
        # Polish: question / correct answer / user answer, then: "Is the user's answer correct?
        # Award 1 point for a correct answer, 0 for an incorrect one. If the reference answer says the
        # question cannot be answered, the user's answer must say the same. Do not award points for
        # effort; judge correctness only. Reply in the format 'Argumentacja: (...)\nOcena: 0 lub 1'."
        user = f"Pytanie: '{question}'.\n Poprawna odpowiedź: '{expected}'\n Odpowiedź użytkownika: '{answer}'."
        user += "\nCzy odpowiedź użytkownika jest poprawna? Przyznaj 1 punkt za poprawną odpowiedź lub 0 punktów za niepoprawną. Jeżeli poprawna odpowiedź sugeruje że nie da się odpowiedzieć na pytanie, to odpowiedź użytkownika powinna być taka sama. Nie dawaj punktów za chęci. Oceniaj odpowiedź tylko pod kątem poprawności."  # noqa: E501
        user += "\nPodkreślam: jeżeli poprawna odpowiedź sugeruje że nie da się udzielić odpowiedzi na podstawie źródeł, to odpowiedź użytkownika powinna być taka sama."  # noqa: E501
        user += (
            "Odpowiedz w formacie 'Argumentacja: (...)\nOcena: 0 lub 1', gdzie 0 to brak punktów, a 1 to pełna ocena."
        )
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            temperature=0.0,
            max_tokens=512,
        )
        resp = response.choices[0].message.content.rstrip(".").strip()
        try:
            return resp, int(resp.split(":")[-1].split()[0].strip())
        except Exception:
            print("Error: ", resp)
            # Look for either 0 or 1 in the response
            score = 1 if "1" in re.findall(r"\d+", resp) else 0
            return resp, score

    def _normalize_string(self, s):
        """Normalize string to avoid whitespace/newline comparison issues"""
        if s is None:
            return ""
        # Remove all whitespace and convert to lowercase for more robust matching
        return re.sub(r'\s+', '', s).lower()

def text_to_array(text):
    if '```' not in text:
        csv_text = text.strip()
    elif '```csv' not in text:
        csv_text = text.strip().split("```")[1].strip()
    else:
        csv_text = text.strip().split("```csv")[1].split("```")[0]
    # Parse CSV into a DataFrame
    df = pd.read_csv(io.StringIO(csv_text), header=None)

    # Convert DataFrame to numpy array for comparison
    generated_corr_matrix = df.values
    return generated_corr_matrix

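# Example: for a completion containing a fenced block such as
#
#     ```csv
#     1.0,0.5
#     0.5,1.0
#     ```
#
# text_to_array returns np.array([[1.0, 0.5], [0.5, 1.0]]); plain CSV without code
# fences is parsed as-is.
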
def generate_answer(
    model,
    processor,
    table: np.ndarray | torch.Tensor | list,
    question: str,
    max_new_tokens: int = 512,
    do_sample: bool = False,
    temperature: float | None = None,
) -> str:
    """
    Generate an answer based on a table and a question.

    Args:
        model: The Qwen2_5_TabularModel instance
        processor: The Qwen_2_5_TabularProcessor instance
        table: The input table as numpy array (including dtype=object for mixed types),
            torch tensor, or list of lists
        question: The question to answer about the table
        max_new_tokens: Maximum number of tokens to generate
        do_sample: Whether to use sampling
        temperature: Sampling temperature (if do_sample=True)

    Returns:
        Generated answer as a string
    """
    # Prepare messages in the expected format
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Consider this table:"},
                {"index": 0, "type": "tabular"},
                {"type": "text", "text": question},
            ],
        }
    ]

    # Apply chat template
    preprocessed = processor.tokenizer.apply_chat_template(
        messages, tokenize=False
    )

    # Process inputs
    processed = processor(
        [table], text=preprocessed, return_tensors="pt"
    )

    # Move to model device
    device = next(model.parameters()).device
    processed = {
        key: value.to(device) if isinstance(value, torch.Tensor) else value
        for key, value in processed.items()
    }

    # Remove tabular_metadata as it's not a model parameter
    processed.pop('tabular_metadata', None)

    # Generate
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
    }
    if temperature is not None:
        gen_kwargs["temperature"] = temperature

    with torch.inference_mode():
        res = model.generate(**processed, **gen_kwargs)

    # Decode only the generated part (remove input)
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(processed["input_ids"], res, strict=True)
    ]
    output_text = processor.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True
    )

    return output_text[0]
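

# Minimal end-to-end sketch. The checkpoint path is a placeholder and the table is
# synthetic; everything else uses only the helpers defined in this file.
if __name__ == "__main__":
    model, processor = load_model_and_processor("path/to/checkpoint", device="cuda:0")
    table = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    answer = generate_answer(
        model,
        processor,
        table,
        question="How many rows does this table have?",
        max_new_tokens=64,
    )
    print(answer)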
video_preprocessor_config.json ADDED
@@ -0,0 +1,43 @@
{
  "crop_size": null,
  "data_format": "channels_first",
  "default_to_square": true,
  "device": null,
  "do_center_crop": null,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "do_sample_frames": false,
  "fps": null,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "input_data_format": null,
  "max_frames": 768,
  "max_pixels": 12845056,
  "merge_size": 2,
  "min_frames": 4,
  "min_pixels": 3136,
  "num_frames": null,
  "pad_size": null,
  "patch_size": 14,
  "processor_class": "Qwen2_5_VLProcessor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "return_metadata": false,
  "size": {
    "longest_edge": 12845056,
    "shortest_edge": 3136
  },
  "temporal_patch_size": 2,
  "video_metadata": null,
  "video_processor_type": "Qwen2VLVideoProcessor"
}
vocab.json ADDED
The diff for this file is too large to render. See raw diff