Ole1 committed on
Commit
61365ba
·
verified ·
1 Parent(s): f8d74f8

Upload 13 files

Browse files
Run_gui.py ADDED
@@ -0,0 +1,1523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import torch
4
+ from torch import nn
5
+ from transformers import (
6
+ AutoModel,
7
+ AutoProcessor,
8
+ AutoTokenizer,
9
+ PreTrainedTokenizer,
10
+ PreTrainedTokenizerFast,
11
+ AutoModelForCausalLM,
12
+ BitsAndBytesConfig,
13
+ )
14
+ from PIL import Image
15
+ import torchvision.transforms.functional as TVF
16
+ import contextlib
17
+ from typing import Union, List
18
+ from pathlib import Path
19
+ import re
20
+
21
+ from PyQt5.QtWidgets import (
22
+ QApplication,
23
+ QWidget,
24
+ QLabel,
25
+ QPushButton,
26
+ QFileDialog,
27
+ QLineEdit,
28
+ QTextEdit,
29
+ QComboBox,
30
+ QVBoxLayout,
31
+ QHBoxLayout,
32
+ QCheckBox,
33
+ QListWidget,
34
+ QListWidgetItem,
35
+ QMessageBox,
36
+ QSizePolicy,
37
+ QStatusBar,
38
+ QProgressBar,
39
+ QMainWindow,
40
+ )
41
+ from PyQt5.QtGui import QPixmap, QIcon
42
+ from PyQt5.QtCore import Qt, QTimer
43
+
44
# --- Constants and Mappings ---
# Hugging Face model ID of the SigLIP vision encoder used to embed images.
CLIP_PATH = "google/siglip-so400m-patch14-384"

# Maps each caption style to three prompt templates, indexed by how the
# caption length was specified: [0] no length, [1] numeric word count
# (uses {word_count}), [2] descriptive length (uses {length}).
# generate_caption() picks the index from the selected caption length.
CAPTION_TYPE_MAP = {
    "Descriptive": [
        "Write a descriptive caption for this image in a formal tone.",
        "Write a descriptive caption for this image in a formal tone within {word_count} words.",
        "Write a {length} descriptive caption for this image in a formal tone.",
    ],
    "Descriptive (Informal)": [
        "Write a descriptive caption for this image in a casual tone.",
        "Write a descriptive caption for this image in a casual tone within {word_count} words.",
        "Write a {length} descriptive caption for this image in a casual tone.",
    ],
    "Training Prompt": [
        "Write a stable diffusion prompt for this image.",
        "Write a stable diffusion prompt for this image within {word_count} words.",
        "Write a {length} stable diffusion prompt for this image.",
    ],
    "MidJourney": [
        "Write a MidJourney prompt for this image.",
        "Write a MidJourney prompt for this image within {word_count} words.",
        "Write a {length} MidJourney prompt for this image.",
    ],
    "Booru tag list": [
        "Write a list of Booru tags for this image.",
        "Write a list of Booru tags for this image within {word_count} words.",
        "Write a {length} list of Booru tags for this image.",
    ],
    "Booru-like tag list": [
        "Write a list of Booru-like tags for this image.",
        "Write a list of Booru-like tags for this image within {word_count} words.",
        "Write a {length} list of Booru-like tags for this image.",
    ],
    "Art Critic": [
        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc.",
        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it within {word_count} words.",
        "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it {length}.",
    ],
    "Product Listing": [
        "Write a caption for this image as though it were a product listing.",
        "Write a caption for this image as though it were a product listing. Keep it under {word_count} words.",
        "Write a {length} caption for this image as though it were a product listing.",
    ],
    "Social Media Post": [
        "Write a caption for this image as if it were being used for a social media post.",
        "Write a caption for this image as if it were being used for a social media post. Limit the caption to {word_count} words.",
        "Write a {length} caption for this image as if it were being used for a social media post.",
    ],
}

# Optional extra instructions the user can tick in the GUI; the selected ones
# are appended verbatim to the base prompt. The first entry contains {name},
# which is filled from the "Person/Character Name" field.
EXTRA_OPTIONS_LIST = [
    "If there is a person/character in the image you must refer to them as {name}.",
    "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
    "Include information about lighting.",
    "Include information about camera angle.",
    "Include information about whether there is a watermark or not.",
    "Include information about whether there are JPEG artifacts or not.",
    "If it is a photo you MUST include information about what camera was likely used and details such as aperture, shutter speed, ISO, etc.",
    "Do NOT include anything sexual; keep it PG.",
    "Do NOT mention the image's resolution.",
    "You MUST include information about the subjective aesthetic quality of the image from low to very high.",
    "Include information on the image's composition style, such as leading lines, rule of thirds, or symmetry.",
    "Do NOT mention any text that is in the image.",
    "Specify the depth of field and whether the background is in focus or blurred.",
    "If applicable, mention the likely use of artificial or natural lighting sources.",
    "Do NOT use any ambiguous language.",
    "Include whether the image is sfw, suggestive, or nsfw.",
    "ONLY describe the most important elements of the image.",
]

# Caption length choices: descriptive labels plus numeric word counts
# 20, 30, ..., 260 (as strings, since they populate a QComboBox).
CAPTION_LENGTH_CHOICES = (
    ["any", "very short", "short", "medium-length", "long", "very long"]
    + [str(i) for i in range(20, 261, 10)]
)

# Optional Hugging Face auth token read from the environment (None if unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# --- Device and Autocast Setup ---
# Prefer CUDA when available; bf16 on GPU, full fp32 on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch_dtype = torch.bfloat16
else:
    torch_dtype = torch.float32

# autocast() is called around the vision/adapter forward pass: a bf16 autocast
# context on CUDA, and a no-op context manager on CPU.
if device.type == "cuda":
    autocast = lambda: torch.amp.autocast(device_type='cuda', dtype=torch_dtype)
else:
    autocast = contextlib.nullcontext
132
+
133
# --- ImageAdapter Class ---
class ImageAdapter(nn.Module):
    """Projects CLIP vision hidden states into the LLM's embedding space.

    The projected image tokens are wrapped with learned <|image_start|> /
    <|image_end|> embeddings, and a learned <|eot_id|> embedding is exposed
    via :meth:`get_eot_embedding`.
    """

    def __init__(
        self,
        input_features: int,
        output_features: int,
        ln1: bool,
        pos_emb: bool,
        num_image_tokens: int,
        deep_extract: bool,
    ):
        super().__init__()
        self.deep_extract = deep_extract

        # Deep extraction concatenates five hidden-state layers feature-wise,
        # so the effective input width grows by a factor of five.
        if self.deep_extract:
            input_features = input_features * 5

        self.linear1 = nn.Linear(input_features, output_features)
        self.activation = nn.GELU()
        self.linear2 = nn.Linear(output_features, output_features)
        self.ln1 = nn.LayerNorm(input_features) if ln1 else nn.Identity()
        self.pos_emb = (
            nn.Parameter(torch.zeros(num_image_tokens, input_features)) if pos_emb else None
        )

        # Learned embeddings for <|image_start|>, <|image_end|>, <|eot_id|>.
        self.other_tokens = nn.Embedding(3, output_features)
        self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self, vision_outputs: torch.Tensor):
        """Map vision hidden states to a (batch, tokens + 2, output) tensor."""
        if self.deep_extract:
            # Second-to-last layer plus four fixed intermediate layers.
            selected = [vision_outputs[i] for i in (-2, 3, 7, 13, 20)]
            x = torch.concat(selected, dim=-1)
            assert len(x.shape) == 3
            assert x.shape[-1] == vision_outputs[-2].shape[-1] * 5
        else:
            x = vision_outputs[-2]

        x = self.ln1(x)

        if self.pos_emb is not None:
            assert x.shape[-2:] == self.pos_emb.shape
            x = x + self.pos_emb

        x = self.linear2(self.activation(self.linear1(x)))

        # Surround the image tokens with <|image_start|> / <|image_end|>.
        ids = torch.tensor([0, 1], device=self.other_tokens.weight.device)
        bookends = self.other_tokens(ids.expand(x.shape[0], -1))
        assert bookends.shape == (x.shape[0], 2, x.shape[2])
        return torch.cat((bookends[:, 0:1], x, bookends[:, 1:2]), dim=1)

    def get_eot_embedding(self):
        """Return the learned <|eot_id|> embedding, shape (output_features,)."""
        idx = torch.tensor([2], device=self.other_tokens.weight.device)
        return self.other_tokens(idx).squeeze(0)
201
+
202
# --- load_models Function ---
def load_models(CHECKPOINT_PATH, status_callback=None):
    """Load every model component needed for captioning.

    Args:
        CHECKPOINT_PATH: pathlib.Path to a checkpoint folder expected to
            contain ``clip_model.pt``, ``image_adapter.pt`` and a
            ``text_model`` subfolder (tokenizer files + LLM weights).
        status_callback: optional callable taking a status string; used to
            surface progress messages in the GUI.

    Returns:
        Tuple of (clip_processor, clip_model, tokenizer, text_model,
        image_adapter), with CLIP and the adapter moved to ``device``.

    Raises:
        FileNotFoundError: if ``clip_model.pt`` or ``image_adapter.pt`` is
            missing from CHECKPOINT_PATH.
        TypeError: if the loaded tokenizer is not a
            PreTrainedTokenizer/PreTrainedTokenizerFast.
    """
    def update_status(msg):
        # Report to both the GUI callback (if any) and the console.
        if status_callback:
            status_callback(msg)
        print(msg)  # Keep console output

    update_status("Loading CLIP processor...")
    clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
    update_status("Loading CLIP vision model...")
    clip_model = AutoModel.from_pretrained(CLIP_PATH)
    # Only the vision tower is needed; the text half of SigLIP is discarded.
    clip_model = clip_model.vision_model

    clip_model_path = CHECKPOINT_PATH / "clip_model.pt"
    if not clip_model_path.exists():
        raise FileNotFoundError(f"clip_model.pt not found in {CHECKPOINT_PATH}")

    update_status("Loading VLM's custom vision weights...")
    checkpoint = torch.load(clip_model_path, map_location="cpu")
    # Strip the torch.compile/DataParallel prefix so keys match the bare
    # vision model's state dict.
    checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
    clip_model.load_state_dict(checkpoint)
    del checkpoint

    clip_model.eval()
    clip_model.requires_grad_(False)
    update_status(f"Moving CLIP to {device}...")
    clip_model.to(device)

    update_status("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        CHECKPOINT_PATH / "text_model", use_fast=True
    )
    if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
        raise TypeError(f"Tokenizer is of type {type(tokenizer)}")

    # Chat-control tokens referenced later when building the prompt in
    # generate_caption (notably "<|end|>").
    special_tokens_dict = {'additional_special_tokens': ['<|system|>', '<|user|>', '<|end|>', '<|eot_id|>']}
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    update_status(f"Added {num_added_toks} special tokens.")

    update_status("Loading LLM with 4-bit quantization (this may take time)...")
    text_model = AutoModelForCausalLM.from_pretrained(
        CHECKPOINT_PATH / "text_model",
        device_map="auto",
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_compute_dtype=torch.float16
        )
    )
    text_model.eval()

    if num_added_toks > 0:
        # Grow the LLM embedding matrix to cover the newly added tokens.
        update_status("Resizing LLM token embeddings...")
        text_model.resize_token_embeddings(len(tokenizer))

    update_status("Loading image adapter...")
    # NOTE(review): hard-coded adapter config (no LayerNorm, no pos-emb,
    # 38 image tokens, no deep-extract) must match how image_adapter.pt was
    # trained — confirm against the checkpoint's training setup.
    image_adapter = ImageAdapter(
        clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False
    )
    image_adapter_path = CHECKPOINT_PATH / "image_adapter.pt"
    if not image_adapter_path.exists():
        raise FileNotFoundError(f"image_adapter.pt not found in {CHECKPOINT_PATH}")

    image_adapter.load_state_dict(
        torch.load(image_adapter_path, map_location="cpu")
    )
    image_adapter.eval()
    update_status(f"Moving image adapter to {device}...")
    image_adapter.to(device)

    update_status("Models loaded successfully.")
    return clip_processor, clip_model, tokenizer, text_model, image_adapter
275
+
276
# --- generate_caption Function ---
@torch.no_grad()
def generate_caption(
    input_image: Image.Image,
    caption_type: str,
    caption_length: Union[str, int],
    extra_options: List[str],
    name_input: str,
    custom_prompt: str,
    clip_model,
    tokenizer,
    text_model,
    image_adapter,
) -> tuple:
    """Generate a caption for one image with the loaded model stack.

    Builds a prompt from caption_type/caption_length/extra_options (or uses
    custom_prompt verbatim when non-empty), embeds the image via CLIP + the
    image adapter, splices the image embeddings into the tokenized chat
    prompt, and decodes the LLM's continuation.

    Returns:
        (prompt_str, caption) — the prompt actually used and the cleaned
        generated caption.

    Raises:
        ValueError: on image-conversion failure, invalid caption length, or
            a tokenizer missing the "<|end|>" token.
    """
    if device.type == "cuda":
        torch.cuda.empty_cache()

    if custom_prompt.strip() != "":
        # A custom prompt overrides all other prompt-building settings.
        prompt_str = custom_prompt.strip()
    else:
        length = None if caption_length == "any" else caption_length
        if isinstance(length, str):
            try:
                # Numeric strings (e.g. "40") become word counts.
                length = int(length)
            except ValueError:
                pass

        # Template index: 0 = no length, 1 = word count, 2 = descriptive length.
        if length is None: map_idx = 0
        elif isinstance(length, int): map_idx = 1
        elif isinstance(length, str): map_idx = 2
        else: raise ValueError(f"Invalid caption length: {length}")

        prompt_str = CAPTION_TYPE_MAP[caption_type][map_idx]
        if len(extra_options) > 0: prompt_str += " " + " ".join(extra_options)
        prompt_str = prompt_str.format(name=name_input, length=caption_length, word_count=caption_length)

    print(f"Prompt: {prompt_str}")

    try:
        image = input_image.convert("RGB")
    except Exception as e: raise ValueError(f"Error converting image to RGB: {e}")
    if image.mode != "RGB": raise ValueError(f"Image mode after conversion is {image.mode}, expected 'RGB'.")

    # SigLIP expects 384x384 inputs normalized to [-1, 1].
    image = image.resize((384, 384), Image.LANCZOS)
    pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
    pixel_values = TVF.normalize(pixel_values, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    pixel_values = pixel_values.to(device)

    with autocast():
        vision_outputs = clip_model(pixel_values=pixel_values, output_hidden_states=True)
        embedded_images = image_adapter(vision_outputs.hidden_states)
        embedded_images = embedded_images.to(device)

    convo = [
        {"role": "system", "content": "You are a helpful image captioner."},
        {"role": "user", "content": prompt_str},
    ]

    if hasattr(tokenizer, "apply_chat_template"):
        convo_string = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
    else:
        # Fallback template using the special tokens added in load_models.
        convo_string = ("<|system|>\n" + convo[0]["content"] + "\n<|end|>\n<|user|>\n" + convo[1]["content"] + "\n<|end|>\n")
    assert isinstance(convo_string, str)

    convo_tokens = tokenizer.encode(convo_string, return_tensors="pt", add_special_tokens=False, truncation=False).to(device)
    prompt_tokens = tokenizer.encode(prompt_str, return_tensors="pt", add_special_tokens=False, truncation=False).to(device)
    assert isinstance(convo_tokens, torch.Tensor) and isinstance(prompt_tokens, torch.Tensor)
    convo_tokens = convo_tokens.squeeze(0)
    prompt_tokens = prompt_tokens.squeeze(0)

    # Image embeddings are inserted right after the system preamble, i.e.
    # after the first "<|end|>" token.
    end_token_id = tokenizer.convert_tokens_to_ids("<|end|>")
    if end_token_id is None: raise ValueError("Tokenizer missing '<|end|>' token.")
    end_token_indices = (convo_tokens == end_token_id).nonzero(as_tuple=True)[0].tolist()
    preamble_len = end_token_indices[0] + 1 if len(end_token_indices) >= 1 else 0

    # NOTE(review): assumes a Llama-style wrapper where the embedding layer is
    # reachable as text_model.model.embed_tokens — confirm for other LLMs.
    convo_embeds = text_model.model.embed_tokens(convo_tokens.unsqueeze(0).to(device))
    input_embeds = torch.cat([
        convo_embeds[:, :preamble_len],
        embedded_images.to(dtype=convo_embeds.dtype),
        convo_embeds[:, preamble_len:],
    ], dim=1).to(device)

    # Parallel token-id sequence; image positions are filled with pad tokens
    # so input_ids stays aligned with input_embeds.
    input_ids = torch.cat([
        convo_tokens[:preamble_len].unsqueeze(0),
        torch.full((1, embedded_images.shape[1]), tokenizer.pad_token_id, dtype=torch.long, device=device),
        convo_tokens[preamble_len:].unsqueeze(0),
    ], dim=1).to(device)
    attention_mask = torch.ones_like(input_ids).to(device)

    print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")

    # NOTE(review): passing both input_ids and inputs_embeds relies on the
    # installed transformers version accepting that combination in generate().
    generate_ids = text_model.generate(
        input_ids=input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask,
        max_new_tokens=300, do_sample=True, temperature=0.6, top_p=0.9,
        suppress_tokens=None, eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|end|>")]
    )

    # Drop the prompt portion, then decode and normalize whitespace.
    generate_ids = generate_ids[:, input_ids.shape[1]:]
    caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
    caption = caption.strip()
    caption = re.sub(r'\s+', ' ', caption)

    return prompt_str, caption
379
+
380
+ class CaptionApp(QMainWindow):
381
    def __init__(self):
        """Initialize window state, model placeholders, and build the UI."""
        super().__init__()
        self.setWindowTitle("JoyCaption Alpha Two - Enhanced")
        self.setGeometry(100, 100, 1200, 850)
        self.setMinimumSize(1000, 750)

        # Model components are populated later by load_models_action.
        self.clip_processor = None
        self.clip_model = None
        self.tokenizer = None
        self.text_model = None
        self.image_adapter = None
        self.models_loaded = False

        # Input selection state: batch directory, single file, list selection.
        self.input_dir = None
        self.single_image_path = None
        self.selected_image_path = None
        self.image_files = []

        # Theme flag toggled by toggle_theme; applied via apply_theme.
        self.dark_mode = False

        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        self.main_layout = QHBoxLayout(self.central_widget)

        self.initUI()  # Build all widgets and layouts.
        self.update_button_states()
        self.apply_theme()
409
+
410
+
411
    def initUI(self):
        """Build the two-panel UI: controls on the left, image list, preview
        and caption editor on the right, plus a status/progress bar."""
        # --- Left Panel ---
        left_panel = QVBoxLayout()
        left_panel.setSpacing(10)

        # Input directory selection (batch mode).
        dir_layout = QHBoxLayout()
        self.input_dir_button = QPushButton("Select Input Directory")
        self.input_dir_button.setToolTip("Select a folder containing images to process in batch.")
        self.input_dir_button.clicked.connect(self.select_input_directory)
        dir_layout.addWidget(self.input_dir_button)
        self.input_dir_label = QLabel("No directory selected")
        self.input_dir_label.setWordWrap(True)
        dir_layout.addWidget(self.input_dir_label, 1)
        left_panel.addLayout(dir_layout)

        # Single image selection.
        single_img_layout = QHBoxLayout()
        self.single_image_button = QPushButton("Select Single Image")
        self.single_image_button.setToolTip("Select one image file to process.")
        self.single_image_button.clicked.connect(self.select_single_image)
        single_img_layout.addWidget(self.single_image_button)
        self.single_image_label = QLabel("No image selected")
        self.single_image_label.setWordWrap(True)
        single_img_layout.addWidget(self.single_image_label, 1)
        left_panel.addLayout(single_img_layout)

        # Caption Type (keys of CAPTION_TYPE_MAP).
        self.caption_type_combo = QComboBox()
        self.caption_type_combo.addItems(CAPTION_TYPE_MAP.keys())
        self.caption_type_combo.setCurrentText("Descriptive")
        self.caption_type_combo.setToolTip("Choose the style or purpose of the caption.")
        left_panel.addWidget(QLabel("Caption Type:"))
        left_panel.addWidget(self.caption_type_combo)

        # Caption Length (labels and word counts from CAPTION_LENGTH_CHOICES).
        self.caption_length_combo = QComboBox()
        self.caption_length_combo.addItems(CAPTION_LENGTH_CHOICES)
        self.caption_length_combo.setCurrentText("long")
        self.caption_length_combo.setToolTip("Select desired caption length or word count.")
        left_panel.addWidget(QLabel("Caption Length:"))
        left_panel.addWidget(self.caption_length_combo)

        # Extra Options: one checkbox per entry in EXTRA_OPTIONS_LIST.
        left_panel.addWidget(QLabel("Extra Options:"))
        self.extra_options_checkboxes = []
        for option in EXTRA_OPTIONS_LIST:
            checkbox = QCheckBox(option)
            checkbox.setToolTip(option)
            self.extra_options_checkboxes.append(checkbox)
            left_panel.addWidget(checkbox)

        # Name Input: substituted into the "{name}" extra option.
        self.name_input_line = QLineEdit()
        self.name_input_line.setPlaceholderText("e.g., 'the main character'")
        self.name_input_line.setToolTip("If the first extra option is checked, this name will be used.")
        left_panel.addWidget(QLabel("Person/Character Name (optional):"))
        left_panel.addWidget(self.name_input_line)

        # Custom Prompt: overrides type/length/options when non-empty.
        self.custom_prompt_text = QTextEdit()
        self.custom_prompt_text.setPlaceholderText("Overrides Caption Type/Length/Options if used.")
        self.custom_prompt_text.setToolTip("Enter a full custom prompt here to ignore other settings.")
        self.custom_prompt_text.setFixedHeight(80)
        left_panel.addWidget(QLabel("Custom Prompt (optional):"))
        left_panel.addWidget(self.custom_prompt_text)

        # Checkpoint Path: folder passed to load_models.
        ckpt_layout = QHBoxLayout()
        self.checkpoint_path_line = QLineEdit()
        self.checkpoint_path_line.setToolTip("Path to the folder containing model files (clip_model.pt, etc.).")
        ckpt_layout.addWidget(QLabel("Checkpoint Path:"))
        ckpt_layout.addWidget(self.checkpoint_path_line)
        self.browse_ckpt_button = QPushButton("...")
        self.browse_ckpt_button.setToolTip("Browse for Checkpoint Directory")
        self.browse_ckpt_button.clicked.connect(self.browse_checkpoint_path)
        self.browse_ckpt_button.setMaximumWidth(30)
        ckpt_layout.addWidget(self.browse_ckpt_button)
        left_panel.addLayout(ckpt_layout)

        # Load Models Button.
        self.load_models_button = QPushButton("Load Models")
        self.load_models_button.setToolTip("Load the AI models into memory (requires checkpoint path).")
        self.load_models_button.clicked.connect(self.load_models_action)
        left_panel.addWidget(self.load_models_button)

        # Run Buttons: batch, list-selected, and single-image captioning.
        self.run_button = QPushButton("Generate Captions for All Images in Directory")
        self.run_button.setToolTip("Process all loaded images from the selected directory.")
        self.run_button.clicked.connect(self.generate_captions_action)
        left_panel.addWidget(self.run_button)

        self.caption_selected_button = QPushButton("Caption Selected Image from List")
        self.caption_selected_button.setToolTip("Process the image currently highlighted in the list.")
        self.caption_selected_button.clicked.connect(self.caption_selected_image_action)
        left_panel.addWidget(self.caption_selected_button)

        self.caption_single_button = QPushButton("Caption Single Loaded Image")
        self.caption_single_button.setToolTip("Process the image selected via 'Select Single Image'.")
        self.caption_single_button.clicked.connect(self.caption_single_image_action)
        left_panel.addWidget(self.caption_single_button)

        # Theme Toggle Button.
        self.toggle_theme_button = QPushButton("Toggle Dark Mode")
        self.toggle_theme_button.setToolTip("Switch between light and dark themes.")
        self.toggle_theme_button.clicked.connect(self.toggle_theme)
        left_panel.addWidget(self.toggle_theme_button)

        left_panel.addStretch(1)

        # --- Right Panel ---
        right_panel = QVBoxLayout()
        right_panel.setSpacing(10)

        # List widget for images in the selected directory.
        right_panel.addWidget(QLabel("Images in Directory:"))
        self.image_list_widget = QListWidget()
        self.image_list_widget.setIconSize(self.image_list_widget.iconSize() * 2)
        self.image_list_widget.itemClicked.connect(self.display_selected_image)
        self.image_list_widget.setToolTip("Click an image to view it and enable 'Caption Selected Image'.")
        right_panel.addWidget(self.image_list_widget, 1)

        # Label to display the selected image.
        right_panel.addWidget(QLabel("Selected Image Preview:"))
        self.selected_image_label = QLabel("No image selected")
        self.selected_image_label.setAlignment(Qt.AlignCenter)
        self.selected_image_label.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)
        self.selected_image_label.setMinimumSize(300, 300)
        self.selected_image_label.setStyleSheet("border: 1px solid gray;")
        right_panel.addWidget(self.selected_image_label, 3)

        # Generated Caption Area (editable before saving).
        right_panel.addWidget(QLabel("Generated/Editable Caption:"))
        self.generated_caption_text = QTextEdit()
        self.generated_caption_text.setReadOnly(False)
        self.generated_caption_text.setPlaceholderText("Generated caption will appear here. You can edit it before saving.")
        self.generated_caption_text.setToolTip("The generated caption appears here. Edit and use 'Save Edited Caption'.")
        right_panel.addWidget(self.generated_caption_text, 1)

        # Save-mode options for the per-image .txt caption files.
        self.overwrite_checkbox = QCheckBox("Overwrite existing captions")
        self.overwrite_checkbox.setToolTip("If checked, automatically overwrites existing .txt files without asking.")
        self.append_checkbox = QCheckBox("Append to existing captions")
        self.append_checkbox.setToolTip("If checked, adds the new caption to the end of the existing .txt file.")

        # Layout for the save options.
        save_options_layout = QHBoxLayout()
        save_options_layout.addWidget(self.overwrite_checkbox)
        save_options_layout.addWidget(self.append_checkbox)
        save_options_layout.addStretch(1)
        right_panel.addLayout(save_options_layout)

        # Append and overwrite are mutually exclusive: checking "append"
        # disables the overwrite checkbox.
        self.append_checkbox.stateChanged.connect(
            lambda state: self.overwrite_checkbox.setEnabled(state == Qt.Unchecked)
        )

        # Save Edited Caption Button.
        self.save_caption_button = QPushButton("Save Edited Caption to File")
        self.save_caption_button.setToolTip("Save the text currently in the box above to the corresponding .txt file using the selected options.")
        self.save_caption_button.clicked.connect(self.save_edited_caption_action)
        right_panel.addWidget(self.save_caption_button)

        # --- Main Layout Assembly
        self.main_layout.addLayout(left_panel, 2)
        self.main_layout.addLayout(right_panel, 5)

        # --- Status Bar and Progress Bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        self.progress_bar = QProgressBar()
        self.status_bar.addPermanentWidget(self.progress_bar)
        self.progress_bar.hide()
        self.show_status("Ready.", 5000)
589
+
590
+
591
+
592
# =============================================================================
# NOTE(review): Everything from this point duplicates the top of this file
# (imports, constants, device setup). This looks like an accidental re-paste
# of the module header. Re-importing and redefining these names is redundant
# (the later definitions silently shadow the earlier identical ones); the
# duplicated section should be removed after confirming nothing below depends
# on re-running it.
# =============================================================================
import sys
import os
import torch
from torch import nn
from transformers import (
    AutoModel,
    AutoProcessor,
    AutoTokenizer,
    PreTrainedTokenizer,
    PreTrainedTokenizerFast,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)
from PIL import Image
import torchvision.transforms.functional as TVF
import contextlib
from typing import Union, List
from pathlib import Path
import re  # Added for spacing fix

from PyQt5.QtWidgets import (
    QApplication,
    QWidget,
    QLabel,
    QPushButton,
    QFileDialog,
    QLineEdit,
    QTextEdit,
    QComboBox,
    QVBoxLayout,
    QHBoxLayout,
    QCheckBox,
    QListWidget,
    QListWidgetItem,
    QMessageBox,
    QSizePolicy,
    QStatusBar,
    QProgressBar,
    QMainWindow,
)
from PyQt5.QtGui import QPixmap, QIcon
from PyQt5.QtCore import Qt, QTimer
634
+
635
+ # --- Constants and Mappings ---
636
+ CLIP_PATH = "google/siglip-so400m-patch14-384"
637
+ CAPTION_TYPE_MAP = {
638
+ "Descriptive": [
639
+ "Write a descriptive caption for this image in a formal tone.",
640
+ "Write a descriptive caption for this image in a formal tone within {word_count} words.",
641
+ "Write a {length} descriptive caption for this image in a formal tone.",
642
+ ],
643
+ "Descriptive (Informal)": [
644
+ "Write a descriptive caption for this image in a casual tone.",
645
+ "Write a descriptive caption for this image in a casual tone within {word_count} words.",
646
+ "Write a {length} descriptive caption for this image in a casual tone.",
647
+ ],
648
+ "Training Prompt": [
649
+ "Write a stable diffusion prompt for this image.",
650
+ "Write a stable diffusion prompt for this image within {word_count} words.",
651
+ "Write a {length} stable diffusion prompt for this image.",
652
+ ],
653
+ "MidJourney": [
654
+ "Write a MidJourney prompt for this image.",
655
+ "Write a MidJourney prompt for this image within {word_count} words.",
656
+ "Write a {length} MidJourney prompt for this image.",
657
+ ],
658
+ "Booru tag list": [
659
+ "Write a list of Booru tags for this image.",
660
+ "Write a list of Booru tags for this image within {word_count} words.",
661
+ "Write a {length} list of Booru tags for this image.",
662
+ ],
663
+ "Booru-like tag list": [
664
+ "Write a list of Booru-like tags for this image.",
665
+ "Write a list of Booru-like tags for this image within {word_count} words.",
666
+ "Write a {length} list of Booru-like tags for this image.",
667
+ ],
668
+ "Art Critic": [
669
+ "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc.",
670
+ "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it within {word_count} words.",
671
+ "Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. Keep it {length}.",
672
+ ],
673
+ "Product Listing": [
674
+ "Write a caption for this image as though it were a product listing.",
675
+ "Write a caption for this image as though it were a product listing. Keep it under {word_count} words.",
676
+ "Write a {length} caption for this image as though it were a product listing.",
677
+ ],
678
+ "Social Media Post": [
679
+ "Write a caption for this image as if it were being used for a social media post.",
680
+ "Write a caption for this image as if it were being used for a social media post. Limit the caption to {word_count} words.",
681
+ "Write a {length} caption for this image as if it were being used for a social media post.",
682
+ ],
683
+ }
684
+
685
# Optional instruction fragments the user can toggle in the GUI.
# Checked entries are appended (space-joined) after the base prompt in
# generate_caption(), so each entry must be a complete, standalone sentence.
# The first entry contains a "{name}" placeholder filled from the name input.
EXTRA_OPTIONS_LIST = [
    "If there is a person/character in the image you must refer to them as {name}.",
    "Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
    "Include information about lighting.",
    "Include information about camera angle.",
    "Include information about whether there is a watermark or not.",
    "Include information about whether there are JPEG artifacts or not.",
    "If it is a photo you MUST include information about what camera was likely used and details such as aperture, shutter speed, ISO, etc.",
    "Do NOT include anything sexual; keep it PG.",
    "Do NOT mention the image's resolution.",
    "You MUST include information about the subjective aesthetic quality of the image from low to very high.",
    "Include information on the image's composition style, such as leading lines, rule of thirds, or symmetry.",
    "Do NOT mention any text that is in the image.",
    "Specify the depth of field and whether the background is in focus or blurred.",
    "If applicable, mention the likely use of artificial or natural lighting sources.",
    "Do NOT use any ambiguous language.",
    "Include whether the image is sfw, suggestive, or nsfw.",
    "ONLY describe the most important elements of the image.",
]
704
+
705
# Caption-length options shown in the GUI: six qualitative buckets followed
# by explicit word counts 20, 30, ..., 260 (as strings).
CAPTION_LENGTH_CHOICES = [
    "any", "very short", "short", "medium-length", "long", "very long",
    *(str(count) for count in range(20, 261, 10)),
]
709
+
710
# Optional Hugging Face auth token; None when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# --- Device and Autocast Setup ---
# Prefer CUDA with bfloat16 when available; otherwise CPU with float32.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.bfloat16 if device.type == "cuda" else torch.float32

if device.type == "cuda":
    # Callable returning a mixed-precision context; mirrors torch_dtype above.
    def autocast():
        return torch.amp.autocast(device_type='cuda', dtype=torch_dtype)
else:
    # No-op context manager on CPU so call sites can use `with autocast():` uniformly.
    autocast = contextlib.nullcontext
723
+
724
+ # --- ImageAdapter Class ---
725
class ImageAdapter(nn.Module):
    """Projects CLIP vision hidden states into the LLM's embedding space.

    The adapter is a two-layer MLP (linear -> GELU -> linear) with optional
    LayerNorm and learned positional embeddings on the input side.  It also
    owns a 3-entry embedding table for special tokens; forward() wraps the
    projected image tokens with entries 0 and 1, and get_eot_embedding()
    returns entry 2.

    NOTE: attribute names (linear1, linear2, ln1, pos_emb, other_tokens)
    are state-dict keys for image_adapter.pt — do not rename them.
    """

    def __init__(
        self,
        input_features: int,    # hidden size of the CLIP vision tower
        output_features: int,   # hidden size of the text model's embeddings
        ln1: bool,              # if True, LayerNorm the vision features first
        pos_emb: bool,          # if True, add learned positional embeddings
        num_image_tokens: int,  # token count expected when pos_emb is True
        deep_extract: bool,     # if True, concatenate 5 hidden-state layers
    ):
        super().__init__()
        self.deep_extract = deep_extract

        if self.deep_extract:
            # forward() concatenates 5 hidden-state layers along the feature
            # dim, so the input width grows fivefold.
            input_features = input_features * 5

        self.linear1 = nn.Linear(input_features, output_features)
        self.activation = nn.GELU()
        self.linear2 = nn.Linear(output_features, output_features)
        self.ln1 = nn.Identity() if not ln1 else nn.LayerNorm(input_features)
        self.pos_emb = (
            None if not pos_emb else nn.Parameter(torch.zeros(num_image_tokens, input_features))
        )

        # Other tokens (<|image_start|>, <|image_end|>, <|eot_id|>)
        self.other_tokens = nn.Embedding(3, output_features)
        self.other_tokens.weight.data.normal_(
            mean=0.0, std=0.02
        )

    def forward(self, vision_outputs: torch.Tensor):
        """Map CLIP hidden states to LLM-space tokens bracketed by start/end embeddings.

        vision_outputs is the tuple/list of per-layer hidden states from the
        vision tower; the penultimate layer (index -2) is the primary source.
        Returns a (batch, 1 + tokens + 1, output_features) tensor.
        """
        if self.deep_extract:
            # Fuse five layers at different depths for richer features.
            x = torch.concat(
                (
                    vision_outputs[-2],
                    vision_outputs[3],
                    vision_outputs[7],
                    vision_outputs[13],
                    vision_outputs[20],
                ),
                dim=-1,
            )
            assert len(x.shape) == 3
            assert x.shape[-1] == vision_outputs[-2].shape[-1] * 5
        else:
            x = vision_outputs[-2]

        x = self.ln1(x)

        if self.pos_emb is not None:
            assert x.shape[-2:] == self.pos_emb.shape
            x = x + self.pos_emb

        # Two-layer MLP projection into the text model's embedding space.
        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)

        # Look up the <|image_start|> (0) and <|image_end|> (1) embeddings for
        # every batch element and bracket the image tokens with them.
        other_tokens = self.other_tokens(
            torch.tensor([0, 1], device=self.other_tokens.weight.device).expand(x.shape[0], -1)
        )
        assert other_tokens.shape == (x.shape[0], 2, x.shape[2])
        x = torch.cat((other_tokens[:, 0:1], x, other_tokens[:, 1:2]), dim=1)

        return x

    def get_eot_embedding(self):
        """Return the learned <|eot_id|> embedding (table entry 2) as a 1-D tensor."""
        return self.other_tokens(torch.tensor([2], device=self.other_tokens.weight.device)).squeeze(0)
792
+
793
+ # --- load_models Function ---
794
def load_models(CHECKPOINT_PATH, status_callback=None):
    """Load the CLIP vision tower, tokenizer, 4-bit LLM, and image adapter.

    CHECKPOINT_PATH is a pathlib.Path to a directory containing
    clip_model.pt, image_adapter.pt and a text_model/ subfolder.
    status_callback, if given, receives progress strings (also printed).

    Returns (clip_processor, clip_model, tokenizer, text_model, image_adapter).
    Raises FileNotFoundError when a required checkpoint file is missing and
    TypeError when the loaded tokenizer is not a PreTrained tokenizer.
    """
    def update_status(msg):
        # Forward progress text to the GUI (if attached) and the console.
        if status_callback:
            status_callback(msg)
        print(msg) # Keep console output

    update_status("Loading CLIP processor...")
    clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
    update_status("Loading CLIP vision model...")
    clip_model = AutoModel.from_pretrained(CLIP_PATH)
    # Only the vision tower is needed; the text side of CLIP is discarded.
    clip_model = clip_model.vision_model

    clip_model_path = CHECKPOINT_PATH / "clip_model.pt"
    if not clip_model_path.exists():
        raise FileNotFoundError(f"clip_model.pt not found in {CHECKPOINT_PATH}")

    update_status("Loading VLM's custom vision weights...")
    checkpoint = torch.load(clip_model_path, map_location="cpu")
    # Strip the torch.compile/DataParallel prefix so keys match the bare module.
    checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
    clip_model.load_state_dict(checkpoint)
    del checkpoint

    clip_model.eval()
    clip_model.requires_grad_(False)
    update_status(f"Moving CLIP to {device}...")
    clip_model.to(device)

    update_status("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        CHECKPOINT_PATH / "text_model", use_fast=True
    )
    if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
        raise TypeError(f"Tokenizer is of type {type(tokenizer)}")

    # Special tokens must be added BEFORE the embedding resize below.
    special_tokens_dict = {'additional_special_tokens': ['<|system|>', '<|user|>', '<|end|>', '<|eot_id|>']}
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    update_status(f"Added {num_added_toks} special tokens.")

    update_status("Loading LLM with 4-bit quantization (this may take time)...")
    text_model = AutoModelForCausalLM.from_pretrained(
        CHECKPOINT_PATH / "text_model",
        device_map="auto",
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_compute_dtype=torch.float16
        )
    )
    text_model.eval()

    if num_added_toks > 0:
        # Grow the embedding matrix to cover the newly added special tokens.
        update_status("Resizing LLM token embeddings...")
        text_model.resize_token_embeddings(len(tokenizer))

    update_status("Loading image adapter...")
    # ln1=False, pos_emb=False, 38 image tokens, deep_extract=False — must
    # match the configuration the adapter checkpoint was trained with.
    image_adapter = ImageAdapter(
        clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False
    )
    image_adapter_path = CHECKPOINT_PATH / "image_adapter.pt"
    if not image_adapter_path.exists():
        raise FileNotFoundError(f"image_adapter.pt not found in {CHECKPOINT_PATH}")

    image_adapter.load_state_dict(
        torch.load(image_adapter_path, map_location="cpu")
    )
    image_adapter.eval()
    update_status(f"Moving image adapter to {device}...")
    image_adapter.to(device)

    update_status("Models loaded successfully.")
    return clip_processor, clip_model, tokenizer, text_model, image_adapter
866
+
867
+ # --- generate_caption Function ---
868
@torch.no_grad()
def generate_caption(
    input_image: Image.Image,
    caption_type: str,            # key into CAPTION_TYPE_MAP
    caption_length: Union[str, int],  # "any", a length word, or a word count
    extra_options: List[str],     # checked EXTRA_OPTIONS_LIST sentences
    name_input: str,              # substituted for {name} in the prompt
    custom_prompt: str,           # non-empty -> overrides all prompt building
    clip_model,
    tokenizer,
    text_model,
    image_adapter,
) -> tuple:
    """Caption one image: build the prompt, embed the image, and generate text.

    Returns (prompt_str, caption).  Raises ValueError on an unusable caption
    length, a failed RGB conversion, or a tokenizer missing the '<|end|>' token.
    """
    if device.type == "cuda":
        torch.cuda.empty_cache()

    # --- Prompt construction ---
    if custom_prompt.strip() != "":
        prompt_str = custom_prompt.strip()
    else:
        length = None if caption_length == "any" else caption_length
        if isinstance(length, str):
            # Numeric strings from the combo box become ints (word counts).
            try:
                length = int(length)
            except ValueError:
                pass

        # Template index: 0 = no length, 1 = {word_count}, 2 = {length} word.
        if length is None: map_idx = 0
        elif isinstance(length, int): map_idx = 1
        elif isinstance(length, str): map_idx = 2
        else: raise ValueError(f"Invalid caption length: {length}")

        prompt_str = CAPTION_TYPE_MAP[caption_type][map_idx]
        if len(extra_options) > 0: prompt_str += " " + " ".join(extra_options)
        prompt_str = prompt_str.format(name=name_input, length=caption_length, word_count=caption_length)

    print(f"Prompt: {prompt_str}")

    # --- Image preprocessing: RGB, 384x384, normalized to [-1, 1] ---
    try:
        image = input_image.convert("RGB")
    except Exception as e: raise ValueError(f"Error converting image to RGB: {e}")
    if image.mode != "RGB": raise ValueError(f"Image mode after conversion is {image.mode}, expected 'RGB'.")

    image = image.resize((384, 384), Image.LANCZOS)
    pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
    pixel_values = TVF.normalize(pixel_values, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    pixel_values = pixel_values.to(device)

    # --- Vision encoding: CLIP hidden states -> adapter -> LLM-space tokens ---
    with autocast():
        vision_outputs = clip_model(pixel_values=pixel_values, output_hidden_states=True)
        embedded_images = image_adapter(vision_outputs.hidden_states)
        embedded_images = embedded_images.to(device)

    # --- Chat template ---
    convo = [
        {"role": "system", "content": "You are a helpful image captioner."},
        {"role": "user", "content": prompt_str},
    ]

    if hasattr(tokenizer, "apply_chat_template"):
        convo_string = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
    else:
        # Fallback format mirroring the special tokens added in load_models.
        convo_string = ("<|system|>\n" + convo[0]["content"] + "\n<|end|>\n<|user|>\n" + convo[1]["content"] + "\n<|end|>\n")
    assert isinstance(convo_string, str)

    convo_tokens = tokenizer.encode(convo_string, return_tensors="pt", add_special_tokens=False, truncation=False).to(device)
    prompt_tokens = tokenizer.encode(prompt_str, return_tensors="pt", add_special_tokens=False, truncation=False).to(device)
    assert isinstance(convo_tokens, torch.Tensor) and isinstance(prompt_tokens, torch.Tensor)
    convo_tokens = convo_tokens.squeeze(0)
    prompt_tokens = prompt_tokens.squeeze(0)

    # The image embeddings are spliced in right after the system preamble,
    # i.e. just past the first '<|end|>' token.
    end_token_id = tokenizer.convert_tokens_to_ids("<|end|>")
    if end_token_id is None: raise ValueError("Tokenizer missing '<|end|>' token.")
    end_token_indices = (convo_tokens == end_token_id).nonzero(as_tuple=True)[0].tolist()
    preamble_len = end_token_indices[0] + 1 if len(end_token_indices) >= 1 else 0

    convo_embeds = text_model.model.embed_tokens(convo_tokens.unsqueeze(0).to(device))
    input_embeds = torch.cat([
        convo_embeds[:, :preamble_len],
        embedded_images.to(dtype=convo_embeds.dtype),
        convo_embeds[:, preamble_len:],
    ], dim=1).to(device)

    # Matching input_ids with pad tokens standing in for the image positions.
    input_ids = torch.cat([
        convo_tokens[:preamble_len].unsqueeze(0),
        torch.full((1, embedded_images.shape[1]), tokenizer.pad_token_id, dtype=torch.long, device=device),
        convo_tokens[preamble_len:].unsqueeze(0),
    ], dim=1).to(device)
    attention_mask = torch.ones_like(input_ids).to(device)

    print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")

    # NOTE(review): both input_ids and inputs_embeds are passed; some
    # transformers versions reject this combination — confirm against the
    # pinned transformers release.
    generate_ids = text_model.generate(
        input_ids=input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask,
        max_new_tokens=300, do_sample=True, temperature=0.6, top_p=0.9,
        suppress_tokens=None, eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|end|>")]
    )

    # Drop the prompt portion, decode, and collapse whitespace.
    generate_ids = generate_ids[:, input_ids.shape[1]:]
    caption = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
    caption = caption.strip()
    caption = re.sub(r'\s+', ' ', caption)

    return prompt_str, caption
970
+
971
+ # --- CaptionApp Class ---
972
+ class CaptionApp(QMainWindow):
973
    def __init__(self):
        """Build the main window, initialize state, construct widgets, apply theme."""
        super().__init__()
        self.setWindowTitle("JoyCaption Alpha Two - Enhanced")
        self.setGeometry(100, 100, 1200, 850)
        self.setMinimumSize(1000, 750)

        # Model handles; populated by load_models_action().
        self.clip_processor = None
        self.clip_model = None
        self.tokenizer = None
        self.text_model = None
        self.image_adapter = None
        self.models_loaded = False

        # Input state: batch directory mode and single-image mode are
        # mutually exclusive; selecting one clears the other.
        self.input_dir = None
        self.single_image_path = None
        self.selected_image_path = None  # image highlighted in the list widget
        self.image_files = []

        self.dark_mode = False

        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        self.main_layout = QHBoxLayout(self.central_widget)

        self.initUI() # Call initUI
        self.update_button_states()
        self.apply_theme()
1000
+
1001
    def initUI(self):
        """Create all widgets and wire their signals.

        Left panel: input selection, caption settings, checkpoint path, and
        action buttons.  Right panel: image list, preview, editable caption
        box, and save options.  Also installs the status bar + progress bar.
        """
        # --- Left Panel ---
        left_panel = QVBoxLayout()
        left_panel.setSpacing(10)

        # Input directory selection
        dir_layout = QHBoxLayout()
        self.input_dir_button = QPushButton("Select Input Directory")
        self.input_dir_button.setToolTip("Select a folder containing images to process in batch.")
        self.input_dir_button.clicked.connect(self.select_input_directory)
        dir_layout.addWidget(self.input_dir_button)
        self.input_dir_label = QLabel("No directory selected")
        self.input_dir_label.setWordWrap(True)
        dir_layout.addWidget(self.input_dir_label, 1)
        left_panel.addLayout(dir_layout)

        # Single image selection
        single_img_layout = QHBoxLayout()
        self.single_image_button = QPushButton("Select Single Image")
        self.single_image_button.setToolTip("Select one image file to process.")
        self.single_image_button.clicked.connect(self.select_single_image)
        single_img_layout.addWidget(self.single_image_button)
        self.single_image_label = QLabel("No image selected")
        self.single_image_label.setWordWrap(True)
        single_img_layout.addWidget(self.single_image_label, 1)
        left_panel.addLayout(single_img_layout)

        # Caption Type (keys of CAPTION_TYPE_MAP)
        self.caption_type_combo = QComboBox()
        self.caption_type_combo.addItems(CAPTION_TYPE_MAP.keys())
        self.caption_type_combo.setCurrentText("Descriptive")
        self.caption_type_combo.setToolTip("Choose the style or purpose of the caption.")
        left_panel.addWidget(QLabel("Caption Type:"))
        left_panel.addWidget(self.caption_type_combo)

        # Caption Length
        self.caption_length_combo = QComboBox()
        self.caption_length_combo.addItems(CAPTION_LENGTH_CHOICES)
        self.caption_length_combo.setCurrentText("long")
        self.caption_length_combo.setToolTip("Select desired caption length or word count.")
        left_panel.addWidget(QLabel("Caption Length:"))
        left_panel.addWidget(self.caption_length_combo)

        # Extra Options: one checkbox per EXTRA_OPTIONS_LIST entry
        left_panel.addWidget(QLabel("Extra Options:"))
        self.extra_options_checkboxes = []
        for option in EXTRA_OPTIONS_LIST:
            checkbox = QCheckBox(option)
            checkbox.setToolTip(option)
            self.extra_options_checkboxes.append(checkbox)
            left_panel.addWidget(checkbox)

        # Name Input (fills {name} in the prompt templates)
        self.name_input_line = QLineEdit()
        self.name_input_line.setPlaceholderText("e.g., 'the main character'")
        self.name_input_line.setToolTip("If the first extra option is checked, this name will be used.")
        left_panel.addWidget(QLabel("Person/Character Name (optional):"))
        left_panel.addWidget(self.name_input_line)

        # Custom Prompt (overrides the settings above when non-empty)
        self.custom_prompt_text = QTextEdit()
        self.custom_prompt_text.setPlaceholderText("Overrides Caption Type/Length/Options if used.")
        self.custom_prompt_text.setToolTip("Enter a full custom prompt here to ignore other settings.")
        self.custom_prompt_text.setFixedHeight(80)
        left_panel.addWidget(QLabel("Custom Prompt (optional):"))
        left_panel.addWidget(self.custom_prompt_text)

        # Checkpoint Path
        ckpt_layout = QHBoxLayout()
        self.checkpoint_path_line = QLineEdit()
        self.checkpoint_path_line.setToolTip("Path to the folder containing model files (clip_model.pt, etc.).")
        ckpt_layout.addWidget(QLabel("Checkpoint Path:"))
        ckpt_layout.addWidget(self.checkpoint_path_line)
        self.browse_ckpt_button = QPushButton("...")
        self.browse_ckpt_button.setToolTip("Browse for Checkpoint Directory")
        self.browse_ckpt_button.clicked.connect(self.browse_checkpoint_path)
        self.browse_ckpt_button.setMaximumWidth(30)
        ckpt_layout.addWidget(self.browse_ckpt_button)
        left_panel.addLayout(ckpt_layout)

        # Load Models Button
        self.load_models_button = QPushButton("Load Models")
        self.load_models_button.setToolTip("Load the AI models into memory (requires checkpoint path).")
        self.load_models_button.clicked.connect(self.load_models_action)
        left_panel.addWidget(self.load_models_button)

        # Run Buttons
        self.run_button = QPushButton("Generate Captions for All Images in Directory")
        self.run_button.setToolTip("Process all loaded images from the selected directory.")
        self.run_button.clicked.connect(self.generate_captions_action)
        left_panel.addWidget(self.run_button)

        self.caption_selected_button = QPushButton("Caption Selected Image from List")
        self.caption_selected_button.setToolTip("Process the image currently highlighted in the list.")
        self.caption_selected_button.clicked.connect(self.caption_selected_image_action)
        left_panel.addWidget(self.caption_selected_button)

        self.caption_single_button = QPushButton("Caption Single Loaded Image")
        self.caption_single_button.setToolTip("Process the image selected via 'Select Single Image'.")
        self.caption_single_button.clicked.connect(self.caption_single_image_action)
        left_panel.addWidget(self.caption_single_button)

        # Theme Toggle Button
        self.toggle_theme_button = QPushButton("Toggle Dark Mode")
        self.toggle_theme_button.setToolTip("Switch between light and dark themes.")
        self.toggle_theme_button.clicked.connect(self.toggle_theme)
        left_panel.addWidget(self.toggle_theme_button)

        left_panel.addStretch(1)

        # --- Right Panel ---
        right_panel = QVBoxLayout()
        right_panel.setSpacing(10)

        # List widget for images
        right_panel.addWidget(QLabel("Images in Directory:"))
        self.image_list_widget = QListWidget()
        self.image_list_widget.setIconSize(self.image_list_widget.iconSize() * 2)
        self.image_list_widget.itemClicked.connect(self.display_selected_image)
        self.image_list_widget.setToolTip("Click an image to view it and enable 'Caption Selected Image'.")
        right_panel.addWidget(self.image_list_widget, 1)

        # Label to display the selected image
        right_panel.addWidget(QLabel("Selected Image Preview:"))
        self.selected_image_label = QLabel("No image selected")
        self.selected_image_label.setAlignment(Qt.AlignCenter)
        self.selected_image_label.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)
        self.selected_image_label.setMinimumSize(300, 300)
        self.selected_image_label.setStyleSheet("border: 1px solid gray;")
        right_panel.addWidget(self.selected_image_label, 3)

        # Generated Caption Area (editable before saving)
        right_panel.addWidget(QLabel("Generated/Editable Caption:"))
        self.generated_caption_text = QTextEdit()
        self.generated_caption_text.setReadOnly(False)
        self.generated_caption_text.setPlaceholderText("Generated caption will appear here. You can edit it before saving.")
        self.generated_caption_text.setToolTip("The generated caption appears here. Edit and use 'Save Edited Caption'.")
        right_panel.addWidget(self.generated_caption_text, 1)

        # Saving Options
        self.overwrite_checkbox = QCheckBox("Overwrite existing captions")
        self.overwrite_checkbox.setToolTip("If checked, automatically overwrites existing .txt files without asking.")
        self.append_checkbox = QCheckBox("Append to existing captions")
        self.append_checkbox.setToolTip("If checked, adds the new caption to the end of the existing .txt file.")

        save_options_layout = QHBoxLayout()
        save_options_layout.addWidget(self.overwrite_checkbox)
        save_options_layout.addWidget(self.append_checkbox)
        save_options_layout.addStretch(1)
        right_panel.addLayout(save_options_layout) # Add layout here

        # Append and overwrite are mutually exclusive: enabling append
        # disables the overwrite checkbox.
        self.append_checkbox.stateChanged.connect(
            lambda state: self.overwrite_checkbox.setEnabled(state == Qt.Unchecked)
        )

        # Save Edited Caption Button
        self.save_caption_button = QPushButton("Save Edited Caption to File")
        self.save_caption_button.setToolTip("Save the text currently in the box above to the corresponding .txt file using the selected options.")
        self.save_caption_button.clicked.connect(self.save_edited_caption_action)
        right_panel.addWidget(self.save_caption_button)

        # --- Main Layout Assembly ---
        self.main_layout.addLayout(left_panel, 2)
        self.main_layout.addLayout(right_panel, 5)

        # --- Status Bar and Progress Bar ---
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        self.progress_bar = QProgressBar()
        self.status_bar.addPermanentWidget(self.progress_bar)
        self.progress_bar.hide()
        self.show_status("Ready.", 5000)
1173
+
1174
+ def browse_checkpoint_path(self):
1175
+ directory = QFileDialog.getExistingDirectory(self, "Select Checkpoint Directory")
1176
+ if directory:
1177
+ self.checkpoint_path_line.setText(directory)
1178
+ self.update_button_states()
1179
+
1180
    def show_status(self, message, timeout=0):
        """Show message in the status bar (timeout in ms, 0 = until replaced)
        and process pending events so the update paints immediately."""
        self.status_bar.showMessage(message, timeout)
        QApplication.processEvents()
1183
+
1184
+ def update_button_states(self):
1185
+ self.load_models_button.setEnabled(bool(self.checkpoint_path_line.text()))
1186
+ models_ready = self.models_loaded
1187
+ dir_selected = self.input_dir is not None and bool(self.image_files)
1188
+ single_img_selected = self.single_image_path is not None
1189
+ list_img_selected = self.selected_image_path is not None
1190
+ caption_present = bool(self.generated_caption_text.toPlainText().strip())
1191
+
1192
+ self.run_button.setEnabled(models_ready and dir_selected)
1193
+ self.caption_selected_button.setEnabled(models_ready and list_img_selected)
1194
+ self.caption_single_button.setEnabled(models_ready and single_img_selected)
1195
+ self.save_caption_button.setEnabled(caption_present and (list_img_selected or single_img_selected))
1196
+
1197
+ def apply_theme(self):
1198
+ dark_stylesheet = """
1199
+ QMainWindow, QWidget { background-color: #2E2E2E; color: #FFFFFF; font-family: Arial, sans-serif; }
1200
+ QPushButton { background-color: #3A3A3A; color: #FFFFFF; border: 1px solid #555555; padding: 5px; min-height: 20px; }
1201
+ QPushButton:hover { background-color: #555555; }
1202
+ QPushButton:disabled { background-color: #454545; color: #888888; }
1203
+ QLabel { color: #FFFFFF; }
1204
+ QLineEdit, QTextEdit, QComboBox { background-color: #3A3A3A; color: #FFFFFF; border: 1px solid #555555; padding: 4px; }
1205
+ QLineEdit:disabled, QTextEdit:disabled, QComboBox:disabled { background-color: #454545; color: #888888; }
1206
+ QListWidget { background-color: #3A3A3A; color: #FFFFFF; border: 1px solid #555555; alternate-background-color: #424242; }
1207
+ QCheckBox { color: #FFFFFF; spacing: 5px; }
1208
+ QCheckBox::indicator { width: 13px; height: 13px; }
1209
+ QStatusBar { color: #FFFFFF; } QStatusBar::item { border: none; }
1210
+ QProgressBar { border: 1px solid #555555; text-align: center; color: #FFFFFF; background-color: #3A3A3A; }
1211
+ QProgressBar::chunk { background-color: #007ADF; width: 10px; margin: 0.5px; }
1212
+ QToolTip { background-color: #464646; color: #FFFFFF; border: 1px solid #555555; padding: 4px; }
1213
+ QTextEdit { placeholderTextColor: gray; } QLineEdit { placeholderTextColor: gray; }
1214
+ """
1215
+ if self.dark_mode: self.setStyleSheet(dark_stylesheet)
1216
+ else: self.setStyleSheet("")
1217
+
1218
+ placeholder_style = "QTextEdit { placeholderTextColor: gray; } QLineEdit { placeholderTextColor: gray; }"
1219
+ current_style = self.styleSheet()
1220
+ if self.dark_mode:
1221
+ if "placeholderTextColor" not in current_style: self.setStyleSheet(current_style + placeholder_style)
1222
+ else: self.setStyleSheet(current_style.replace(placeholder_style, ""))
1223
+
1224
    def toggle_theme(self):
        """Flip between light and dark mode and re-apply the stylesheet."""
        self.dark_mode = not self.dark_mode
        self.apply_theme()
1227
+
1228
+ def select_input_directory(self):
1229
+ directory = QFileDialog.getExistingDirectory(self, "Select Input Directory")
1230
+ if directory:
1231
+ self.input_dir = Path(directory)
1232
+ self.input_dir_label.setText(str(self.input_dir))
1233
+ self.single_image_path = None; self.single_image_label.setText("No image selected")
1234
+ self.selected_image_path = None; self.selected_image_label.setText("No image selected")
1235
+ self.generated_caption_text.clear()
1236
+ self.load_images()
1237
+ self.show_status(f"Selected directory: {self.input_dir.name}", 5000)
1238
+ else:
1239
+ self.input_dir_label.setText("No directory selected"); self.input_dir = None
1240
+ self.image_list_widget.clear(); self.image_files = []
1241
+ self.show_status("Directory selection cancelled.", 3000)
1242
+ self.update_button_states()
1243
+
1244
+ def select_single_image(self):
1245
+ file_filter = "Image Files (*.jpg *.jpeg *.png *.bmp *.gif *.tiff *.webp)"
1246
+ file_path, _ = QFileDialog.getOpenFileName(self, "Select Single Image", "", file_filter)
1247
+ if file_path:
1248
+ self.single_image_path = Path(file_path)
1249
+ self.single_image_label.setText(str(self.single_image_path.name))
1250
+ self.input_dir = None; self.input_dir_label.setText("No directory selected")
1251
+ self.image_list_widget.clear(); self.image_files = []
1252
+ self.selected_image_path = None
1253
+ self.display_image(self.single_image_path)
1254
+ self.show_status(f"Selected single image: {self.single_image_path.name}", 5000)
1255
+ else:
1256
+ self.single_image_label.setText("No image selected"); self.single_image_path = None
1257
+ self.show_status("Single image selection cancelled.", 3000)
1258
+ self.update_button_states()
1259
+
1260
    def load_images(self):
        """Scan self.input_dir for supported images and fill the list widget.

        Populates self.image_files (sorted) and adds one list item per image
        with a 100px thumbnail icon.  On a read error the directory selection
        is cleared; on an empty result a warning dialog is shown.
        """
        if not self.input_dir: return
        self.show_status(f"Loading images from {self.input_dir.name}...")
        image_extensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"]
        try:
            self.image_files = sorted([f for f in self.input_dir.iterdir() if f.is_file() and f.suffix.lower() in image_extensions])
        except Exception as e:
            # Unreadable directory: report and reset directory state.
            QMessageBox.critical(self, "Directory Error", f"Could not read directory contents:\n{e}")
            self.show_status(f"Error reading directory {self.input_dir.name}", 5000)
            self.image_files = []; self.input_dir = None; self.input_dir_label.setText("Error reading directory")

        self.image_list_widget.clear()
        if not self.image_files:
            # Only warn when a directory is still selected (not after a read error).
            if self.input_dir:
                QMessageBox.warning(self, "No Images", "No supported image files found.")
                self.show_status("No images found in directory.", 3000)
            self.update_button_states()
            return

        # Build list items; thumbnail failures are non-fatal (item keeps its text).
        thumb_size = 100
        for image_path in self.image_files:
            item = QListWidgetItem(str(image_path.name))
            try:
                pixmap = QPixmap(str(image_path))
                if not pixmap.isNull():
                    scaled_pixmap = pixmap.scaled(thumb_size, thumb_size, Qt.KeepAspectRatio, Qt.SmoothTransformation)
                    item.setIcon(QIcon(scaled_pixmap))
                else: print(f"Warning: QPixmap is null for {image_path.name}")
            except Exception as e: print(f"Warning: Could not create thumbnail for {image_path.name}: {e}")
            self.image_list_widget.addItem(item)

        self.show_status(f"Loaded {len(self.image_files)} images.", 5000)
        self.update_button_states()
1293
+
1294
+ def display_selected_image(self, item):
1295
+ if not self.input_dir or not item: return
1296
+ try:
1297
+ image_name = item.text()
1298
+ image_path = self.input_dir / image_name
1299
+ if not image_path.exists():
1300
+ QMessageBox.warning(self, "File Not Found", f"Image file '{image_name}' no longer exists.")
1301
+ self.selected_image_label.setText("File not found")
1302
+ self.selected_image_label.setPixmap(QPixmap())
1303
+ self.generated_caption_text.clear()
1304
+ self.selected_image_path = None
1305
+ return
1306
+
1307
+ self.selected_image_path = image_path
1308
+ self.single_image_path = None
1309
+ self.single_image_label.setText("No image selected")
1310
+ self.display_image(image_path)
1311
+ caption_file_path = image_path.with_suffix('.txt')
1312
+ if caption_file_path.exists():
1313
+ try:
1314
+ with open(caption_file_path, 'r', encoding='utf-8') as f:
1315
+ caption_content = f.read()
1316
+ self.generated_caption_text.setText(caption_content)
1317
+ status_message = f"Displayed {image_name} and loaded existing caption."
1318
+ except Exception as e:
1319
+ print(f"Warning: Could not read caption file {caption_file_path.name}: {e}")
1320
+ # Keep caption box clear or show error placeholder
1321
+ self.generated_caption_text.setPlaceholderText(f"Error reading caption file for {image_name}.")
1322
+ status_message = f"Displayed {image_name}, but failed to load caption file."
1323
+ else:
1324
+ # Keep caption box clear (already done by display_image)
1325
+ self.generated_caption_text.setPlaceholderText("Generate or edit caption here.")
1326
+ status_message = f"Displayed {image_name}. No existing caption found."
1327
+ self.show_status(f"Selected {image_name} from list.", 4000)
1328
+ except Exception as e:
1329
+ self.selected_image_label.setText("Error loading preview")
1330
+ self.selected_image_path = None
1331
+ QMessageBox.warning(self, "Preview Error", f"Could not load preview for {item.text()}: {e}")
1332
+ self.show_status(f"Error loading preview for {item.text()}", 4000)
1333
+ self.update_button_states()
1334
+
1335
+ def display_image(self, image_path):
1336
+ try:
1337
+ pixmap = QPixmap(str(image_path))
1338
+ if not pixmap.isNull():
1339
+ self.scale_and_set_pixmap(pixmap)
1340
+ self.generated_caption_text.clear()
1341
+ else:
1342
+ self.selected_image_label.setText(f"Cannot display image:\n{image_path.name}")
1343
+ self.selected_image_label.setPixmap(QPixmap())
1344
+ except Exception as e:
1345
+ self.selected_image_label.setText(f"Error loading preview:\n{image_path.name}")
1346
+ self.selected_image_label.setPixmap(QPixmap())
1347
+ print(f"Error displaying image {image_path}: {e}")
1348
+ self.show_status(f"Error displaying image {image_path.name}", 4000)
1349
+ self.update_button_states()
1350
+
1351
+ def scale_and_set_pixmap(self, pixmap):
1352
+ if not pixmap or pixmap.isNull():
1353
+ self.selected_image_label.clear()
1354
+ self.selected_image_label.setText("No image selected")
1355
+ return
1356
+ label_size = self.selected_image_label.contentsRect().size()
1357
+ scaled_pixmap = pixmap.scaled(label_size * self.devicePixelRatioF(), Qt.KeepAspectRatio, Qt.SmoothTransformation)
1358
+ self.selected_image_label.setPixmap(scaled_pixmap)
1359
+
1360
    def load_models_action(self):
        """Validate the checkpoint path and load all models, with busy-indicator UI.

        Shows an indeterminate progress bar during the load, stores the five
        model handles on self, and sets self.models_loaded accordingly.
        Errors are reported via dialog + status bar and traced to the console.
        """
        checkpoint_path_str = self.checkpoint_path_line.text()
        if not checkpoint_path_str: QMessageBox.warning(self, "Checkpoint Error", "Please specify the checkpoint path."); return
        checkpoint_path = Path(checkpoint_path_str)
        if not checkpoint_path.exists() or not checkpoint_path.is_dir():
            QMessageBox.warning(self, "Checkpoint Error", f"Checkpoint path does not exist or is not a directory:\n{checkpoint_path}"); return

        self.show_status("Loading models... This might take a while.", 0)
        # Range (0, 0) puts the progress bar into indeterminate "busy" mode.
        self.progress_bar.setRange(0, 0); self.progress_bar.show(); QApplication.processEvents()
        try:
            (self.clip_processor, self.clip_model, self.tokenizer, self.text_model, self.image_adapter) = load_models(checkpoint_path, status_callback=self.show_status)
            self.models_loaded = True
            QMessageBox.information(self, "Models Loaded", "Models have been loaded successfully.")
            self.show_status("Models loaded successfully. Ready to caption.", 5000)
        except Exception as e:
            self.models_loaded = False
            QMessageBox.critical(self, "Model Loading Error", f"An error occurred while loading models:\n{e}\n\nCheck console for details.")
            self.show_status(f"Model loading failed. Check console.", 0)
            print(f"--- Model Loading Error ---"); import traceback; traceback.print_exc(); print(f"--- End Error Traceback ---")
        finally:
            # Always restore the progress bar and re-evaluate button enablement.
            self.progress_bar.hide(); self.progress_bar.setRange(0, 100); self.update_button_states()
1381
+
1382
+ def collect_parameters(self):
1383
+ return (self.caption_type_combo.currentText(), self.caption_length_combo.currentText(),
1384
+ [cb.text() for cb in self.extra_options_checkboxes if cb.isChecked()],
1385
+ self.name_input_line.text(), self.custom_prompt_text.toPlainText())
1386
+
1387
+ def _confirm_overwrite(self, file_path: Path) -> bool:
1388
+ if file_path.exists():
1389
+ reply = QMessageBox.question(self, 'Confirm Overwrite', f"Caption file '{file_path.name}' already exists.\nOverwrite?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
1390
+ return reply == QMessageBox.Yes
1391
+ return True
1392
+
1393
+ def _save_caption_to_file(self, image_path: Path, caption: str) -> bool:
1394
+ if not image_path: self.show_status("Error: No image path associated.", 5000); return False
1395
+ caption_file = image_path.with_suffix('.txt')
1396
+ mode = 'a' if self.append_checkbox.isChecked() else 'w'
1397
+ prefix = '\n' if mode == 'a' and caption_file.exists() and caption_file.stat().st_size > 0 else ''
1398
+
1399
+ if mode == 'w' and caption_file.exists() and not self.overwrite_checkbox.isChecked():
1400
+ if not self._confirm_overwrite(caption_file):
1401
+ self.show_status(f"Skipped saving {image_path.name}.", 3000); return False
1402
+ try:
1403
+ with open(caption_file, mode, encoding='utf-8') as f: f.write(f"{prefix}{caption}")
1404
+ self.show_status(f"Caption {'appended to' if mode == 'a' else 'saved to'} {caption_file.name}", 4000); return True
1405
+ except Exception as e:
1406
+ QMessageBox.critical(self, "Save Error", f"Error saving caption for {image_path.name}:\n{e}")
1407
+ self.show_status(f"Error saving caption for {image_path.name}", 5000); print(f"Error saving caption to {caption_file}: {e}"); return False
1408
+
1409
+ def _run_caption_generation(self, image_path: Path):
1410
+ if not self.models_loaded: QMessageBox.warning(self, "Models Not Loaded", "Please load models first."); return None
1411
+ if not image_path or not image_path.exists():
1412
+ QMessageBox.warning(self, "Image Not Found", f"Image file does not exist:\n{image_path}")
1413
+ self.show_status(f"Image not found: {image_path.name if image_path else 'None'}", 5000); return None
1414
+
1415
+ self.show_status(f"Processing: {image_path.name}...", 0); QApplication.processEvents()
1416
+ params = self.collect_parameters()
1417
+ try: input_image = Image.open(image_path)
1418
+ except Exception as e:
1419
+ QMessageBox.critical(self, "Image Open Error", f"Failed to open {image_path.name}:\n{e}")
1420
+ self.show_status(f"Error opening {image_path.name}", 5000); print(f"Error opening image {image_path}: {e}"); return None
1421
+ try:
1422
+ prompt_str, caption = generate_caption(input_image, *params, self.clip_model, self.tokenizer, self.text_model, self.image_adapter)
1423
+ current_viewed_path = self.selected_image_path or self.single_image_path
1424
+ if image_path == current_viewed_path: self.generated_caption_text.setText(caption)
1425
+ if self._save_caption_to_file(image_path, caption): print(f"Caption generated and saved for {image_path.name}")
1426
+ else: print(f"Caption generated but NOT saved for {image_path.name}")
1427
+ return caption
1428
+ except Exception as e:
1429
+ QMessageBox.critical(self, "Processing Error", f"Failed to process {image_path.name}:\n{e}\n\nCheck console.")
1430
+ self.show_status(f"Error processing {image_path.name}. Check console.", 0)
1431
+ print(f"--- Processing Error for {image_path.name} ---"); import traceback; traceback.print_exc(); print(f"--- End Error Traceback ---")
1432
+ current_viewed_path = self.selected_image_path or self.single_image_path
1433
+ if image_path == current_viewed_path: self.generated_caption_text.setText(f"Error generating caption. See console.")
1434
+ return None
1435
+ finally: QApplication.processEvents()
1436
+
1437
+ def generate_captions_action(self):
1438
+ if not self.input_dir or not self.image_files: QMessageBox.warning(self, "No Images", "Select directory with images first."); return
1439
+ if not self.models_loaded: QMessageBox.warning(self, "Models Not Loaded", "Load models first."); return
1440
+
1441
+ num_images = len(self.image_files)
1442
+ self.progress_bar.setRange(0, num_images); self.progress_bar.setValue(0); self.progress_bar.show()
1443
+ self.show_status(f"Starting batch captioning for {num_images} images...", 0)
1444
+
1445
+ processed_count, error_count, skipped_explicitly = 0, 0, 0
1446
+ original_overwrite_state = self.overwrite_checkbox.isChecked() # Remember original state
1447
+ ask_all = False # Flag to check if user agreed to overwrite all
1448
+
1449
+ # Pre-check for overwrites if needed
1450
+ files_to_confirm = []
1451
+ if not self.overwrite_checkbox.isChecked() and not self.append_checkbox.isChecked():
1452
+ files_to_confirm = [img.with_suffix('.txt').name for img in self.image_files if img.with_suffix('.txt').exists()]
1453
+
1454
+ if files_to_confirm:
1455
+ reply = QMessageBox.question(self, 'Confirm Overwrite Multiple', f"{len(files_to_confirm)} existing caption file(s) found.\nOverwrite ALL existing files?", QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Cancel)
1456
+ if reply == QMessageBox.Cancel: self.show_status("Batch cancelled.", 3000); self.progress_bar.hide(); return
1457
+ elif reply == QMessageBox.Yes: ask_all = True; self.overwrite_checkbox.setChecked(True) # Temporarily check it
1458
+
1459
+ # Process images
1460
+ for i, image_path in enumerate(self.image_files):
1461
+ # Run generation. _save_caption_to_file handles individual confirmation if ask_all is False
1462
+ caption_result = self._run_caption_generation(image_path)
1463
+
1464
+ # Track results (Approximate - relies on _save reporting skips)
1465
+ if caption_result is not None:
1466
+ processed_count += 1
1467
+ else:
1468
+ # If None, assume error unless status bar indicates skip (imperfect)
1469
+ if "Skipped saving" not in self.status_bar.currentMessage():
1470
+ error_count += 1
1471
+ # No reliable way to count skips here without modifying _save return value
1472
+
1473
+ self.progress_bar.setValue(i + 1)
1474
+ QApplication.processEvents()
1475
+
1476
+ # Restore overwrite checkbox state if changed
1477
+ if ask_all: self.overwrite_checkbox.setChecked(original_overwrite_state)
1478
+
1479
+ self.progress_bar.hide()
1480
+ final_message = f"Batch finished. {processed_count} captions generated/saved."
1481
+ if error_count > 0: final_message += f" {error_count} errors."
1482
+ # Cannot reliably report skips here
1483
+ QMessageBox.information(self, "Batch Complete", final_message)
1484
+ self.show_status(final_message, 10000)
1485
+ self.update_button_states()
1486
+
1487
+ def caption_selected_image_action(self):
1488
+ if not self.selected_image_path: QMessageBox.warning(self, "No Image Selected", "Select image from list first."); return
1489
+ self._run_caption_generation(self.selected_image_path); self.update_button_states()
1490
+
1491
+ def caption_single_image_action(self):
1492
+ if not self.single_image_path: QMessageBox.warning(self, "No Image Selected", "Select single image first."); return
1493
+ self._run_caption_generation(self.single_image_path); self.update_button_states()
1494
+
1495
+ def save_edited_caption_action(self):
1496
+ edited_caption = self.generated_caption_text.toPlainText().strip()
1497
+ if not edited_caption: QMessageBox.warning(self, "Empty Caption", "Caption text is empty."); return
1498
+ current_image_path = self.selected_image_path or self.single_image_path
1499
+ if not current_image_path: QMessageBox.warning(self, "No Associated Image", "Select image first."); return
1500
+ self._save_caption_to_file(current_image_path, edited_caption)
1501
+
1502
+ def resizeEvent(self, event):
1503
+ super().resizeEvent(event)
1504
+ current_path = None
1505
+ if self.selected_image_label.pixmap() and not self.selected_image_label.pixmap().isNull():
1506
+ current_path = self.selected_image_path or self.single_image_path
1507
+ if current_path and current_path.exists():
1508
+ try:
1509
+ pixmap = QPixmap(str(current_path))
1510
+ if not pixmap.isNull(): self.scale_and_set_pixmap(pixmap)
1511
+ except Exception as e: print(f"Error reloading pixmap on resize for {current_path}: {e}")
1512
+ elif not self.selected_image_label.text() or self.selected_image_label.text().startswith(("Cannot", "Error", "No image")):
1513
+ self.selected_image_label.clear(); self.selected_image_label.setText("No image selected")
1514
+
1515
+
1516
+ if __name__ == "__main__":
1517
+ QApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True) # Optional
1518
+ QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps, True) # Optional
1519
+ app = QApplication(sys.argv)
1520
+ app.setStyle("Fusion") # Optional
1521
+ window = CaptionApp()
1522
+ window.show()
1523
+ sys.exit(app.exec_())
adapter_config_4bit.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "v_proj",
25
+ "gate_proj",
26
+ "down_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
cgrkzexw-599808/clip_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9277e041aab3e7f20a8e6ecf7248b663aac1c281daf4472c12a6e5013cf9f0cc
3
+ size 1713067838
cgrkzexw-599808/config.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_project: joy-caption-1
2
+ device_batch_size: 2
3
+ batch_size: 256
4
+ learning_rate: 0.0002
5
+ warmup_samples: 18000
6
+ max_samples: 600000
7
+ save_every: 50000
8
+ test_every: 50000
9
+ use_amp: true
10
+ grad_scaler: true
11
+ lr_scheduler_type: cosine
12
+ min_lr_ratio: 0.0
13
+ allow_tf32: true
14
+ seed: 69
15
+ num_workers: 8
16
+ optimizer_type: adamw
17
+ adam_beta1: 0.9
18
+ adam_beta2: 0.999
19
+ adam_eps: 1.0e-08
20
+ adam_weight_decay: 0.0
21
+ clip_grad_norm: 1.0
22
+ dataset: fancyfeast/joy-captioning-20240924a
23
+ clip_model: google/siglip-so400m-patch14-384
24
+ text_model: ../lora-train/lora_model_vwbzycxh
25
+ resume: null
26
+ gradient_checkpointing: false
27
+ test_size: 2048
28
+ grad_scaler_init: 65536.0
29
+ max_caption_length: 257
30
+ num_image_tokens: 32
31
+ adapter_type: mlp
32
+ text_model_dtype: bfloat16
33
+ pre_test: false
34
+ train_image_model: true
35
+ image_model_lr: null
36
+ train_lora: true
37
+ lora_r: 64
38
+ lora_alpha: 16
39
+ lora_dropout: 0.1
cgrkzexw-599808/image_adapter.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38db2fe263be2d494a50be4a7bbfd7b23b76f9d03e4008a1b7df97d6b27894ef
3
+ size 86067714
cgrkzexw-599808/text_model/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/Meta-Llama-3.1-8B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
cgrkzexw-599808/text_model/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "v_proj",
25
+ "gate_proj",
26
+ "down_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
cgrkzexw-599808/text_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd883ebd089f87e0fab7f17960c5f4451ceae43aecead44a9984b3369018dbdb
3
+ size 671149168
cgrkzexw-599808/text_model/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
cgrkzexw-599808/text_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
cgrkzexw-599808/text_model/tokenizer_config.json ADDED
@@ -0,0 +1,2064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 July 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content'] %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", 
\" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 131072,
2061
+ "pad_token": "<|finetune_right_pad_id|>",
2062
+ "padding_side": "right",
2063
+ "tokenizer_class": "PreTrainedTokenizerFast"
2064
+ }
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.23.4
2
+ accelerate==1.0.0
3
+ torch
4
+ transformers==4.44.0
5
+ sentencepiece
6
+ peft==0.12.0
7
+ torchvision
8
+ protobuf
9
+ PyQt5>=5.15.4
10
+ bitsandbytes
11
+ pillow
text_model/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "v_proj",
25
+ "gate_proj",
26
+ "down_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }