Integrate with Sentence Transformers v5.4

Pull request #7
Opened by tomaarsen (HF Staff)
Files changed (2):
  1. config_sentence_transformers.json +1 -4
  2. custom_st.py +12 -7
config_sentence_transformers.json CHANGED
@@ -4,10 +4,7 @@
4
  "transformers": "4.46.2",
5
  "pytorch": "2.2.2"
6
  },
7
- "prompts":{
8
- "image": "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>What is shown in this image?<|im_end|>\n<|endoftext|>",
9
- "query": "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Query: %s<|im_end|>\n<|endoftext|>"
10
- },
11
  "default_prompt_name": null,
12
  "similarity_fn_name": "cosine"
13
  }
 
4
  "transformers": "4.46.2",
5
  "pytorch": "2.2.2"
6
  },
7
+ "prompts": {},
 
 
 
8
  "default_prompt_name": null,
9
  "similarity_fn_name": "cosine"
10
  }
custom_st.py CHANGED
@@ -42,22 +42,23 @@ class Transformer(nn.Module):
42
  self.max_pixels = max_pixels
43
  self.min_pixels = min_pixels
44
  self.max_seq_length = max_seq_length
45
-
46
  # Handle args
47
  model_kwargs = model_args or {}
48
- model_kwargs.update(kwargs)
49
-
50
  processor_kwargs = processor_args or {}
 
 
 
 
 
51
  processor_kwargs.update({
52
  'min_pixels': min_pixels,
53
  'max_pixels': max_pixels,
54
- 'cache_dir': cache_dir
55
  })
56
 
57
  # Initialize model
58
  self.model = Qwen2VLForConditionalGeneration.from_pretrained(
59
  model_name_or_path,
60
- cache_dir=cache_dir,
61
  **model_kwargs
62
  ).eval()
63
 
@@ -271,7 +272,7 @@ class Transformer(nn.Module):
271
  )
272
  return features
273
 
274
- def tokenize(self, texts: List[Union[str, Image.Image, bytes]], padding: str = 'longest') -> Dict[str, torch.Tensor]:
275
  processed_texts, processed_images = self._process_input(texts)
276
 
277
  return self.processor(
@@ -311,4 +312,8 @@ class Transformer(nn.Module):
311
  else:
312
  config = {'model_name_or_path': input_path}
313
 
314
- return Transformer(**config)
 
 
 
 
 
42
  self.max_pixels = max_pixels
43
  self.min_pixels = min_pixels
44
  self.max_seq_length = max_seq_length
45
+
46
  # Handle args
47
  model_kwargs = model_args or {}
 
 
48
  processor_kwargs = processor_args or {}
49
+
50
+ if cache_dir is not None:
51
+ model_kwargs['cache_dir'] = cache_dir
52
+ processor_kwargs['cache_dir'] = cache_dir
53
+
54
  processor_kwargs.update({
55
  'min_pixels': min_pixels,
56
  'max_pixels': max_pixels,
 
57
  })
58
 
59
  # Initialize model
60
  self.model = Qwen2VLForConditionalGeneration.from_pretrained(
61
  model_name_or_path,
 
62
  **model_kwargs
63
  ).eval()
64
 
 
272
  )
273
  return features
274
 
275
+ def tokenize(self, texts: List[Union[str, Image.Image, bytes]], padding: str = 'longest', **kwargs) -> Dict[str, torch.Tensor]:
276
  processed_texts, processed_images = self._process_input(texts)
277
 
278
  return self.processor(
 
312
  else:
313
  config = {'model_name_or_path': input_path}
314
 
315
+ return Transformer(**config)
316
+
317
+ @property
318
+ def modalities(self) -> List[str]:
319
+ return ['text', 'image']