dfsandovalp01 committed on
Commit
7c5b118
·
verified ·
1 Parent(s): 4300b5e

Upload 3 files

Browse files
src/embeddings/instructor_embeddings.py CHANGED
@@ -8,16 +8,14 @@ class InstructorEmbeddings:
8
  self.cache_dir = Path(cache_dir)
9
  self.cache_dir.mkdir(parents=True, exist_ok=True)
10
 
11
- # Configurar modelo sin warning de tied weights
12
- from transformers import AutoConfig
13
- config = AutoConfig.from_pretrained(model_name)
14
- config.tie_word_embeddings = False
15
 
16
  # HF Spaces descargará automáticamente el modelo
17
  self.model = SentenceTransformer(
18
  model_name,
19
- cache_folder=str(self.cache_dir),
20
- model_kwargs={"config": config}
21
  )
22
 
23
  def encode(self, texts, instruction="", **kwargs):
 
8
  self.cache_dir = Path(cache_dir)
9
  self.cache_dir.mkdir(parents=True, exist_ok=True)
10
 
11
+ # Silenciar warning sobre tied weights que es inofensivo
12
+ import warnings
13
+ warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
 
14
 
15
  # HF Spaces descargará automáticamente el modelo
16
  self.model = SentenceTransformer(
17
  model_name,
18
+ cache_folder=str(self.cache_dir)
 
19
  )
20
 
21
  def encode(self, texts, instruction="", **kwargs):
src/embeddings/mass_modelos_nlp_db.py CHANGED
@@ -133,13 +133,12 @@ def genCache(cache_name:str, tbl_input_dir:str, out_dir:str, instruction:str, ba
133
 
134
  # Lazy import model to allow quick --help
135
  from sentence_transformers import SentenceTransformer
136
- from transformers import AutoConfig
137
 
138
- # Cargar configuración y silenciar warning de tied weights
139
- config = AutoConfig.from_pretrained(model_name)
140
- config.tie_word_embeddings = False
141
 
142
- model = SentenceTransformer(model_name, model_kwargs={"config": config})
143
  input_pairs = make_text_pairs(instruction, input_texts)
144
  emb_input = compute_embeddings(model, input_pairs, batch_size=batch_size, normalize=normalize)
145
  emb_input_np = emb_input.cpu().numpy()
 
133
 
134
  # Lazy import model to allow quick --help
135
  from sentence_transformers import SentenceTransformer
136
+ import warnings
137
 
138
+ # Silenciar warning sobre tied weights que es inofensivo
139
+ warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
 
140
 
141
+ model = SentenceTransformer(model_name)
142
  input_pairs = make_text_pairs(instruction, input_texts)
143
  emb_input = compute_embeddings(model, input_pairs, batch_size=batch_size, normalize=normalize)
144
  emb_input_np = emb_input.cpu().numpy()
src/embeddings/modelos_nlp_db.py CHANGED
@@ -266,11 +266,10 @@ def search(query):
266
 
267
  # Lazy import model to allow quick --help
268
  from sentence_transformers import SentenceTransformer
269
- from transformers import AutoConfig
270
 
271
- # Configurar para silenciar warning de tied weights
272
- config = AutoConfig.from_pretrained(model_name)
273
- config.tie_word_embeddings = False
274
 
275
  # Load / compute ODS embeddings with cache
276
  ods_use_cache = (not force_recompute) and os.path.exists(ods_cache_path)
@@ -308,7 +307,7 @@ def search(query):
308
  # emb_unfpa_np = emb_ods.cpu().numpy()
309
  # save_cache(cache_paths[idx], {"model_name": model_name, "instr": instruc_bases[idx], "count": len(texts[idx])}, emb_unfpa_np)
310
  else:
311
- model = SentenceTransformer(model_name, model_kwargs={"config": config}) # still needed for project embeddings
312
 
313
  # Compute PATR embeddings
314
  patr_pairs = make_text_pairs(instruc_iniciativas[idx], patr_texts)
 
266
 
267
  # Lazy import model to allow quick --help
268
  from sentence_transformers import SentenceTransformer
269
+ import warnings
270
 
271
+ # Silenciar warning sobre tied weights que es inofensivo
272
+ warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
 
273
 
274
  # Load / compute ODS embeddings with cache
275
  ods_use_cache = (not force_recompute) and os.path.exists(ods_cache_path)
 
307
  # emb_unfpa_np = emb_ods.cpu().numpy()
308
  # save_cache(cache_paths[idx], {"model_name": model_name, "instr": instruc_bases[idx], "count": len(texts[idx])}, emb_unfpa_np)
309
  else:
310
+ model = SentenceTransformer(model_name) # still needed for project embeddings
311
 
312
  # Compute PATR embeddings
313
  patr_pairs = make_text_pairs(instruc_iniciativas[idx], patr_texts)