visualizar-ods

Running

dfsandovalp01 committed on Feb 26

Commit

7c5b118

verified ·

1 Parent(s): 4300b5e

Upload 3 files

Files changed (3) hide show

src/embeddings/instructor_embeddings.py CHANGED Viewed

@@ -8,16 +8,14 @@ class InstructorEmbeddings:
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
-        # Configurar modelo sin warning de tied weights
-        from transformers import AutoConfig
-        config = AutoConfig.from_pretrained(model_name)
-        config.tie_word_embeddings = False
         # HF Spaces descargará automáticamente el modelo
         self.model = SentenceTransformer(
             model_name,
-            cache_folder=str(self.cache_dir),
-            model_kwargs={"config": config}
         )
     def encode(self, texts, instruction="", **kwargs):

         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
+        # Silenciar warning sobre tied weights que es inofensivo
+        import warnings
+        warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
         # HF Spaces descargará automáticamente el modelo
         self.model = SentenceTransformer(
             model_name,
+            cache_folder=str(self.cache_dir)
         )
     def encode(self, texts, instruction="", **kwargs):

src/embeddings/mass_modelos_nlp_db.py CHANGED Viewed

@@ -133,13 +133,12 @@ def genCache(cache_name:str, tbl_input_dir:str, out_dir:str, instruction:str, ba
   # Lazy import model to allow quick --help
   from sentence_transformers import SentenceTransformer
-  from transformers import AutoConfig
-  # Cargar configuración y silenciar warning de tied weights
-  config = AutoConfig.from_pretrained(model_name)
-  config.tie_word_embeddings = False
-  model = SentenceTransformer(model_name, model_kwargs={"config": config})
   input_pairs = make_text_pairs(instruction, input_texts)
   emb_input = compute_embeddings(model, input_pairs, batch_size=batch_size, normalize=normalize)
   emb_input_np = emb_input.cpu().numpy()

   # Lazy import model to allow quick --help
   from sentence_transformers import SentenceTransformer
+  import warnings
+  # Silenciar warning sobre tied weights que es inofensivo
+  warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
+  model = SentenceTransformer(model_name)
   input_pairs = make_text_pairs(instruction, input_texts)
   emb_input = compute_embeddings(model, input_pairs, batch_size=batch_size, normalize=normalize)
   emb_input_np = emb_input.cpu().numpy()

src/embeddings/modelos_nlp_db.py CHANGED Viewed

@@ -266,11 +266,10 @@ def search(query):
   # Lazy import model to allow quick --help
   from sentence_transformers import SentenceTransformer
-  from transformers import AutoConfig
-  # Configurar para silenciar warning de tied weights
-  config = AutoConfig.from_pretrained(model_name)
-  config.tie_word_embeddings = False
   # Load / compute ODS embeddings with cache
   ods_use_cache = (not force_recompute) and os.path.exists(ods_cache_path)
@@ -308,7 +307,7 @@ def search(query):
         # emb_unfpa_np = emb_ods.cpu().numpy()
         # save_cache(cache_paths[idx], {"model_name": model_name, "instr": instruc_bases[idx], "count": len(texts[idx])}, emb_unfpa_np)
     else:
-        model = SentenceTransformer(model_name, model_kwargs={"config": config})  # still needed for project embeddings
     # Compute PATR embeddings
     patr_pairs = make_text_pairs(instruc_iniciativas[idx], patr_texts)

   # Lazy import model to allow quick --help
   from sentence_transformers import SentenceTransformer
+  import warnings
+  # Silenciar warning sobre tied weights que es inofensivo
+  warnings.filterwarnings('ignore', message='.*tied weights mapping.*')
   # Load / compute ODS embeddings with cache
   ods_use_cache = (not force_recompute) and os.path.exists(ods_cache_path)
         # emb_unfpa_np = emb_ods.cpu().numpy()
         # save_cache(cache_paths[idx], {"model_name": model_name, "instr": instruc_bases[idx], "count": len(texts[idx])}, emb_unfpa_np)
     else:
+        model = SentenceTransformer(model_name)  # still needed for project embeddings
     # Compute PATR embeddings
     patr_pairs = make_text_pairs(instruc_iniciativas[idx], patr_texts)