Spaces:

Blessmore
/

fasttext_embedding_Pipeline

Build error

App Files Community

Blessmore committed on May 23, 2024

Commit

2105cf1

verified ·

1 Parent(s): 94a4017

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -17

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import io
 import tempfile
 import numpy as np
 from concurrent.futures import ThreadPoolExecutor
-from huggingface_hub import hf_hub_download
 from sklearn.metrics.pairwise import cosine_similarity
 # Function to preprocess text
@@ -67,20 +66,15 @@ def clean_text_multithreaded(text):
         cleaned_chunks = list(executor.map(clean_text_chunk, chunks))
     return '\n'.join(cleaned_chunks)
-# Function to load the FastText model from Hugging Face
 @st.cache_resource
-def load_fasttext_model(model_path):
-    #model_path = os.path.join(model_dir, "fasttext_model.model")
-    #vectors_path = os.path.join(model_dir, "fasttext_model_vectors.kv")
-    #vectors_ngrams_path = os.path.join(model_dir, "fasttext_model.model.wv.vectors_ngrams.npy")
-    model = FastText.load(model_path)
-    #model.wv = KeyedVectors.load(vectors_path, mmap='r')
-    #model.wv.vectors_ngrams = np.load(vectors_ngrams_path, mmap_mode='r')
     return model
 # Function to generate embeddings for a given word
 def generate_word_embedding(word, model):
     """Look up the embedding of ``word`` in ``model.wv``.

     Returns the result of ``model.wv.get_vector(word, norm=True)`` when
     ``word in model.wv`` holds, and ``None`` otherwise.
     """
     # Guard first: words the vector store does not report as present get no embedding.
     if word not in model.wv:
         return None
     return model.wv.get_vector(word, norm=True)
@@ -206,12 +200,11 @@ def main():
     elif option == "Generate Embeddings":
         st.header("Generate Embeddings with Pretrained FastText Model")
-        repo_id = "Blessmore/Fasttext_embeddings/Fast_text_50_dim"
-        model_path = "Fast_text_50_dim"
-        vectors_file = "fasttext_model_vectors.kv"
-        vectors_ngrams_file = "fasttext_model.model.wv.vectors_ngrams.npy"
-        model = load_fasttext_model(model_path)
         st.subheader("Generate Word Embedding")
         word = st.text_input("Enter a word:")

 import tempfile
 import numpy as np
 from concurrent.futures import ThreadPoolExecutor
 from sklearn.metrics.pairwise import cosine_similarity
 # Function to preprocess text
         cleaned_chunks = list(executor.map(clean_text_chunk, chunks))
     return '\n'.join(cleaned_chunks)
+# Function to load the FastText model from the specified folder
 @st.cache_resource
+def load_fasttext_model(model_folder):
+    model_file = os.path.join(model_folder, "fasttext_model.model")
+    vectors_file = os.path.join(model_folder, "fasttext_model_vectors.kv")
+    model = FastText.load(model_file)
+    model.wv = KeyedVectors.load(vectors_file)
     return model
 # Function to generate embeddings for a given word
 def generate_word_embedding(word, model):
     """Look up the embedding of ``word`` in ``model.wv``.

     Returns the result of ``model.wv.get_vector(word, norm=True)`` when
     ``word in model.wv`` holds, and ``None`` otherwise.
     """
     # Guard first: words the vector store does not report as present get no embedding.
     if word not in model.wv:
         return None
     return model.wv.get_vector(word, norm=True)
     elif option == "Generate Embeddings":
         st.header("Generate Embeddings with Pretrained FastText Model")
+        # Specify the path to the model folder
+        model_folder = "Fast_text_50_dim"
+        # Load the model from the specified folder
+        model = load_fasttext_model(model_folder)
         st.subheader("Generate Word Embedding")
         word = st.text_input("Enter a word:")