|
|
from huggingface_hub import hf_hub_download
|
|
|
import gensim
|
|
|
import os
|
|
|
|
|
|
class Blessmore:
    """Fetch and load the Shona FastText model published on the Hugging Face Hub.

    Typical use is ``Blessmore.from_pretrained().get_model()``, which downloads
    the model files (if needed) and returns the loaded gensim FastText model.

    Attributes set by ``__init__``:
        repo_id: Hub repository that hosts the files.
        model_dir: Local directory passed to ``hf_hub_download`` as ``cache_dir``.
        subfolder: Folder inside the repository that contains the files.
        model_files: File names downloaded by ``download_model_files``.
        model: The loaded model, or ``None`` until ``load_model`` has run.
    """

    # The file handed to gensim by ``load_model``; the remaining entries of
    # ``model_files`` are downloaded alongside it.
    _MAIN_MODEL_FILE = "shona_fasttext_50d.model"

    def __init__(
        self,
        repo_id="Blessmore/Fasttext_embeddings",
        model_dir="Fast_text_50_dim",
        subfolder="Fast_text_50_dim",
    ):
        """Store the repository coordinates; nothing is downloaded or loaded yet."""
        self.repo_id = repo_id
        self.model_dir = model_dir
        self.subfolder = subfolder
        self.model_files = [
            "shona_fasttext_50d.model",
            "shona_fasttext_50d.model.wv.vectors_ngrams.npy",
            "shona_fasttext_vectors_50d.kv",
            "shona_fasttext_vectors_50d.kv.vectors_ngrams.npy",
        ]
        self.model = None
        # file name -> local path returned by hf_hub_download; filled in by
        # download_model_files() and consulted by load_model().
        self._downloaded_paths = {}

    def download_model_files(self):
        """Download every entry of ``model_files`` into ``model_dir``.

        Returns:
            dict mapping each file name to the local path reported by
            ``hf_hub_download``.
        """
        os.makedirs(self.model_dir, exist_ok=True)
        local_paths = {}
        for file_name in self.model_files:
            # An empty/None subfolder means the file sits at the repo root;
            # avoid building names such as "None/<file>" or "/<file>".
            if self.subfolder:
                remote_name = f"{self.subfolder}/{file_name}"
            else:
                remote_name = file_name
            # With cache_dir the file is stored inside the Hub cache layout
            # under model_dir, not directly in model_dir, so keep the path
            # that the call returns instead of guessing it later.
            local_paths[file_name] = hf_hub_download(
                repo_id=self.repo_id,
                filename=remote_name,
                cache_dir=self.model_dir,
            )
        self._downloaded_paths = local_paths
        return local_paths

    def _resolve_model_path(self):
        """Return the path ``load_model`` should open.

        Prefers the location recorded by ``download_model_files``; when no
        download has been recorded, falls back to ``<model_dir>/<model file>``.
        """
        recorded = getattr(self, "_downloaded_paths", None) or {}
        downloaded_path = recorded.get(self._MAIN_MODEL_FILE)
        if downloaded_path is not None:
            return downloaded_path
        return os.path.join(self.model_dir, self._MAIN_MODEL_FILE)

    def load_model(self):
        """Load the FastText model with gensim and store it in ``self.model``.

        Raises:
            FileNotFoundError: if the model file is not present at the
                resolved location.
        """
        model_path = self._resolve_model_path()
        if not os.path.isfile(model_path):
            raise FileNotFoundError(
                f"FastText model file not found at {model_path!r}; "
                "call download_model_files() first."
            )
        self.model = gensim.models.FastText.load(model_path)

    @classmethod
    def from_pretrained(
        cls,
        repo_id="Blessmore/Fasttext_embeddings",
        model_dir="Fast_text_50_dim",
        subfolder="Fast_text_50_dim",
    ):
        """Build an instance, download its files and load the model.

        Args:
            repo_id: Hub repository to download from.
            model_dir: Local directory used as the download cache.
            subfolder: Folder inside the repository that holds the files.

        Returns:
            The instance, with ``model`` populated.
        """
        instance = cls(repo_id=repo_id, model_dir=model_dir, subfolder=subfolder)
        instance.download_model_files()
        instance.load_model()
        return instance

    def get_model(self):
        """Return the loaded model, or ``None`` if ``load_model`` has not run."""
        return self.model
|
|
|
|