sentence-transformers
Somali
English
Italian
semantic-search
lexical-retrieval
somali
multilingual
dictionary
terminology
Instructions to use haajidheere/ErayNet-nirig with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use haajidheere/ErayNet-nirig with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("haajidheere/ErayNet-nirig")

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium."
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
- Notebooks
- Google Colab
- Kaggle
import numpy as np
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import os

# Directory holding the search artifacts; a relative path, so it is resolved
# against the current working directory.
MODEL_DIR = "ai_model"

# Sentence encoder used to embed queries at search time.
# NOTE(review): presumably the same model that produced embeddings.npy, and
# the surrounding page refers to "haajidheere/ErayNet-nirig" - confirm which
# checkpoint is intended.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Term table; search() reads its "somali", "english", "italian" and "domain"
# columns.
df = pd.read_csv(f"{MODEL_DIR}/search_data.csv")

# Precomputed embedding matrix. search() indexes `df` with positions taken
# from this array, so row i here is assumed to describe row i of `df`.
embeddings = np.load(f"{MODEL_DIR}/embeddings.npy")
def get_confidence_label(score, *, high_threshold=0.7, medium_threshold=0.5):
    """Map a similarity score to a coarse confidence label.

    Args:
        score: Similarity score to classify.
        high_threshold: Minimum score (inclusive) labelled ``"high"``.
        medium_threshold: Minimum score (inclusive) labelled ``"medium"``.

    Returns:
        ``"high"``, ``"medium"`` or ``"low"``.
    """
    if score >= high_threshold:
        return "high"
    if score >= medium_threshold:
        return "medium"
    # Anything below the medium cut-off lands here, as does NaN, because NaN
    # fails both comparisons above.
    return "low"
def search(query, top_k=5, threshold=0.3):
    """Run a semantic search over the precomputed embeddings.

    The query is lower-cased, encoded with the module-level ``model`` and
    compared to every row of the module-level ``embeddings`` by cosine
    similarity. Rows of ``df`` are selected by the positions of the best
    scores, so ``df`` and ``embeddings`` are assumed to be row-aligned.

    Args:
        query: Text to search for.
        top_k: Maximum number of rows to return. Values <= 0 yield an empty
            result.
        threshold: Minimum cosine similarity (inclusive) for a row to be
            considered a match.

    Returns:
        A ``pandas.DataFrame`` with the columns ``rank``, ``somali``,
        ``english``, ``italian``, ``domain``, ``similarity_score`` and
        ``confidence_label``, ordered from best to worst match. The frame is
        empty when nothing reaches ``threshold`` or ``top_k`` is not positive.
    """
    columns = [
        "rank",
        "somali",
        "english",
        "italian",
        "domain",
        "similarity_score",
        "confidence_label",
    ]

    # Guard first: `[-top_k:]`-style slicing with top_k == 0 would select
    # *every* match, and a negative top_k would drop the wrong end.
    if top_k <= 0:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode([query.lower()], normalize_embeddings=True)[0]
    scores = util.cos_sim(q_emb, embeddings)[0].numpy()

    valid_idx = np.where(scores >= threshold)[0]
    if valid_idx.size == 0:
        return pd.DataFrame(columns=columns)

    # Sort the surviving scores ascending, flip to descending, keep the best
    # top_k, then map back to positions in the full score array.
    best_first = np.argsort(scores[valid_idx])[::-1][:top_k]
    top_idx = valid_idx[best_first]

    results = df.iloc[top_idx][["somali", "english", "italian", "domain"]].copy()
    results["similarity_score"] = np.round(scores[top_idx], 4)
    # Labels are derived from the unrounded scores.
    results["confidence_label"] = [get_confidence_label(s) for s in scores[top_idx]]
    results.insert(0, "rank", range(1, len(results) + 1))
    return results.reset_index(drop=True)
if __name__ == "__main__":
    import sys

    # Use the first command-line argument as the query, falling back to a
    # built-in sample term when none is given.
    cli_args = sys.argv[1:]
    if cli_args:
        query = cli_args[0]
    else:
        query = "dhaqaale"

    print(f"Searching for: {query}\n")

    results = search(query)
    # Render the full table without the DataFrame index column.
    table = results.to_string(index=False)
    print(table)