Add inference script
Browse files
- hindi_embeddings.py +13 -1
hindi_embeddings.py
CHANGED
|
@@ -510,6 +510,13 @@ class HindiEmbedder:
|
|
| 510 |
Returns:
|
| 511 |
Similarity scores
|
| 512 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
embeddings1 = self.encode(texts1)
|
| 514 |
|
| 515 |
if texts2 is None:
|
|
@@ -522,10 +529,15 @@ class HindiEmbedder:
|
|
| 522 |
|
| 523 |
if len(texts1) == len(texts2):
|
| 524 |
# Compute pairwise similarity when the number of texts match
|
| 525 |
-
return np.array([
|
| 526 |
cosine_similarity([e1], [e2])[0][0]
|
| 527 |
for e1, e2 in zip(embeddings1, embeddings2)
|
| 528 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
else:
|
| 530 |
# Return full similarity matrix
|
| 531 |
return cosine_similarity(embeddings1, embeddings2)
|
|
|
|
| 510 |
Returns:
|
| 511 |
Similarity scores
|
| 512 |
"""
|
| 513 |
+
# Convert single strings to lists for consistent handling
|
| 514 |
+
if isinstance(texts1, str):
|
| 515 |
+
texts1 = [texts1]
|
| 516 |
+
|
| 517 |
+
if texts2 is not None and isinstance(texts2, str):
|
| 518 |
+
texts2 = [texts2]
|
| 519 |
+
|
| 520 |
embeddings1 = self.encode(texts1)
|
| 521 |
|
| 522 |
if texts2 is None:
|
|
|
|
| 529 |
|
| 530 |
if len(texts1) == len(texts2):
|
| 531 |
# Compute pairwise similarity when the number of texts match
|
| 532 |
+
similarities = np.array([
|
| 533 |
cosine_similarity([e1], [e2])[0][0]
|
| 534 |
for e1, e2 in zip(embeddings1, embeddings2)
|
| 535 |
])
|
| 536 |
+
|
| 537 |
+
# If there's just one pair, return a scalar
|
| 538 |
+
if len(similarities) == 1:
|
| 539 |
+
return similarities[0]
|
| 540 |
+
return similarities
|
| 541 |
else:
|
| 542 |
# Return full similarity matrix
|
| 543 |
return cosine_similarity(embeddings1, embeddings2)
|