Spaces:

sasukae
/

vectors

Build error

App Files Files Community

sasukae committed on Nov 24, 2024

Commit

f253543

verified ·

1 Parent(s): f86f15b

similarity score

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from transformers import AutoTokenizer, AutoModel
+import torch
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+# Load the pre-trained tokenizer and model once at import time; the embedding
+# helpers below reuse these module-level objects on every call.
+# model_name is the checkpoint identifier handed to both from_pretrained loaders.
+model_name = "sentence-transformers/all-MiniLM-L6-v2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
+def generate_embedding(sentence):
+    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Get the CLS token embedding
+    embeddings = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
+    return embeddings
+def find_word_embedding(sentence, word):
+    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Get the embedding for the word
+    word_id = tokenizer.convert_tokens_to_ids(word)
+    input_ids = inputs['input_ids'][0]
+    word_indices = (input_ids == word_id).nonzero(as_tuple=True)[0]
+    if len(word_indices) > 0:
+        word_embedding = outputs.last_hidden_state[0, word_indices[0], :].numpy()
+        return word_embedding
+    else:
+        return None  # Word not found
+def similarity_score(embedding1, embedding2):
+    return cosine_similarity([embedding1], [embedding2])[0][0]
+# Main program
+sentence1 = "Jeff live in Delhi."
+sentence2 = "Person is a Human"
+# Generate sentence embeddings
+embedding1 = generate_embedding(sentence1)
+embedding2 = generate_embedding(sentence2)
+# Print sentence embedding for the first sentence
+print("Embedding for Sentence 1:", embedding1)
+# Find and print the embedding for the word 'Jeff'
+word_embedding = find_word_embedding(sentence1, "Jeff")
+if word_embedding is not None:
+    print("Embedding for the word 'Jeff':", word_embedding)
+else:
+    print("The word 'Jeff' was not found in the sentence.")
+# Compare and print similarity score
+similarity = similarity_score(embedding1, embedding2)
+print("Similarity Score between sentences:", similarity)