# File size: 1,175 Bytes
# fd7ab08
import torch
from transformers import AutoTokenizer, AutoModel
from torch.nn.functional import cosine_similarity
import gradio as gr
# Checkpoint identifier passed to both loaders below so that the tokenizer's
# vocabulary matches the encoder weights.
model_name = "bert-base-multilingual-cased"

# Loaded once at import time and shared by every request handled by the app.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Function to compute embeddings
def compute_embedding(text):
    """Return a mean-pooled embedding of *text* from the module-level model.

    The text is tokenized with the module-level ``tokenizer`` (with padding
    and truncation enabled), passed through ``model`` without gradient
    tracking, and the final-layer hidden states are averaged over dim 1.

    Positions where the attention mask is 0 are excluded from the average.
    The tokenizer is called with ``padding=True``, so when several texts of
    different lengths are encoded together the shorter ones are padded; a
    plain ``mean(dim=1)`` would mix those padding states into the embedding.
    For a single string nothing is padded and the result equals the plain
    mean over all positions.

    Args:
        text: The input forwarded unchanged to ``tokenizer``; the app passes
            a single sentence string.

    Returns:
        A ``torch.Tensor`` holding ``last_hidden_state`` averaged over
        dim 1 (one row per encoded text).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Add a trailing axis so the mask broadcasts across the hidden states,
    # and match their dtype so the multiplication stays in floating point.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)

    summed = (hidden * mask).sum(dim=1)
    # clamp guards against division by zero if a row had no unmasked positions.
    counts = mask.sum(dim=1).clamp(min=1)
    return summed / counts
# Function to compute similarity between two sentences
def compare_sentences(text1, text2):
    """Score how similar two sentences are and format the result for display.

    Each sentence is embedded with ``compute_embedding`` and the two
    embeddings are compared with cosine similarity.

    Args:
        text1: First sentence.
        text2: Second sentence.

    Returns:
        A string of the form ``"Similarity Score: 0.1234"`` (four decimals).
    """
    # Embed in argument order: text1 first, then text2.
    first_vec, second_vec = (
        compute_embedding(sentence) for sentence in (text1, text2)
    )

    # dim=1 is the function's default; spelled out here for readability.
    score_tensor = cosine_similarity(first_vec, second_vec, dim=1)
    # .item() requires a one-element tensor and yields a plain Python float.
    similarity_score = score_tensor.item()
    return f"Similarity Score: {similarity_score:.4f}"
# Gradio interface for input
# Two text inputs map positionally onto compare_sentences(text1, text2);
# the returned string is shown in a single text output.
iface = gr.Interface(
    fn=compare_sentences,
    inputs=["text", "text"],
    outputs="text",
    title="Sentence Similarity",
    description="Enter two sentences to compute their similarity.",
)

# Start the web app as soon as the script is executed.
iface.launch()
|