| | import torch |
| | from transformers import AutoTokenizer, AutoModel |
| | from torch.nn.functional import cosine_similarity |
| | import gradio as gr |
| |
|
# Multilingual BERT checkpoint: one shared encoder for sentences in ~104
# languages, so cross-lingual pairs can be compared in the same vector space.
model_name = 'bert-base-multilingual-cased'
# NOTE: both calls download weights on first run (network side effect) and
# load them eagerly at import time.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def compute_embedding(text):
    """Return a mean-pooled BERT embedding for *text*.

    Args:
        text: A single string, or a list of strings for batched use.

    Returns:
        torch.Tensor of shape (batch, hidden_size): the token embeddings
        averaged over the sequence dimension, weighted by the attention
        mask so padding tokens do not skew the result.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    hidden = outputs.last_hidden_state  # (batch, seq_len, hidden_size)
    # Mask-aware mean pooling. A plain hidden.mean(dim=1) would include
    # padding positions whenever sequences of different lengths are batched
    # together; for a single input (all-ones mask) this is identical.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    summed = (hidden * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against divide-by-zero
    return summed / counts
| |
|
| | |
def compare_sentences(text1, text2):
    """Embed two sentences and report their cosine similarity.

    Args:
        text1: First sentence.
        text2: Second sentence.

    Returns:
        A human-readable string with the similarity score to 4 decimals.
    """
    vec_a, vec_b = (compute_embedding(s) for s in (text1, text2))
    score = cosine_similarity(vec_a, vec_b).item()
    return f"Similarity Score: {score:.4f}"
| |
|
| | |
# Gradio UI: two free-text inputs passed positionally to compare_sentences,
# one text output showing the formatted similarity score.
iface = gr.Interface(fn=compare_sentences,
                     inputs=["text", "text"],
                     outputs="text",
                     title="Sentence Similarity",
                     description="Enter two sentences to compute their similarity.")

# Starts the local web server (blocking call).
iface.launch()
| |
|