File size: 1,320 Bytes
c0268c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Load model + tokenizer.
# This runs at module import, so the tokenizer and encoder are created a single
# time and then read as module-level globals by encode() on every call.
# Both objects are resolved from the same identifier so they stay paired.
model_name = "nikhil061307/contrastive-learning-bert-added-token-v5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Function for inference: encode text -> embedding
def encode(text):
    """Return a mean-pooled embedding of ``text`` from the encoder's last layer.

    The text is tokenized with padding and truncation enabled, run through the
    module-level ``model`` with gradients disabled, and the final hidden states
    are averaged along dimension 1 (the sequence axis).

    The average is weighted by the tokenizer's attention mask so that padding
    positions do not contribute. For a single string nothing is padded and the
    result equals a plain mean; for a batch of texts with different lengths the
    mask keeps pad vectors from diluting the shorter texts' embeddings.

    Args:
        text: The input passed straight to the tokenizer (a string in this
            app; a list of strings should also work -- confirm against the
            tokenizer in use).

    Returns:
        A ``torch.Tensor`` holding the pooled embedding(s), with the sequence
        dimension reduced away.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
        mask = inputs.get("attention_mask")
        if mask is None:
            # No mask supplied by this tokenizer: every position counts equally.
            return hidden.mean(dim=1)
        # Broadcast the mask across the hidden dimension and match dtype so the
        # multiplication zeroes out padded positions.
        weights = mask.unsqueeze(-1).to(hidden.dtype)
        # Guard against division by zero should a row contain no real tokens.
        token_counts = weights.sum(dim=1).clamp(min=1.0)
        embedding = (hidden * weights).sum(dim=1) / token_counts
    return embedding

# Score how close two sentences are in embedding space
def compute_similarity(sentence1, sentence2):
    """Return the cosine similarity of two sentences as a JSON-friendly dict.

    Each sentence is embedded independently with ``encode``; the cosine
    similarity of the two embeddings is converted to a Python float and
    rounded to four decimal places.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A dict with the single key ``"similarity"`` mapped to the rounded score.
    """
    first_vec, second_vec = (encode(sentence) for sentence in (sentence1, sentence2))
    score = F.cosine_similarity(first_vec, second_vec).item()
    return dict(similarity=round(score, 4))

# Gradio UI: two free-text inputs, one JSON output produced by compute_similarity
demo = gr.Interface(
    title="Contrastive Learning BERT Similarity",
    description="Enter two sentences to compute their semantic similarity using the fine-tuned BERT model.",
    fn=compute_similarity,
    # One two-line textbox per sentence; only the placeholder text differs.
    inputs=[
        gr.Textbox(lines=2, placeholder=hint)
        for hint in ("Enter first sentence...", "Enter second sentence...")
    ],
    outputs="json",
)

# Start the web app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()