| | import os |
| | import requests |
| | import torch |
| | import joblib |
| | import numpy as np |
| | from fastapi import FastAPI, HTTPException |
| | from pydantic import BaseModel |
| | from sklearn.metrics.pairwise import cosine_similarity |
| | from transformers import AutoModel, AutoTokenizer |
| |
|
| |
|
| |
|
| | |
# ASGI application object; route handlers attach to it through decorators.
app = FastAPI()

# The serialized artifacts are looked up in the directory of this source file.
_module_dir = os.path.dirname(__file__)
model_path = os.path.join(_module_dir, "model.joblib")
tokenizer_path = os.path.join(_module_dir, "tokenizer.joblib")

# Deserialize both objects once at import time so all requests share them.
# NOTE(review): joblib.load unpickles its input -- only point it at trusted files.
model = joblib.load(model_path)
tokenizer = joblib.load(tokenizer_path)
| |
|
| |
|
def get_embedding(text: str) -> np.ndarray:
    """Embed *text* by averaging the model's final hidden states.

    The text is tokenized into PyTorch tensors (padding and truncation
    enabled), fed to the module-level ``model`` with gradient tracking
    disabled, and the resulting ``last_hidden_state`` is averaged over
    dimension 1.

    Args:
        text: The input string to embed.

    Returns:
        The averaged hidden state converted to a NumPy array.
    """
    encoded = tokenizer(
        text,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )
    # Inference only: no autograd graph is needed, and a tensor produced
    # under no_grad can be converted to NumPy directly.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    pooled = hidden_states.mean(dim=1)
    return pooled.numpy()
| |
|
| |
|
@app.post("/compare_texts")
def compare_texts(text1: str, text2: str):
    """Compare the similarity between two text inputs.

    Each text is embedded with ``get_embedding`` and the cosine similarity
    of the two embeddings is computed.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A dict with a single key, ``"similarity_score"``, holding the cosine
        similarity rounded to four decimal places as a built-in ``float``.
    """
    text1_embedding = get_embedding(text1)
    text2_embedding = get_embedding(text2)
    # cosine_similarity returns a 2-D matrix; with one embedding per side the
    # only entry is at [0][0].
    similarity = cosine_similarity(text1_embedding, text2_embedding)[0][0]
    # The entry is a NumPy scalar, and round() on a NumPy scalar yields
    # another NumPy scalar. FastAPI's JSON encoder cannot serialize types
    # such as numpy.float32, so convert to a built-in float before rounding.
    return {"similarity_score": round(float(similarity), 4)}
| |
|