medextract / app /models /icd_model.py
harsh-dev's picture
Add
ec563fd
raw
history blame contribute delete
662 Bytes
from sentence_transformers import SentenceTransformer
import pandas as pd
from app.config import settings
# Module-level placeholders for the ICD dataframe and the embedding model;
# both start out unset (None) until a loader rebinds them.
df = model = None
def load_icd_dataset():
    """Load the ICD dataframe and the sentence-embedding model.

    Reads the pickled dataframe at ``settings.ICD_DATA_PATH`` and instantiates
    the ``all-MiniLM-L6-v2`` SentenceTransformer. When the dataframe has no
    ``"encoded"`` column, every ``"Description"`` value is embedded (with
    normalized embeddings), the vectors are stored in that column, and the
    dataframe is pickled back to the same path so the embeddings are already
    present on the next load.

    The module-level ``df`` and ``model`` are rebound as a side effect.

    Returns:
        A ``(df, model)`` tuple, or ``(None, None)`` when a
        ``FileNotFoundError`` is raised while loading.
    """
    global df, model
    import logging

    try:
        # NOTE: unpickling can execute arbitrary code; the configured path
        # must point at a trusted artifact.
        df = pd.read_pickle(settings.ICD_DATA_PATH)
        model = SentenceTransformer('all-MiniLM-L6-v2')

        if "encoded" not in df.columns:
            # Encode all descriptions in one batched call rather than invoking
            # the model once per row; each row still gets its own vector.
            embeddings = model.encode(
                df["Description"].tolist(), normalize_embeddings=True
            )
            df["encoded"] = list(embeddings)
            # Persist the embeddings next to the data for later loads.
            df.to_pickle(settings.ICD_DATA_PATH)
    except FileNotFoundError as exc:
        # Best-effort load: keep returning (None, None), but leave a trace so
        # the missing file is diagnosable instead of silently ignored.
        logging.getLogger(__name__).warning(
            "ICD dataset could not be loaded: %s", exc
        )
        df, model = None, None
    return df, model
# Load the dataset and the embedding model once, when this module is imported.
# Both names are None if load_icd_dataset() hit a FileNotFoundError.
df, model = load_icd_dataset()