SeverityAnalysis / utils.py
bohraanuj23's picture
Changes in the model architecture implemented
14f41f0
import re
import pdfplumber
import pandas as pd
import torch
def extract_text_from_pdf(pdf_path):
    """Return the text of all pages of a PDF joined by newlines.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open`` to locate
            the PDF.

    Returns:
        A single string made of each page's ``extract_text()`` result,
        separated by ``"\\n"``. Pages whose extraction result is falsy
        (``None`` or an empty string) are skipped, so an image-only PDF
        yields an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once and reuse the result for both the
        # emptiness filter and the output, instead of extracting twice.
        page_texts = (page.extract_text() for page in pdf.pages)
        # The join must run inside the ``with`` block: the generator is lazy
        # and needs the PDF to still be open while it is consumed.
        return "\n".join(text for text in page_texts if text)
# One "name: value [unit]" entry, optionally preceded by a "-" or "•" bullet.
# Group 1 is the test name, group 2 the numeric value; the trailing unit part
# is matched only so that it is consumed and not re-scanned as a new entry.
_LAB_ENTRY_RE = re.compile(r"[-•]?\s*([\w\s/()%.-]+?):\s*([\d.]+)\s*(\w+/?.*)?")


def extract_lab_tests_dict(text):
    """Parse ``name: value`` lab results out of free text.

    The text is scanned one line at a time. ``\\s`` in the pattern also
    matches newlines, so scanning the whole text at once lets an entry
    without a unit swallow the following line as its "unit", and lets a
    heading line without a colon become part of the next test's name.
    Working per line keeps every entry confined to its own line.

    Args:
        text: Raw text, e.g. as extracted from a lab report.

    Returns:
        dict mapping the stripped test name to its value as ``float``.
        Entries whose value is not a valid float (such as ``"1.2.3"``) and
        entries with an empty name are ignored. If a name occurs more than
        once, the last occurrence wins.
    """
    lab_dict = {}
    for line in text.splitlines():
        for match in _LAB_ENTRY_RE.finditer(line):
            test = match.group(1).strip()
            if not test:
                # Nothing but whitespace before the colon: not a real entry.
                continue
            try:
                lab_dict[test] = float(match.group(2))
            except ValueError:
                # "[\d.]+" also accepts strings like "." or "1.2.3".
                continue
    return lab_dict
def prepare_lab_tensor(lab_data, feature_list):
    """Build a single-row float32 tensor from a lab-result mapping.

    Args:
        lab_data: Mapping from feature name to numeric value.
        feature_list: Feature names, in the column order wanted in the
            output.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, len(feature_list))``.
        Features missing from ``lab_data`` are filled with ``-1``.
    """
    row = []
    for name in feature_list:
        # -1 marks a feature that was not found in the lab data.
        row.append(lab_data.get(name, -1))
    # Wrap the row in an outer list so the result has a leading dimension of 1.
    return torch.tensor([row], dtype=torch.float32)
def load_icd_mapping(csv_path):
    """Load an ICD lookup table from a CSV file.

    Args:
        csv_path: Location of the CSV, passed to ``pandas.read_csv``.

    Returns:
        dict keyed by ``int(ICD_Label)`` whose values are
        ``(ICD Code, ICD_Label, Diagnosis)`` tuples. When a label appears
        on several rows, only the first row is kept.

    Raises:
        ValueError: If any of the ``ICD_Label``, ``ICD Code`` or
            ``Diagnosis`` columns is missing.
    """
    required_columns = {"ICD_Label", "ICD Code", "Diagnosis"}
    frame = pd.read_csv(csv_path)

    if required_columns - set(frame.columns):
        raise ValueError("CSV must include ICD_Label, ICD Code, Diagnosis columns.")

    # One entry per label; drop_duplicates keeps the first occurrence.
    unique_rows = frame.drop_duplicates(subset="ICD_Label")

    mapping = {}
    for _, record in unique_rows.iterrows():
        label = record["ICD_Label"]
        mapping[int(label)] = (record["ICD Code"], label, record["Diagnosis"])
    return mapping