Spaces:
Runtime error
Runtime error
File size: 1,192 Bytes
f27e986 14f41f0 f27e986 14f41f0 f27e986 14f41f0 f27e986 14f41f0 f27e986 14f41f0 f27e986 14f41f0 f27e986 14f41f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import re
import pdfplumber
import pandas as pd
import torch
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        pdf_path: Path (or other source accepted by ``pdfplumber.open``)
            of the PDF to read.

    Returns:
        A single string containing the extracted text of each page,
        separated by ``"\\n"``. Pages for which ``extract_text()`` returns
        a falsy value (``None`` or an empty string) are skipped, so the
        result is ``""`` when no page yields text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once: text extraction is the costly
        # step, so the same result is reused for filtering and joining.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(text for text in page_texts if text)
# One "name: value [unit]" lab entry. ``[^\S\r\n]`` means "whitespace that is
# not a line break": the test name and the trailing unit must stay on a
# single line, otherwise a match can swallow the following line (dropping
# that test) or glue leftovers of the previous line onto the next name.
_LAB_ENTRY_PATTERN = re.compile(
    r"[-•]?[^\S\r\n]*"               # optional bullet and indentation
    r"((?:[\w/()%.-]|[^\S\r\n])+?)"  # test name, confined to one line
    r":\s*"                          # separator; value may follow on next line
    r"([\d.]+)"                      # numeric value (validated by float())
    r"(?:[^\S\r\n]*\w+/?.*)?"        # optional unit / rest of the line, unused
)


def extract_lab_tests_dict(text):
    """Parse ``name: value`` lab entries out of free-form report text.

    Each entry may be preceded by a ``-`` or ``•`` bullet and followed by a
    unit; the unit is ignored. Test names may contain word characters,
    spaces, and ``/ ( ) % . -`` but never span more than one line.

    Args:
        text: Report text, typically several lines of ``Test: 1.23 unit``.

    Returns:
        A dict mapping the stripped test name to its value as ``float``.
        Entries whose value is not a valid float (for example ``1.2.3``)
        are skipped. If a name occurs more than once, the last value wins.
    """
    lab_dict = {}
    for name, raw_value in _LAB_ENTRY_PATTERN.findall(text):
        try:
            value = float(raw_value)
        except ValueError:
            # The value pattern is permissive (digits and dots); skip
            # anything that is not actually a number.
            continue
        lab_dict[name.strip()] = value
    return lab_dict
def prepare_lab_tensor(lab_data, feature_list):
    """Build a single-row float32 tensor of lab values in feature order.

    Args:
        lab_data: Mapping from test name to numeric value.
        feature_list: Iterable of test names fixing the column order.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, len(feature_list))``.
        Features absent from ``lab_data`` are filled with ``-1``.
    """
    row = []
    for name in feature_list:
        # -1 marks a feature that was not found in the report.
        row.append(lab_data.get(name, -1))
    return torch.tensor([row], dtype=torch.float32)
def load_icd_mapping(csv_path):
    """Load the label-to-ICD lookup table from a CSV file.

    Args:
        csv_path: Path or buffer readable by ``pandas.read_csv``. The file
            must have ``ICD_Label``, ``ICD Code`` and ``Diagnosis`` columns.

    Returns:
        A dict keyed by ``int(ICD_Label)`` whose values are
        ``(ICD Code, ICD_Label, Diagnosis)`` tuples. When a label appears
        on several rows, only the first row is kept.

    Raises:
        ValueError: If any of the three required columns is missing.
    """
    table = pd.read_csv(csv_path)

    required = {"ICD_Label", "ICD Code", "Diagnosis"}
    if required.difference(table.columns):
        raise ValueError("CSV must include ICD_Label, ICD Code, Diagnosis columns.")

    # Keep the first occurrence of each label before building the lookup.
    unique_rows = table.drop_duplicates(subset="ICD_Label")

    mapping = {}
    for _, record in unique_rows.iterrows():
        label = record["ICD_Label"]
        mapping[int(label)] = (record["ICD Code"], label, record["Diagnosis"])
    return mapping
|