Spaces:
Runtime error
Runtime error
| import re | |
| import pdfplumber | |
| import pandas as pd | |
| import torch | |
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open``.

    Returns:
        One string containing the extracted text of each page, separated
        by a newline. Pages whose ``extract_text()`` result is falsy
        (``None`` or an empty string) are skipped.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once: the previous version called
        # extract_text() twice per page (once to filter, once for the value).
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n".join(text for text in page_texts if text)
def extract_lab_tests_dict(text):
    """Parse ``name: value [unit]`` lab results out of free text.

    A result is an optional ``-``/``•`` bullet, a test name made of word
    characters, spaces and ``/ ( ) % . -``, a colon, and a number built
    from digits and dots. Anything after the number (the unit) is matched
    but not returned.

    Args:
        text: Text to scan, e.g. the output of a PDF text extraction.

    Returns:
        dict mapping the stripped test name to its value as ``float``.
        When a name occurs more than once, the last occurrence wins.
        Entries whose number cannot be parsed are skipped.
    """
    # The name may contain horizontal whitespace only ([^\S\r\n]); allowing
    # "\s" let a name swallow preceding lines, producing keys such as
    # "Lab Report\nHemoglobin" that never match a feature name.
    pattern = (
        r"[-•]?\s*((?:[\w/()%.-]|[^\S\r\n])+?)"
        r":\s*([\d.]+)\s*(\w+/?.*)?"
    )
    lab_dict = {}
    for raw_name, raw_value, _unit in re.findall(pattern, text):
        name = raw_name.strip()
        if not name:
            # A whitespace-only "name" carries no information; skip it.
            continue
        try:
            lab_dict[name] = float(raw_value)
        except ValueError:
            # "[\d.]+" also accepts strings like "1.2.3" or "."; ignore them.
            continue
    return lab_dict
def prepare_lab_tensor(lab_data, feature_list, *, missing_value=-1):
    """Build a single-row float32 tensor of lab values in feature order.

    Args:
        lab_data: Mapping of test name to numeric value (anything with a
            ``.get(key, default)`` method).
        feature_list: Ordered feature names; fixes the column order.
        missing_value: Value used for features absent from ``lab_data``.
            Defaults to ``-1``, the sentinel that was previously hard-coded.

    Returns:
        ``torch.Tensor`` of dtype ``float32`` and shape
        ``(1, len(feature_list))``.
    """
    row = [lab_data.get(feature, missing_value) for feature in feature_list]
    # Wrap the row in an outer list so the result has a leading dimension of 1.
    return torch.tensor([row], dtype=torch.float32)
def load_icd_mapping(csv_path):
    """Load the label -> ICD lookup table from a CSV file.

    Args:
        csv_path: Value passed straight to ``pandas.read_csv``.

    Returns:
        dict mapping ``int(ICD_Label)`` to a tuple
        ``(ICD Code, ICD_Label, Diagnosis)`` taken from the same row. When a
        label appears on several rows, only the first row is kept.

    Raises:
        ValueError: If any of the ``ICD_Label``, ``ICD Code`` or
            ``Diagnosis`` columns is missing.
    """
    required = ("ICD_Label", "ICD Code", "Diagnosis")
    df = pd.read_csv(csv_path)
    if not set(required).issubset(df.columns):
        raise ValueError("CSV must include ICD_Label, ICD Code, Diagnosis columns.")
    df = df.drop_duplicates(subset="ICD_Label")
    # Walk the three columns in lockstep instead of using iterrows(), which
    # builds a Series per row and does not preserve each column's dtype.
    return {
        int(label): (code, label, diagnosis)
        for label, code, diagnosis in zip(
            df["ICD_Label"], df["ICD Code"], df["Diagnosis"]
        )
    }