medextract / app /utils /formatters.py
harsh-dev's picture
Add
ec563fd
raw
history blame contribute delete
789 Bytes
import json
import pandas as pd
from app.utils.icd_utils import get_ICD_Code
def clean_json_response(response_text: str):
    """Parse a JSON document that may be wrapped in a Markdown code fence.

    Surrounding whitespace is dropped, then at most one opening fence
    (three backticks, optionally followed by a "json" tag in any letter
    case) and at most one closing fence are removed before the text is
    handed to ``json.loads``. Only the outer fence is touched, so backticks
    that appear inside JSON string values are preserved.

    Args:
        response_text: Raw text, with or without a surrounding code fence.

    Returns:
        Whatever ``json.loads`` produces for the unwrapped text.

    Raises:
        json.JSONDecodeError: If the unwrapped text is not valid JSON.
    """
    fence = "```"
    tag = "json"
    text = response_text.strip()

    if text.startswith(fence):
        text = text[len(fence):]
        # The optional language tag sits directly after the opening fence.
        if text[:len(tag)].lower() == tag:
            text = text[len(tag):]

    if text.endswith(fence):
        text = text[:-len(fence)]

    return json.loads(text.strip())
def format_ICD_output(sample: dict):
    """Set the "ID" field of every record in ``sample`` via ``get_ICD_Code``.

    ``sample["records"]`` may be a list of record dicts or a JSON string
    encoding such a list. Each record's "TestName" value is passed to
    ``get_ICD_Code`` and the result is stored under "ID", overwriting any
    existing "ID". ``sample`` is modified in place and also returned.

    The records take a round trip through a pandas DataFrame, so a key that
    is missing from some records comes back filled with NaN in those records.

    Args:
        sample: Mapping with a "records" entry (list of dicts or JSON string).

    Returns:
        The same ``sample`` object, with "records" replaced by a list of
        dicts. An empty "records" yields an empty list.

    Raises:
        KeyError: If "records" is absent, or if there is at least one record
            and none of the records has a "TestName" key.
        json.JSONDecodeError: If "records" is a string that is not valid JSON.
    """
    records = sample["records"]
    if isinstance(records, str):
        # Accept the list in its JSON-serialized form as well.
        records = json.loads(records)

    frame = pd.DataFrame(records)

    # A frame built from zero records has no "TestName" column, so the
    # lookup is skipped instead of failing with KeyError.
    if len(frame) > 0:
        frame["ID"] = frame["TestName"].apply(get_ICD_Code)

    sample["records"] = frame.to_dict(orient="records")
    return sample