Update app.py
Browse files
app.py
CHANGED
|
@@ -1,38 +1,29 @@
|
|
| 1 |
-
import
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
"
|
| 8 |
-
|
| 9 |
-
"
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
"MCH": (27, 33),
|
| 13 |
-
"MCHC": (31, 36),
|
| 14 |
-
"Neutrophil %": (49, 74),
|
| 15 |
-
"Lymphocyte %": (26, 46),
|
| 16 |
-
"Monocyte %": (2, 12),
|
| 17 |
-
"Eosinophil %": (0, 5),
|
| 18 |
-
"Basophil %": (0, 2),
|
| 19 |
-
"Abs. Neutrophil": (2.0, 8.0),
|
| 20 |
-
"Abs. Lymphocyte": (1.2, 4.8),
|
| 21 |
-
"Abs. Monocyte": (0.0, 0.8),
|
| 22 |
-
"Abs. Eosinophil": (0.0, 0.5),
|
| 23 |
-
"Abs. Basophil": (0.0, 0.2),
|
| 24 |
-
}
|
| 25 |
|
| 26 |
def clean_and_parse_extracted_text(raw_text):
|
| 27 |
"""
|
| 28 |
Parse and clean the raw text to extract structured data.
|
| 29 |
"""
|
|
|
|
| 30 |
lines = raw_text.split("\n")
|
| 31 |
lines = [line.strip() for line in lines if line.strip()]
|
| 32 |
|
|
|
|
| 33 |
data = []
|
| 34 |
for line in lines:
|
| 35 |
-
# Match rows
|
| 36 |
match = re.match(
|
| 37 |
r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
|
| 38 |
line,
|
|
@@ -48,16 +39,9 @@ def clean_and_parse_extracted_text(raw_text):
|
|
| 48 |
else:
|
| 49 |
min_val = None
|
| 50 |
max_val = None
|
| 51 |
-
|
| 52 |
unit = match.group(7)
|
| 53 |
flag = "Normal" # Default flag
|
| 54 |
|
| 55 |
-
# Use default ranges if OCR fails to extract them
|
| 56 |
-
if min_val is None or max_val is None:
|
| 57 |
-
default_range = DEFAULT_RANGES.get(component)
|
| 58 |
-
if default_range:
|
| 59 |
-
min_val, max_val = default_range
|
| 60 |
-
|
| 61 |
# Determine the flag based on value and range
|
| 62 |
if min_val is not None and max_val is not None:
|
| 63 |
if value < min_val:
|
|
@@ -71,40 +55,49 @@ def clean_and_parse_extracted_text(raw_text):
|
|
| 71 |
# Create a DataFrame
|
| 72 |
df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
return df
|
| 78 |
|
| 79 |
-
|
|
|
|
| 80 |
"""
|
| 81 |
-
|
| 82 |
"""
|
| 83 |
-
|
| 84 |
-
import pytesseract
|
| 85 |
-
raw_text = pytesseract.image_to_string(image)
|
| 86 |
|
| 87 |
-
# Step 2: Parse and analyze the extracted text
|
| 88 |
-
df = clean_and_parse_extracted_text(raw_text)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
|
|
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
outputs=gr.DataFrame(label="Blood Test Analysis"),
|
| 105 |
-
title="Blood Test Analyzer",
|
| 106 |
-
description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
|
| 107 |
-
)
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pytesseract
|
| 4 |
import pandas as pd
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def extract_text(image):
    """Run Tesseract OCR over ``image`` and return the recognised text.

    The image is handed to ``pytesseract.image_to_string`` unchanged, and
    whatever that call returns is passed straight back to the caller.
    """
    ocr_output = pytesseract.image_to_string(image)
    return ocr_output
|
| 13 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def clean_and_parse_extracted_text(raw_text):
|
| 16 |
"""
|
| 17 |
Parse and clean the raw text to extract structured data.
|
| 18 |
"""
|
| 19 |
+
# Split the text into lines and clean up
|
| 20 |
lines = raw_text.split("\n")
|
| 21 |
lines = [line.strip() for line in lines if line.strip()]
|
| 22 |
|
| 23 |
+
# Identify and extract rows with valid components
|
| 24 |
data = []
|
| 25 |
for line in lines:
|
| 26 |
+
# Match rows containing numeric ranges and values
|
| 27 |
match = re.match(
|
| 28 |
r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
|
| 29 |
line,
|
|
|
|
| 39 |
else:
|
| 40 |
min_val = None
|
| 41 |
max_val = None
|
|
|
|
| 42 |
unit = match.group(7)
|
| 43 |
flag = "Normal" # Default flag
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Determine the flag based on value and range
|
| 46 |
if min_val is not None and max_val is not None:
|
| 47 |
if value < min_val:
|
|
|
|
| 55 |
# Create a DataFrame
|
| 56 |
df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
|
| 57 |
|
| 58 |
+
# Fix misspellings and inconsistencies (if any known issues exist)
|
| 59 |
+
correction_map = {
|
| 60 |
+
"emoglobin": "Hemoglobin",
|
| 61 |
+
"ematocrit": "Hematocrit",
|
| 62 |
+
"% Platelet Count": "Platelet Count",
|
| 63 |
+
"ymphocyte %": "Lymphocyte %",
|
| 64 |
+
"L Differential Type Automated": "Differential Type",
|
| 65 |
+
}
|
| 66 |
+
df["Component"] = df["Component"].replace(correction_map)
|
| 67 |
|
| 68 |
return df
|
| 69 |
|
| 70 |
+
|
| 71 |
+
def display_results(df):
    """Render ``df`` as a Streamlit dataframe using the container width."""
    table_options = {"use_container_width": True}
    st.dataframe(df, **table_options)
|
|
|
|
|
|
|
| 76 |
|
|
|
|
|
|
|
| 77 |
|
| 78 |
+
# Streamlit app: upload a report image, OCR it, and show the parsed table.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")

uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Top-level UI boundary: any failure while loading, OCR-ing or parsing
    # the upload is reported on the page through st.error.
    try:
        # Load the image
        image = Image.open(uploaded_file)

        # Display the uploaded image
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # Extract text from the image
        extracted_text = extract_text(image)

        # Parse the extracted text into a structured format
        parsed_data = clean_and_parse_extracted_text(extracted_text)

        # Display the structured data
        st.subheader("Parsed Blood Test Results")
        if parsed_data.empty:
            # The parser returned no rows: say so explicitly instead of
            # leaving the user with a blank table and no explanation.
            st.warning(
                "No blood test values could be recognized in the uploaded image."
            )
        else:
            display_results(parsed_data)

    except Exception as e:
        st.error(f"An error occurred: {e}")
|