File size: 3,098 Bytes
35dd57f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import os
import re
import faiss
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer
import PyPDF2

# ------------------ Setup ------------------
@st.cache_resource(show_spinner=False)
def _load_embedder():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the model as a resource keeps it from being
    re-instantiated on each rerun.  The spinner is disabled so that no UI
    element is emitted before ``st.set_page_config`` runs further down.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


# The API key is read from Streamlit's secrets store.
GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
groq_client = Groq(api_key=GROQ_API_KEY)
embedder = _load_embedder()

# ------------------ Sample Medical Ranges ------------------
reference_data = {
    "Hemoglobin": "13.5-17.5 g/dL for men, 12.0-15.5 g/dL for women",
    "Cholesterol": "Below 200 mg/dL is desirable",
    "WBC": "4,500 to 11,000 cells/mcL",
    "Platelets": "150,000 to 450,000 platelets/mcL",
    "Blood Sugar": "Fasting <100 mg/dL, 2-hr post-meal <140 mg/dL"
}
reference_chunks = [f"{k}: {v}" for k, v in reference_data.items()]
reference_embeddings = embedder.encode(reference_chunks)
faiss_index = faiss.IndexFlatL2(reference_embeddings.shape[1])
faiss_index.add(np.array(reference_embeddings).astype("float32"))

# ------------------ Helper Functions ------------------
def extract_text(file):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The concatenated page texts.  Pages for which extraction yields
        nothing (``None`` or an empty string) are skipped, so the result is
        ``""`` when no page has extractable text.
    """
    reader = PyPDF2.PdfReader(file)
    # Extract each page exactly once; calling extract_text() both in the
    # filter and for the value would parse every page twice.
    page_texts = (page.extract_text() for page in reader.pages)
    return " ".join(page_text for page_text in page_texts if page_text)

def extract_lab_values(text, tests=None):
    """Find the first numeric value reported after each known test name.

    Matching is case-insensitive.  A test name may be followed by optional
    whitespace and an optional ``:`` or ``-`` before the number.

    Args:
        text: Plain text of the lab report.
        tests: Optional iterable of test names to look for.  Defaults to the
            keys of the module-level ``reference_data``.

    Returns:
        A dict mapping each test name that was found to its value as a
        string.  Thousands separators are removed ("7,500" -> "7500").
        Tests that do not appear in *text* are omitted.
    """
    names = reference_data if tests is None else tests

    # Either a comma-grouped number (1,234 / 150,000.5) or a plain number.
    # The grouped form is tried first so "7,500" is not truncated to "7";
    # the (?!\d) guard rejects malformed groupings such as "1,2345".
    number = r"(\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+\.?\d*)"

    results = {}
    for test in names:
        # Escape the name so it is matched literally, and let any run of
        # whitespace (double spaces, line breaks from PDF extraction)
        # separate the words of multi-word names like "Blood Sugar".
        label = r"\s+".join(re.escape(word) for word in test.split())
        pattern = re.compile(label + r"\s*[:\-]?\s*" + number, re.IGNORECASE)
        match = pattern.search(text)
        if match:
            results[test] = match.group(1).replace(",", "")
    return results

def retrieve_medical_info(query):
    """Return the reference chunk whose embedding is nearest to *query*.

    The query is encoded with the module-level ``embedder``, the single
    nearest neighbour is looked up in ``faiss_index``, and the matching
    entry of ``reference_chunks`` is returned.
    """
    query_vector = np.array(embedder.encode([query])).astype("float32")
    _distances, neighbours = faiss_index.search(query_vector, 1)
    # One query row, one neighbour requested: take the only index returned.
    best_position = neighbours[0][0]
    return reference_chunks[best_position]

def query_llm(context, question):
    """Ask the Groq chat model *question*, grounded on *context*.

    Builds a single user message that embeds the context and the question,
    sends it through ``groq_client`` and returns the content of the first
    choice in the response.
    """
    # Assembled line by line; the second line is four spaces on purpose so
    # the resulting text is identical to the original template.
    prompt = (
        "You are a medical assistant. Based on this context:\n"
        "    \n"
        f"{context}\n"
        "\n"
        "Answer this question in very simple words:\n"
        f"{question}"
    )

    user_message = {"role": "user", "content": prompt}
    completion = groq_client.chat.completions.create(
        messages=[user_message],
        model="llama-3.1-8b-instant",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# ------------------ Streamlit UI ------------------
# Page flow: upload a PDF -> extract known lab values -> let the user ask a
# question that is answered by the LLM using those values plus the matching
# reference ranges as context.
st.set_page_config(page_title="🧪 Medical Report Interpreter", layout="centered")
st.title("🧪 Medical Report Interpreter with RAG + Groq")

uploaded = st.file_uploader("Upload a Lab Report (PDF)", type=["pdf"])
if uploaded:
    with st.spinner("Reading and processing your medical report..."):
        text = extract_text(uploaded)
        labs = extract_lab_values(text)

    if not labs:
        # Nothing recognisable was extracted (e.g. a scanned PDF without a
        # text layer).  Do not report success or query the LLM with an empty
        # context, since the answer would not be based on the report.
        st.warning(
            "No known lab values could be found in this report. "
            "Please upload a text-based PDF that lists the test names and values."
        )
    else:
        st.success("Report processed. Here's what I found:")
        st.write(labs)

        question = st.text_input("Ask a question about your lab results")

        if question:
            # One line per extracted value, each paired with the reference
            # text retrieved for that test name.
            combined_context = "".join(
                f"\n{lab} value is {value}. Reference: {retrieve_medical_info(lab)}"
                for lab, value in labs.items()
            )

            with st.spinner("Thinking..."):
                answer = query_llm(combined_context, question)

            st.subheader("📋 Explanation:")
            st.write(answer)

st.info("This app is for educational purposes only. Always consult a doctor for medical decisions.")