Spaces:
Runtime error
Runtime error
File size: 5,007 Bytes
import os
import time
import gradio as gr
import re
import requests
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
import PyPDF2
# --- Environment / model setup ----------------------------------------------
os.system("apt-get update")

# NOTE(review): the original ran `os.system("huggingface-cli login")`, which
# prompts interactively and hangs in a non-interactive Space. The token read
# from HUGGINGFACEHUB_API_TOKEN below is passed straight to from_pretrained,
# so no CLI login is needed.

# Load the Llama-3.2-3B-Instruct model and tokenizer.
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
print("Loading Llama model...")
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not API_TOKEN:
    # The model is gated; downloading without a token will fail.
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; model download may fail.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=API_TOKEN)

# Sentence encoder used to embed documents and queries for FAISS retrieval.
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Paths for the prebuilt retrieval index and its source documents.
INDEX_PATH = "blood_test_index.faiss"
CSV_PATH = "rag_documents.csv"
def generate_faiss_index(csv_path=None, index_path=None):
    """Build a FAISS L2 index over the 'Content' column of a document CSV.

    Args:
        csv_path: source CSV; defaults to the module-level CSV_PATH.
        index_path: output file for the index; defaults to INDEX_PATH.

    Raises:
        ValueError: if the CSV lacks a 'Content' column or has no rows
            (an empty frame would crash on embeddings.shape[1] below).
    """
    # None-sentinel defaults keep the original zero-argument call working.
    csv_path = CSV_PATH if csv_path is None else csv_path
    index_path = INDEX_PATH if index_path is None else index_path

    df = pd.read_csv(csv_path)
    if "Content" not in df.columns:
        raise ValueError("The CSV file must contain a 'Content' column.")
    if df["Content"].empty:
        raise ValueError("The CSV file contains no documents to index.")

    print("Encoding sentences...")
    embeddings = encoder.encode(df["Content"].tolist()).astype("float32")

    print("Creating FAISS index...")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, index_path)
    print("FAISS index generated successfully!")
def _build_missing_index():
    """Create the FAISS index file on first run, failing fast if the source CSV is absent."""
    print("Generating FAISS index...")
    if not os.path.exists(CSV_PATH):
        raise FileNotFoundError(f"The required file '{CSV_PATH}' is missing!")
    generate_faiss_index()


# Build the retrieval index if it does not exist yet, then load it together
# with the document table used to map search hits back to their text.
if not os.path.exists(INDEX_PATH):
    _build_missing_index()

print("Loading FAISS index...")
index = faiss.read_index(INDEX_PATH)
rag_df = pd.read_csv(CSV_PATH)
def load_thresholds(file_path="blood_test_thresholds.csv"):
    """Load reference ranges for blood-test parameters from a CSV file.

    The CSV must have Parameter, Low, High and Unit columns. Returns a dict
    mapping each parameter name to {"low": ..., "high": ..., "unit": ...}.
    """
    table = pd.read_csv(file_path)
    return {
        row["Parameter"]: {"low": row["Low"], "high": row["High"], "unit": row["Unit"]}
        for _, row in table.iterrows()
    }
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    with open(pdf_path, "rb") as handle:
        reader = PyPDF2.PdfReader(handle)
        # extract_text() can return None for image-only pages; treat as "".
        return "".join(page.extract_text() or "" for page in reader.pages)
def retrieve_context(query, index, document_df):
    """Return the text of the 3 documents nearest to *query* in the FAISS index.

    The query is embedded with the module-level sentence encoder; hits are
    mapped back to rows of *document_df* and joined into one context string.
    """
    query_vec = encoder.encode([query]).astype("float32")
    _, hit_ids = index.search(query_vec, k=3)
    snippets = [document_df.iloc[doc_id]["Content"] for doc_id in hit_ids[0]]
    return " ".join(snippets)
def generate_response_with_llama(flagged_abnormality, context):
    """Generate medical advice for a flagged abnormality, grounded in *context*.

    Fixes vs. the original:
    - do_sample=True: top_p/temperature are silently ignored under the
      default greedy decoding, so the sampling settings had no effect;
    - max_new_tokens instead of max_length: max_length counts the prompt
      tokens too, so any prompt longer than 150 tokens produced no output;
    - forward attention_mask and set pad_token_id to avoid undefined
      behavior/warnings when padding is involved.
    """
    prompt = (
        f"Flagged Abnormality: {flagged_abnormality}\n"
        f"Context: {context[:300]}.\n"
        f"Provide specific and actionable medical advice for the abnormality:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def analyze_blood_report(extracted_text, thresholds):
    """Parse parameter/value/unit readings out of raw report text and flag
    values outside the supplied reference ranges.

    Args:
        extracted_text: plain text of the report, one reading per line.
        thresholds: mapping of parameter name -> {"low", "high", "unit"}.

    Returns:
        (flagged, recommendations): flagged maps parameter -> "Low/High (...)"
        status strings; recommendations maps each flagged parameter to
        LLM-generated advice retrieved via the module-level RAG index.
    """
    pattern = re.compile(r"([A-Za-z\s\#]+)\s*[:\-]?\s*([\d.]+)\s*([a-zA-Z/\s]*)")

    # Parse "Name: 12.3 unit"-shaped lines; later duplicates overwrite earlier ones.
    readings = {}
    for raw_line in extracted_text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        hit = pattern.search(line)
        if hit is None:
            continue
        name = hit.group(1).strip()
        unit = hit.group(3).strip()
        try:
            readings[name] = {"value": float(hit.group(2).strip()), "unit": unit}
        except ValueError:
            continue

    # Compare each recognized reading against its reference range.
    flagged = {}
    for name, reading in readings.items():
        limits = thresholds.get(name)
        if limits is None:
            continue
        value, unit = reading["value"], reading["unit"]
        if value < limits["low"]:
            flagged[name] = f"Low ({value} {unit})"
        elif value > limits["high"]:
            flagged[name] = f"High ({value} {unit})"

    # Ask the LLM for advice on each abnormal reading, with retrieved context.
    recommendations = {}
    for name, status in flagged.items():
        query = f"The blood test result for {name} is {status}."
        context = retrieve_context(query, index, rag_df)
        recommendations[name] = generate_response_with_llama(status, context)

    return flagged, recommendations
def process_pdf(pdf_path):
    """Full pipeline: PDF path in, JSON-serializable analysis dict out.

    Loads the reference ranges, extracts the report text, and returns the
    flagged abnormalities with their generated recommendations.
    """
    reference_ranges = load_thresholds()
    report_text = extract_text_from_pdf(pdf_path)
    flagged, recommendations = analyze_blood_report(report_text, reference_ranges)
    return {"Flagged Abnormalities": flagged, "Recommendations": recommendations}
# Gradio front end: a single PDF upload in, the analysis dict rendered as JSON.
interface = gr.Interface(
    fn=process_pdf,
    title="Blood Test Analyzer (PDF)",
    description="Upload a PDF blood test report. This tool extracts data, flags abnormalities, and provides medical recommendations.",
    inputs=gr.File(type="filepath", label="Upload Blood Test PDF"),
    outputs="json",
)

if __name__ == "__main__":
    # share=True exposes a public tunnel URL in addition to the local server.
    interface.launch(share=True)