# Hugging Face Spaces status banner captured with the source (not code):
# Spaces: Runtime error
# Runtime error
import os
import time
import gradio as gr
import re
import requests
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
import PyPDF2

# Fix vs. original: the script previously ran `os.system("huggingface-cli login")`,
# which waits for interactive input and therefore hangs forever on a headless
# Space, and `os.system("apt-get update")`, which installs nothing. Authentication
# is handled non-interactively below via the HUGGINGFACEHUB_API_TOKEN env var,
# which is already passed to `from_pretrained`.

# Load the Llama-3.2-3B-Instruct model and tokenizer.
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
print("Loading Llama model...")
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=API_TOKEN)

# Sentence encoder used to embed documents and queries for FAISS retrieval.
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Paths for required files.
INDEX_PATH = "blood_test_index.faiss"  # persisted FAISS index of document embeddings
CSV_PATH = "rag_documents.csv"         # RAG source documents (must contain a 'Content' column)
def generate_faiss_index():
    """Build a FAISS L2 index over the 'Content' column of CSV_PATH.

    Embeds every document row with the module-level sentence encoder and
    persists the resulting index to INDEX_PATH.

    Raises:
        ValueError: if the CSV has no 'Content' column.
    """
    documents = pd.read_csv(CSV_PATH)
    if "Content" not in documents.columns:
        raise ValueError("The CSV file must contain a 'Content' column.")
    print("Encoding sentences...")
    vectors = encoder.encode(documents["Content"].tolist()).astype("float32")
    print("Creating FAISS index...")
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)
    faiss.write_index(flat_index, INDEX_PATH)
    print("FAISS index generated successfully!")
# Bootstrap: build the FAISS index on first run (requires the document CSV),
# then load both the index and the document table used for retrieval.
if not os.path.exists(INDEX_PATH):
    print("Generating FAISS index...")
    if not os.path.exists(CSV_PATH):
        raise FileNotFoundError(f"The required file '{CSV_PATH}' is missing!")
    generate_faiss_index()

print("Loading FAISS index...")
index = faiss.read_index(INDEX_PATH)
rag_df = pd.read_csv(CSV_PATH)
def load_thresholds(file_path="blood_test_thresholds.csv"):
    """Load per-parameter reference ranges from a CSV file.

    The CSV must contain 'Parameter', 'Low', 'High' and 'Unit' columns.

    Returns:
        dict mapping parameter name -> {"low": ..., "high": ..., "unit": ...}.
    """
    table = pd.read_csv(file_path)
    return {
        record["Parameter"]: {
            "low": record["Low"],
            "high": record["High"],
            "unit": record["Unit"],
        }
        for _, record in table.iterrows()
    }
def extract_text_from_pdf(pdf_path):
    """Return the concatenated extracted text of every page in a PDF.

    Pages that yield no text (extract_text() returns None) contribute "".
    """
    with open(pdf_path, "rb") as handle:
        reader = PyPDF2.PdfReader(handle)
        pages = [page.extract_text() or "" for page in reader.pages]
    return "".join(pages)
def retrieve_context(query, index, document_df):
    """Return the 3 nearest documents to `query`, joined into one string.

    Embeds the query with the module-level encoder, searches the FAISS
    index, and looks up the matching rows' 'Content' in `document_df`.
    """
    embedded = encoder.encode([query]).astype("float32")
    _, neighbor_ids = index.search(embedded, k=3)
    snippets = [document_df.iloc[row]["Content"] for row in neighbor_ids[0]]
    return " ".join(snippets)
def generate_response_with_llama(flagged_abnormality, context):
    """Generate medical advice for a flagged abnormality with the Llama model.

    Args:
        flagged_abnormality: human-readable status, e.g. "Low (10.2 g/dL)".
        context: retrieved RAG text; only the first 300 characters are used.

    Returns:
        The decoded model output (includes the prompt prefix).
    """
    prompt = (
        f"Flagged Abnormality: {flagged_abnormality}\n"
        f"Context: {context[:300]}.\n"
        f"Provide specific and actionable medical advice for the abnormality:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    # Fixes vs. original:
    #  - pass attention_mask explicitly (avoids the transformers warning and
    #    wrong results with padded inputs);
    #  - do_sample=True so top_p/temperature actually take effect (they are
    #    silently ignored under greedy decoding);
    #  - max_new_tokens instead of max_length: the old max_length=150 counted
    #    prompt tokens, so a long prompt (up to 512 tokens) left no room for
    #    the generated answer.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def analyze_blood_report(extracted_text, thresholds):
    """Parse report text, flag out-of-range values, and build recommendations.

    Args:
        extracted_text: raw text extracted from the PDF report.
        thresholds: mapping parameter name -> {"low", "high", "unit"}.

    Returns:
        (flagged, recommendations): `flagged` maps parameter -> "Low/High (...)"
        status strings; `recommendations` maps each flagged parameter to
        LLM-generated advice grounded in retrieved context.
    """
    # "<name> [:|-] <number> [unit]" — name is letters/spaces/#, value numeric.
    pattern = re.compile(r"([A-Za-z\s\#]+)\s*[:\-]?\s*([\d.]+)\s*([a-zA-Z/\s]*)")
    measurements = {}
    for raw_line in extracted_text.split("\n"):
        stripped = raw_line.strip()
        if not stripped:
            continue
        found = pattern.search(stripped)
        if not found:
            continue
        name = found.group(1).strip()
        number = found.group(2).strip()
        unit = found.group(3).strip()
        try:
            measurements[name] = {"value": float(number), "unit": unit}
        except ValueError:
            continue  # malformed numeric field; skip this line

    flagged = {}
    for name, entry in measurements.items():
        limits = thresholds.get(name)
        if limits is None:
            continue  # parameter without a known reference range
        value, unit = entry["value"], entry["unit"]
        if value < limits["low"]:
            flagged[name] = f"Low ({value} {unit})"
        elif value > limits["high"]:
            flagged[name] = f"High ({value} {unit})"

    recommendations = {}
    for name, status in flagged.items():
        query = f"The blood test result for {name} is {status}."
        context = retrieve_context(query, index, rag_df)
        recommendations[name] = generate_response_with_llama(status, context)
    return flagged, recommendations
def process_pdf(pdf_path):
    """Full pipeline for one uploaded PDF: extract text, analyze, package.

    Returns a JSON-serializable dict for the Gradio JSON output pane.
    """
    reference_ranges = load_thresholds()
    report_text = extract_text_from_pdf(pdf_path)
    flagged, recommendations = analyze_blood_report(report_text, reference_ranges)
    return {"Flagged Abnormalities": flagged, "Recommendations": recommendations}
# Gradio UI: one file-upload input (passed to process_pdf as a filesystem
# path) and a JSON output pane for the analysis dict.
interface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="filepath", label="Upload Blood Test PDF"),
    outputs="json",
    title="Blood Test Analyzer (PDF)",
    description=(
        "Upload a PDF blood test report. This tool extracts data, flags "
        "abnormalities, and provides medical recommendations."
    ),
)

if __name__ == "__main__":
    interface.launch(share=True)