import os
import json
import numpy as np
import faiss
import gradio as gr
from PyPDF2 import PdfReader
import re
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
# Load the knowledge base
with open("knowledge_base.json", "r") as file:
    kb = json.load(file)
# Authenticate with Hugging Face (non-interactive; reads the token from the environment)
login(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
# Initialize the embedding model and FAISS index
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
kb_embeddings = embedding_model.encode(kb_texts)
kb_embeddings = np.array(kb_embeddings, dtype="float32")
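# IndexFlatL2 performs exact (brute-force) nearest-neighbor search with L2 distance,
# which is perfectly adequate for a small knowledge base like this one.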
index = faiss.IndexFlatL2(kb_embeddings.shape[1])
index.add(kb_embeddings)
# Load the Hugging Face LLM (LLaMA)
llama_model_name = "meta-llama/Llama-3.2-3B-Instruct"
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=API_TOKEN)
llm = AutoModelForCausalLM.from_pretrained(llama_model_name, token=API_TOKEN)
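# Note: Llama-3.2-3B-Instruct is a gated model, so the token must belong to an account
# that has been granted access. By default from_pretrained loads the weights in full
# precision on CPU; pass torch_dtype / device_map to from_pretrained if a GPU or lower
# memory usage is needed.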
# Generate advice using FAISS + LLM
def generate_advice(extracted_data):
    try:
        recommendations = []
        for item in extracted_data:
            query = f"{item['Component']} {item['Status']}"
            print(f"Processing Query: {query}")  # Debugging step
            # Generate the query embedding as float32 (FAISS expects float32)
            query_embedding = embedding_model.encode([query])
            query_embedding = np.array(query_embedding, dtype="float32").reshape(1, -1)
            # Search FAISS for the closest knowledge-base entry
            _, idx = index.search(query_embedding, 1)
            best_match = kb[idx[0][0]]
            # Prepare LLM prompt
            role = "Medical expert providing advice based on lab results."
            prompt = f"""
            Lab Test: {item['Component']}
            Value: {item['Value']} {item['Units']}
            Status: {item['Status']}
            Medical Guidelines: {best_match['Advice']}
            Provide additional insights or recommendations.
            """
            # Generate advice with LLaMA
            messages = [
                {"role": "system", "content": role},
                {"role": "user", "content": prompt},
            ]
            # apply_chat_template with tokenize=True returns the input_ids tensor directly
            input_ids = tokenizer.apply_chat_template(
                messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
            )
            output = llm.generate(
                input_ids=input_ids,
                max_new_tokens=150,
                num_return_sequences=1,
            )
            # Decode only the newly generated tokens, not the echoed prompt
            advice = tokenizer.decode(
                output[0][input_ids.shape[-1]:], skip_special_tokens=True
            ).strip()
            recommendations.append({"Component": item["Component"], "Advice": advice})
        return recommendations
    except Exception as e:
        print(f"Error: {e}")  # Debugging any unexpected issues
        return [{"error": f"Exception occurred: {str(e)}"}]
# Extract structured data from the PDF
def pdf_to_text(pdf_file):
    try:
        # gr.File with type="filepath" passes the uploaded file's path as a string
        reader = PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            text += page.extract_text() or ""  # extract_text() may return None
        # Regex to extract structured lab results:
        # component, value, reference min, reference max, units, status
        pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
        matches = re.findall(pattern, text)
        # Structure data into a list of dictionaries
        if matches:
            results = [
                {"Component": m[0], "Value": float(m[1]), "Min": float(m[2]), "Max": float(m[3]), "Units": m[4], "Status": m[5]}
                for m in matches
            ]
            return results
        else:
            return "No structured data found in the PDF."
    except Exception as e:
        return f"Error: {e}"
# Gradio Interface
def main():
    with gr.Blocks() as app:
        gr.Markdown("## Medical Test Interpreter with RAG and LLM")
        with gr.Row():
            pdf_input = gr.File(label="Upload PDF", type="filepath")
            structured_data = gr.JSON(label="Extracted Structured Data")
            advice_output = gr.JSON(label="Generated Advice")
        extract_button = gr.Button("Extract Data")
        interpret_button = gr.Button("Get Advice")
        extract_button.click(pdf_to_text, inputs=pdf_input, outputs=structured_data)
        interpret_button.click(generate_advice, inputs=structured_data, outputs=advice_output)
    app.launch()
# Run the app
if __name__ == "__main__":
    main()