goldrode committed on
Commit
5c92c5f
·
verified ·
1 Parent(s): 3ca0b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -29
app.py CHANGED
@@ -2,39 +2,52 @@ import gradio as gr
2
  import fitz # PyMuPDF
3
  import re
4
  import numpy as np
5
- from chromadb import Client
6
- from sklearn.metrics.pairwise import cosine_similarity
 
7
  import google.generativeai as gemini
8
 
9
  # Step 1: Configure Gemini API
10
  gemini.configure(api_key="AIzaSyCOxpeeq4qUMjZje8sNtwnQZiQ9xVShLd0")
11
 
12
- # Step 2: Set up ChromaDB for Knowledge Retrieval
13
- client = Client()
14
- collection = client.get_or_create_collection("medical_knowledge")
 
 
 
 
 
15
 
16
- # Load medical knowledge into ChromaDB (run once)
17
- def load_medical_knowledge():
18
- knowledge = [
19
- {"name": "hemoglobin_normal", "text": "Normal hemoglobin levels are 13.8-17.2 g/dL for men and 12.1-15.1 g/dL for women."},
20
- {"name": "hemoglobin_low", "text": "Low hemoglobin levels indicate anemia, causing fatigue and weakness."},
21
- {"name": "glucose_normal", "text": "Normal fasting blood glucose levels are 70-99 mg/dL."},
22
- {"name": "glucose_high", "text": "High glucose levels suggest diabetes or prediabetes and need further testing."},
23
- ]
24
- for item in knowledge:
25
- collection.add(documents=[item["text"]], metadatas={"name": item["name"]}, ids=[item["name"]])
26
- print("Medical knowledge loaded.")
27
 
28
- # Step 3: Extract Text from PDF
 
 
 
 
 
 
 
 
 
 
 
 
29
  def extract_text_from_pdf(pdf_file):
 
30
  text = ""
31
  with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf:
32
  for page in pdf:
33
  text += page.get_text()
34
  return text
35
 
36
- # Step 4: Parse Blood Test Results
37
  def parse_blood_test_results(text):
 
38
  results = {}
39
  hemoglobin_match = re.search(r'Hemoglobin:\s*(\d+\.\d+)\s*g/dL', text, re.IGNORECASE)
40
  glucose_match = re.search(r'Glucose:\s*(\d+)\s*mg/dL', text, re.IGNORECASE)
@@ -44,19 +57,15 @@ def parse_blood_test_results(text):
44
  results['Glucose'] = int(glucose_match.group(1))
45
  return results
46
 
47
- # Step 5: Retrieve Knowledge Dynamically from ChromaDB
48
- def retrieve_medical_knowledge(parameter):
49
- results = collection.query(query_texts=[parameter], n_results=1)
50
- return results['documents'][0] if results['documents'] else "No relevant knowledge found."
51
-
52
  # Step 6: Generate Personalized Advice
53
  def generate_advice(test_results):
 
54
  advice = {}
55
  for parameter, value in test_results.items():
56
- medical_knowledge = retrieve_medical_knowledge(parameter)
57
  prompt = (
58
- f"The patient's {parameter} level is {value}. {medical_knowledge} "
59
- "Provide a clear, concise health recommendation."
60
  )
61
  response = gemini.generate_text(prompt)
62
  advice[parameter] = response.result
@@ -64,6 +73,7 @@ def generate_advice(test_results):
64
 
65
  # Step 7: Main Function for Gradio Interface
66
  def analyze_blood_test(pdf_file):
 
67
  text = extract_text_from_pdf(pdf_file)
68
  test_results = parse_blood_test_results(text)
69
  if not test_results:
@@ -71,15 +81,14 @@ def analyze_blood_test(pdf_file):
71
  advice = generate_advice(test_results)
72
  return advice
73
 
74
- # Gradio interface
75
  iface = gr.Interface(
76
  fn=analyze_blood_test,
77
  inputs=gr.inputs.File(label="Upload Blood Test PDF"),
78
  outputs="json",
79
- title="Blood Test Analysis with Full RAG Implementation",
80
  description="Upload a PDF with blood test results to receive personalized health advice."
81
  )
82
 
83
  if __name__ == "__main__":
84
- load_medical_knowledge() # Run once to load knowledge into ChromaDB
85
  iface.launch()
 
2
  import fitz # PyMuPDF
3
  import re
4
  import numpy as np
5
+ import faiss
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.preprocessing import normalize
8
  import google.generativeai as gemini
9
 
10
  # Step 1: Configure Gemini API
11
  gemini.configure(api_key="AIzaSyCOxpeeq4qUMjZje8sNtwnQZiQ9xVShLd0")
12
 
13
# Step 2: Define Medical Knowledge Base
# One reference sentence per entry. The position of a sentence in this list
# is the row id FAISS hands back, so the list order must stay in sync with
# the rows added to the index below.
medical_knowledge = [
    "Normal hemoglobin levels are 13.8 to 17.2 g/dL for men and 12.1 to 15.1 g/dL for women.",
    "Low hemoglobin levels indicate anemia, causing fatigue and weakness.",
    "High hemoglobin levels may suggest polycythemia or dehydration.",
    "Normal fasting blood glucose levels are 70 to 99 mg/dL.",
    "Elevated glucose levels indicate diabetes or prediabetes and require further testing.",
]

# Step 3: Build FAISS Index
# Each sentence becomes a dense TF-IDF row vector.
vectorizer = TfidfVectorizer()
knowledge_vectors = vectorizer.fit_transform(medical_knowledge).toarray()
# Unit-length rows make the L2 ranking used by IndexFlatL2 equivalent to a
# cosine-similarity ranking.
knowledge_vectors = normalize(knowledge_vectors)
# FAISS works on C-contiguous float32 matrices, while TF-IDF output is
# float64 by default; convert explicitly instead of relying on the wrapper.
knowledge_vectors = np.ascontiguousarray(knowledge_vectors, dtype=np.float32)

# Initialize FAISS Index (exact, brute-force search over all rows)
dimension = knowledge_vectors.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(knowledge_vectors)
31
+
32
def retrieve_medical_knowledge(parameter: str) -> str:
    """Return the knowledge-base sentence closest to *parameter*.

    The query is vectorized with the module-level TF-IDF ``vectorizer``,
    normalized, and matched against ``faiss_index``; the single nearest
    row is mapped back to its sentence in ``medical_knowledge``.

    Args:
        parameter: Name of the blood-test parameter to look up
            (e.g. ``"Hemoglobin"``).

    Returns:
        The text of the best-matching knowledge-base entry.
    """
    query_vector = normalize(vectorizer.transform([parameter]).toarray())
    # FAISS expects a C-contiguous float32 matrix; TF-IDF yields float64.
    query_vector = np.ascontiguousarray(query_vector, dtype=np.float32)
    _, indices = faiss_index.search(query_vector, 1)  # top-1 neighbour
    # NOTE(review): a query with no term in the TF-IDF vocabulary becomes an
    # all-zero vector and is equidistant from every entry, so the returned
    # sentence is arbitrary in that case.
    return medical_knowledge[indices[0][0]]
38
+
39
# Step 4: Extract Text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        pdf_file: File-like object exposing ``read()`` that yields the raw
            PDF bytes.

    Returns:
        The text of all pages joined in page order, with no separator.
    """
    pdf_bytes = pdf_file.read()
    # Open from memory; the context manager closes the document afterwards.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
47
 
48
+ # Step 5: Parse Blood Test Results
49
  def parse_blood_test_results(text):
50
+ """Parse blood test results from the extracted text."""
51
  results = {}
52
  hemoglobin_match = re.search(r'Hemoglobin:\s*(\d+\.\d+)\s*g/dL', text, re.IGNORECASE)
53
  glucose_match = re.search(r'Glucose:\s*(\d+)\s*mg/dL', text, re.IGNORECASE)
 
57
  results['Glucose'] = int(glucose_match.group(1))
58
  return results
59
 
 
 
 
 
 
60
  # Step 6: Generate Personalized Advice
61
  def generate_advice(test_results):
62
+ """Generate personalized health advice using Gemini API."""
63
  advice = {}
64
  for parameter, value in test_results.items():
65
+ medical_info = retrieve_medical_knowledge(parameter)
66
  prompt = (
67
+ f"The patient's {parameter} level is {value}. {medical_info} "
68
+ "Provide clear, concise health advice."
69
  )
70
  response = gemini.generate_text(prompt)
71
  advice[parameter] = response.result
 
73
 
74
  # Step 7: Main Function for Gradio Interface
75
  def analyze_blood_test(pdf_file):
76
+ """Main function to analyze the uploaded blood test PDF."""
77
  text = extract_text_from_pdf(pdf_file)
78
  test_results = parse_blood_test_results(text)
79
  if not test_results:
 
81
  advice = generate_advice(test_results)
82
  return advice
83
 
84
# Gradio Interface: one PDF upload in, a JSON object of advice out.
iface = gr.Interface(
    title="Blood Test Analysis with RAG and Gemini (FAISS)",
    description="Upload a PDF with blood test results to receive personalized health advice.",
    inputs=gr.inputs.File(label="Upload Blood Test PDF"),
    outputs="json",
    fn=analyze_blood_test,
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()