goldrode committed on
Commit
4971391
·
verified ·
1 Parent(s): 8a83bd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -33
app.py CHANGED
@@ -1,66 +1,68 @@
1
  import gradio as gr
2
- import PyPDF2
3
  import re
4
- import chromadb
 
 
5
  import google.generativeai as gemini
6
- from langchain.embeddings.openai import OpenAIEmbeddings
7
- from langchain.vectorstores import Chroma
8
- from langchain_openai import OpenAIEmbeddings
9
-
10
 
11
  # Step 1: Configure Gemini API
12
  gemini.configure(api_key="AIzaSyCOxpeeq4qUMjZje8sNtwnQZiQ9xVShLd0")
13
 
14
- # Step 2: Initialize ChromaDB and Medical Knowledge Base
15
- embeddings = OpenAIEmbeddings()
16
- chroma_db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
17
 
 
18
  def load_medical_knowledge():
19
- """Load medical knowledge into ChromaDB (only run once)."""
20
- documents = [
21
- "Normal hemoglobin levels are between 12-16 g/dL for women and 13-17 g/dL for men.",
22
- "Low hemoglobin levels can indicate anemia, which may cause fatigue and weakness.",
23
- "High glucose levels can indicate diabetes or prediabetes.",
24
- "Cholesterol above 200 mg/dL is considered high and increases cardiovascular risks."
25
  ]
26
- chroma_db.add_texts(documents)
 
27
  print("Medical knowledge loaded.")
28
 
29
  # Step 3: Extract Text from PDF
30
  def extract_text_from_pdf(pdf_file):
31
- reader = PyPDF2.PdfFileReader(pdf_file)
32
- text = ''
33
- for page_num in range(reader.numPages):
34
- text += reader.getPage(page_num).extract_text()
35
  return text
36
 
37
  # Step 4: Parse Blood Test Results
38
  def parse_blood_test_results(text):
39
  results = {}
40
- # Example regex for Hemoglobin
41
- hemoglobin_match = re.search(r"Hemoglobin:\s*(\d+\.\d+)\s*g/dL", text)
42
  if hemoglobin_match:
43
  results['Hemoglobin'] = float(hemoglobin_match.group(1))
44
- # Add more parameters like Glucose, Cholesterol, etc.
 
45
  return results
46
 
47
- # Step 5: Retrieve Medical Knowledge from ChromaDB
48
  def retrieve_medical_knowledge(parameter):
49
- results = chroma_db.similarity_search(parameter, k=1) # Retrieve most relevant knowledge
50
- return results[0].page_content if results else "No relevant knowledge found."
51
 
52
- # Step 6: Generate Advice using Gemini API
53
  def generate_advice(test_results):
54
  advice = {}
55
  for parameter, value in test_results.items():
56
  medical_knowledge = retrieve_medical_knowledge(parameter)
57
- prompt = (f"Patient's {parameter} level is {value}. {medical_knowledge}."
58
- " Provide clear, personalized health advice.")
 
 
59
  response = gemini.generate_text(prompt)
60
  advice[parameter] = response.result
61
  return advice
62
 
63
- # Step 7: Main Function for Gradio
64
  def analyze_blood_test(pdf_file):
65
  text = extract_text_from_pdf(pdf_file)
66
  test_results = parse_blood_test_results(text)
@@ -69,15 +71,15 @@ def analyze_blood_test(pdf_file):
69
  advice = generate_advice(test_results)
70
  return advice
71
 
72
- # Step 8: Gradio Interface
73
  iface = gr.Interface(
74
  fn=analyze_blood_test,
75
  inputs=gr.inputs.File(label="Upload Blood Test PDF"),
76
  outputs="json",
77
- title="Blood Test Analysis with RAG and Gemini",
78
  description="Upload a PDF with blood test results to receive personalized health advice."
79
  )
80
 
81
  if __name__ == "__main__":
82
- load_medical_knowledge() # Run once to populate the vector store
83
  iface.launch()
 
1
  import gradio as gr
2
+ import fitz # PyMuPDF
3
  import re
4
+ import numpy as np
5
+ from chromadb import Client
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
  import google.generativeai as gemini
 
 
 
 
8
 
9
  # Step 1: Configure Gemini API
10
  gemini.configure(api_key="AIzaSyCOxpeeq4qUMjZje8sNtwnQZiQ9xVShLd0")
11
 
12
+ # Step 2: Set up ChromaDB for Knowledge Retrieval
13
# Module-level Chroma handles shared by the loader and the retriever below.
# `Client()` is built with its default settings (no persistence path is
# configured here), and the collection is created on first use.
client = Client()
collection = client.get_or_create_collection(name="medical_knowledge")
15
 
16
+ # Load medical knowledge into ChromaDB (run once)
17
  def load_medical_knowledge():
18
+ knowledge = [
19
+ {"name": "hemoglobin_normal", "text": "Normal hemoglobin levels are 13.8-17.2 g/dL for men and 12.1-15.1 g/dL for women."},
20
+ {"name": "hemoglobin_low", "text": "Low hemoglobin levels indicate anemia, causing fatigue and weakness."},
21
+ {"name": "glucose_normal", "text": "Normal fasting blood glucose levels are 70-99 mg/dL."},
22
+ {"name": "glucose_high", "text": "High glucose levels suggest diabetes or prediabetes and need further testing."},
 
23
  ]
24
+ for item in knowledge:
25
+ collection.add(documents=[item["text"]], metadatas={"name": item["name"]}, ids=[item["name"]])
26
  print("Medical knowledge loaded.")
27
 
28
  # Step 3: Extract Text from PDF
29
  def extract_text_from_pdf(pdf_file):
30
+ text = ""
31
+ with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf:
32
+ for page in pdf:
33
+ text += page.get_text()
34
  return text
35
 
36
  # Step 4: Parse Blood Test Results
37
  def parse_blood_test_results(text):
38
  results = {}
39
+ hemoglobin_match = re.search(r'Hemoglobin:\s*(\d+\.\d+)\s*g/dL', text, re.IGNORECASE)
40
+ glucose_match = re.search(r'Glucose:\s*(\d+)\s*mg/dL', text, re.IGNORECASE)
41
  if hemoglobin_match:
42
  results['Hemoglobin'] = float(hemoglobin_match.group(1))
43
+ if glucose_match:
44
+ results['Glucose'] = int(glucose_match.group(1))
45
  return results
46
 
47
+ # Step 5: Retrieve Knowledge Dynamically from ChromaDB
48
  def retrieve_medical_knowledge(parameter):
49
+ results = collection.query(query_texts=[parameter], n_results=1)
50
+ return results['documents'][0] if results['documents'] else "No relevant knowledge found."
51
 
52
+ # Step 6: Generate Personalized Advice
53
  def generate_advice(test_results):
54
  advice = {}
55
  for parameter, value in test_results.items():
56
  medical_knowledge = retrieve_medical_knowledge(parameter)
57
+ prompt = (
58
+ f"The patient's {parameter} level is {value}. {medical_knowledge} "
59
+ "Provide a clear, concise health recommendation."
60
+ )
61
  response = gemini.generate_text(prompt)
62
  advice[parameter] = response.result
63
  return advice
64
 
65
+ # Step 7: Main Function for Gradio Interface
66
  def analyze_blood_test(pdf_file):
67
  text = extract_text_from_pdf(pdf_file)
68
  test_results = parse_blood_test_results(text)
 
71
  advice = generate_advice(test_results)
72
  return advice
73
 
74
+ # Gradio interface
75
  iface = gr.Interface(
76
  fn=analyze_blood_test,
77
  inputs=gr.inputs.File(label="Upload Blood Test PDF"),
78
  outputs="json",
79
+ title="Blood Test Analysis with Full RAG Implementation",
80
  description="Upload a PDF with blood test results to receive personalized health advice."
81
  )
82
 
83
  if __name__ == "__main__":
84
+ load_medical_knowledge() # Run once to load knowledge into ChromaDB
85
  iface.launch()