goldrode committed on
Commit
13747c8
·
verified ·
1 Parent(s): dd0af55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -27
app.py CHANGED
@@ -1,75 +1,97 @@
1
  import json
2
  import faiss
3
  from sentence_transformers import SentenceTransformer
4
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
5
- import gradio as gr
6
 
7
  # Load the knowledge base
8
  with open("knowledge_base.json", "r") as file:
9
  kb = json.load(file)
10
 
 
11
  # Initialize the embedding model
12
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
-
14
- # Generate embeddings for the knowledge base
15
  kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
16
  kb_embeddings = embedding_model.encode(kb_texts)
17
 
18
- # Create a FAISS index
19
  index = faiss.IndexFlatL2(kb_embeddings.shape[1])
20
  index.add(kb_embeddings)
21
 
22
- # Load Hugging Face LLM (flan-t5 model as an example)
23
- model_name = "google/flan-t5-large"
24
- tokenizer = AutoTokenizer.from_pretrained(model_name)
25
- llm = AutoModelForSeq2SeqLM.from_pretrained(model_name)
26
- text_generator = pipeline("text2text-generation", model=llm, tokenizer=tokenizer)
27
 
 
 
28
 
29
- # Function to generate advice using RAG
30
  def generate_advice(extracted_data):
31
  try:
32
  recommendations = []
33
  for item in extracted_data:
 
34
  query = f"{item['Component']} {item['Status']}"
35
  query_embedding = embedding_model.encode([query])
36
-
37
- # Retrieve nearest knowledge base entry
38
  _, idx = index.search(query_embedding, 1)
39
  best_match = kb[idx[0][0]]
40
 
41
- # Use Hugging Face LLM to generate detailed advice
 
42
  prompt = f"""
43
  Lab Test: {item['Component']}
44
  Value: {item['Value']} {item['Units']}
45
  Status: {item['Status']}
46
 
47
- Advice based on guidelines:
48
- {best_match['Advice']}
 
49
  """
50
- response = text_generator(prompt, max_length=150, num_return_sequences=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  recommendations.append({
52
  "Component": item["Component"],
53
- "Advice": response[0]["generated_text"]
54
  })
55
  return recommendations
56
  except Exception as e:
57
  return f"Error: {e}"
58
 
 
 
 
 
59
 
60
- # Function to extract structured data from the PDF text
61
  def pdf_to_text(pdf_file):
62
  try:
63
- from PyPDF2 import PdfReader
64
-
65
  reader = PdfReader(pdf_file.name)
66
  text = ""
67
  for page in reader.pages:
68
  text += page.extract_text()
69
 
70
  # Regex to match lab results (e.g., WBC 4.4 4.8 10.8 K/ul Low)
71
- import re
72
-
73
  pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
74
  matches = re.findall(pattern, text)
75
 
@@ -85,11 +107,10 @@ def pdf_to_text(pdf_file):
85
  except Exception as e:
86
  return f"Error: {e}"
87
 
88
-
89
- # Gradio Interface with Hugging Face LLM Integration
90
  def main():
91
  with gr.Blocks() as app:
92
- gr.Markdown("## Medical Test Interpreter with RAG (Hugging Face)")
93
  with gr.Row():
94
  pdf_input = gr.File(label="Upload PDF", type="filepath")
95
  structured_data = gr.JSON(label="Extracted Structured Data")
@@ -102,7 +123,6 @@ def main():
102
 
103
  app.launch()
104
 
105
-
106
  # Run the Gradio app
107
  if __name__ == "__main__":
108
  main()
 
1
import json
import os

import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
 
5
 
6
# Load the knowledge base.
# JSON is UTF-8 by specification, so do not depend on the platform default encoding.
with open("knowledge_base.json", "r", encoding="utf-8") as file:
    kb = json.load(file)

# NOTE: the interactive `huggingface-cli login` call was removed. It prompts
# for a token on stdin, which cannot be answered when the app runs headless,
# and it is redundant: the token is passed explicitly to `from_pretrained`
# below. When the environment variable is unset, `token=None` is passed and
# transformers falls back to whatever credentials it finds by default.

# Initialize the embedding model and embed every knowledge base entry once.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
kb_embeddings = embedding_model.encode(kb_texts)

# Create FAISS index (exact L2 search over the knowledge base embeddings).
# Row i of the index corresponds to kb[i].
index = faiss.IndexFlatL2(kb_embeddings.shape[1])
index.add(kb_embeddings)

# Load the Hugging Face LLM.
llama_model_name = "meta-llama/Llama-3.2-3B-Instruct"
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=API_TOKEN)
llm = AutoModelForCausalLM.from_pretrained(llama_model_name, token=API_TOKEN)
26
 
27
+ # Generate advice using RAG
28
def generate_advice(extracted_data):
    """Generate LLM advice for each extracted lab result using RAG.

    For every item, the closest knowledge base entry is retrieved from the
    FAISS index and handed to the LLM together with the lab values.

    Args:
        extracted_data: Iterable of dicts with the keys ``Component``,
            ``Value``, ``Units`` and ``Status``. A plain string is treated as
            an upstream error message and returned unchanged.

    Returns:
        A list of ``{"Component": ..., "Advice": ...}`` dicts, or a string of
        the form ``"Error: ..."`` if anything fails while processing.
    """
    # pdf_to_text reports failures as an "Error: ..." string. Iterating such
    # a string character by character would only produce a misleading
    # "string indices" error, so surface the original message instead.
    if isinstance(extracted_data, str):
        return extracted_data

    try:
        recommendations = []
        for item in extracted_data:
            # Query FAISS for the closest match.
            query = f"{item['Component']} {item['Status']}"
            query_embedding = embedding_model.encode([query])
            _, idx = index.search(query_embedding, 1)
            best_match = kb[int(idx[0][0])]

            messages = _build_chat_messages(item, best_match["Advice"])
            recommendations.append({
                "Component": item["Component"],
                "Advice": _generate_reply(messages),
            })
        return recommendations
    except Exception as e:
        # Callers receive the failure as a string rather than an exception.
        return f"Error: {e}"


def _build_chat_messages(item, guideline):
    """Return the system and user chat messages for one lab result."""
    role = "Medical expert providing advice based on lab results."
    prompt = (
        "\n"
        f"Lab Test: {item['Component']}\n"
        f"Value: {item['Value']} {item['Units']}\n"
        f"Status: {item['Status']}\n"
        "\n"
        f"Medical Guidelines: {guideline}\n"
        "\n"
        "Provide additional insights or recommendations.\n"
    )
    return [
        {"role": "system", "content": role},
        {"role": "user", "content": prompt},
    ]


def _generate_reply(messages, max_new_tokens=150):
    """Run the chat messages through the LLM and return only its reply text."""
    # tokenize=True with return_dict=True yields a mapping holding input_ids
    # and attention_mask, which is what `generate(**inputs)` expects. With
    # tokenize=False the template returns a plain string, and unpacking that
    # with ** raises a TypeError.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(llm.device)

    # max_new_tokens bounds the reply only; max_length would also count the
    # prompt tokens and could leave no room for an answer.
    output = llm.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )

    # generate() returns prompt + continuation; decode just the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
80
 
81
+ # Gradio app with LLM integration
82
+ import gradio as gr
83
+ from PyPDF2 import PdfReader
84
+ import re
85
 
86
+ # Function to extract structured data from PDF
87
  def pdf_to_text(pdf_file):
88
  try:
 
 
89
  reader = PdfReader(pdf_file.name)
90
  text = ""
91
  for page in reader.pages:
92
  text += page.extract_text()
93
 
94
  # Regex to match lab results (e.g., WBC 4.4 4.8 10.8 K/ul Low)
 
 
95
  pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
96
  matches = re.findall(pattern, text)
97
 
 
107
  except Exception as e:
108
  return f"Error: {e}"
109
 
110
+ # Gradio Interface
 
111
  def main():
112
  with gr.Blocks() as app:
113
+ gr.Markdown("## Medical Test Interpreter with RAG and LLM")
114
  with gr.Row():
115
  pdf_input = gr.File(label="Upload PDF", type="filepath")
116
  structured_data = gr.JSON(label="Extracted Structured Data")
 
123
 
124
  app.launch()
125
 
 
126
  # Run the Gradio app
127
  if __name__ == "__main__":
128
  main()