goldrode committed on
Commit
dd0af55
·
verified ·
1 Parent(s): 7ac2672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -8
app.py CHANGED
@@ -1,16 +1,75 @@
 
 
 
 
1
  import gradio as gr
2
- from PyPDF2 import PdfReader
3
- import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  # Function to extract structured data from the PDF text
6
  def pdf_to_text(pdf_file):
7
  try:
 
 
8
  reader = PdfReader(pdf_file.name)
9
  text = ""
10
  for page in reader.pages:
11
  text += page.extract_text()
12
 
13
  # Regex to match lab results (e.g., WBC 4.4 4.8 10.8 K/ul Low)
 
 
14
  pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
15
  matches = re.findall(pattern, text)
16
 
@@ -27,19 +86,23 @@ def pdf_to_text(pdf_file):
27
  return f"Error: {e}"
28
 
29
 
30
# Gradio Interface
def main():
    """Build the PDF-extraction UI and start the Gradio server.

    The page shows a heading, a row holding the PDF upload next to a JSON
    viewer, and one button that runs ``pdf_to_text`` on the uploaded file.
    """
    # NOTE(review): block nesting is reconstructed; confirm the JSON viewer
    # belongs inside the row and the button below it.
    demo = gr.Blocks()
    with demo as app:
        gr.Markdown("## PDF to Structured Data")
        with gr.Row():
            upload = gr.File(label="Upload PDF", type="filepath")
            extracted_view = gr.JSON(label="Extracted Structured Data")
        run_button = gr.Button("Extract Data")
        # Clicking sends the uploaded file to the extractor and shows its
        # result in the JSON viewer.
        run_button.click(pdf_to_text, inputs=upload, outputs=extracted_view)

    app.launch()
41
 
 
42
  # Run the Gradio app
43
  if __name__ == "__main__":
44
  main()
45
-
 
1
+ import json
2
+ import faiss
3
+ from sentence_transformers import SentenceTransformer
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
5
  import gradio as gr
6
+
7
# Load the knowledge base. Every entry must provide the "Component", "Range"
# and "Advice" keys that are read when the embedding texts are built below.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open("knowledge_base.json", "r", encoding="utf-8") as file:
    kb = json.load(file)

# Initialize the embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Generate embeddings for the knowledge base: one text per entry, in the same
# order as ``kb`` so that a row number in the index maps back to ``kb[row]``.
kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
kb_embeddings = embedding_model.encode(kb_texts)

# Create a FAISS index sized to the embedding dimension and fill it with the
# knowledge-base vectors.
index = faiss.IndexFlatL2(kb_embeddings.shape[1])
index.add(kb_embeddings)

# Load Hugging Face LLM (flan-t5 model as an example)
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = AutoModelForSeq2SeqLM.from_pretrained(model_name)
text_generator = pipeline("text2text-generation", model=llm, tokenizer=tokenizer)
27
+
28
+
29
# Function to generate advice using RAG
def generate_advice(extracted_data):
    """Generate advice for each lab result via retrieval plus an LLM.

    For every item, the nearest knowledge-base entry is looked up in the
    module-level FAISS ``index`` and its ``Advice`` text is placed in a
    prompt for the module-level ``text_generator`` pipeline.

    Args:
        extracted_data: Iterable of mappings, each providing the
            ``Component``, ``Value``, ``Units`` and ``Status`` keys.
            ``None`` and plain strings are accepted and reported as errors.
            NOTE(review): in the UI this receives whatever ``pdf_to_text``
            produced; confirm that function emits this shape.

    Returns:
        A list of ``{"Component": ..., "Advice": ...}`` dicts, one per input
        item, or a string starting with ``"Error:"`` when the input is
        unusable or any step fails.
    """
    # Nothing to interpret yet (no extraction result was supplied).
    if extracted_data is None:
        return "Error: no extracted data to interpret"

    # A string is not a list of results: iterating it would walk its
    # characters and fail with a misleading indexing error. A string that is
    # already an error message is handed back unchanged.
    if isinstance(extracted_data, str):
        if extracted_data.startswith("Error:"):
            return extracted_data
        return "Error: extracted data must be a list of lab results, not text"

    required_keys = ("Component", "Value", "Units", "Status")

    # The broad handler is deliberate: this function feeds a UI component, so
    # failures are reported as text rather than raised.
    try:
        recommendations = []
        for item in extracted_data:
            missing = [key for key in required_keys if key not in item]
            if missing:
                return (
                    f"Error: lab result {item!r} is missing field(s): "
                    f"{', '.join(missing)}"
                )

            query = f"{item['Component']} {item['Status']}"
            query_embedding = embedding_model.encode([query])

            # Retrieve nearest knowledge base entry
            _, idx = index.search(query_embedding, 1)
            match_pos = int(idx[0][0])
            # A negative label means no neighbour was found; used as a list
            # index it would silently select an entry from the end of ``kb``.
            if match_pos < 0:
                return "Error: knowledge base index returned no match"
            best_match = kb[match_pos]

            # Use Hugging Face LLM to generate detailed advice. The prompt is
            # assembled line by line so that source-code indentation never
            # becomes part of the text sent to the model.
            prompt = (
                f"Lab Test: {item['Component']}\n"
                f"Value: {item['Value']} {item['Units']}\n"
                f"Status: {item['Status']}\n"
                "\n"
                "Advice based on guidelines:\n"
                f"{best_match['Advice']}\n"
            )
            response = text_generator(prompt, max_length=150, num_return_sequences=1)
            recommendations.append({
                "Component": item["Component"],
                "Advice": response[0]["generated_text"],
            })
        return recommendations
    except Exception as e:
        return f"Error: {e}"
58
+
59
 
60
  # Function to extract structured data from the PDF text
61
  def pdf_to_text(pdf_file):
62
  try:
63
+ from PyPDF2 import PdfReader
64
+
65
  reader = PdfReader(pdf_file.name)
66
  text = ""
67
  for page in reader.pages:
68
  text += page.extract_text()
69
 
70
  # Regex to match lab results (e.g., WBC 4.4 4.8 10.8 K/ul Low)
71
+ import re
72
+
73
  pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
74
  matches = re.findall(pattern, text)
75
 
 
86
  return f"Error: {e}"
87
 
88
 
89
# Gradio Interface with Hugging Face LLM Integration
def main():
    """Build the two-step interpreter UI and start the Gradio server.

    The page shows a heading, a row with the PDF upload and two JSON viewers
    (extracted data and generated advice), and two buttons: one runs
    ``pdf_to_text`` on the upload, the other runs ``generate_advice`` on the
    extracted data.
    """
    # NOTE(review): block nesting is reconstructed; confirm both JSON viewers
    # belong inside the row and the buttons below it.
    demo = gr.Blocks()
    with demo as app:
        gr.Markdown("## Medical Test Interpreter with RAG (Hugging Face)")
        with gr.Row():
            upload = gr.File(label="Upload PDF", type="filepath")
            extracted_view = gr.JSON(label="Extracted Structured Data")
            advice_view = gr.JSON(label="Generated Advice")
        extract_btn = gr.Button("Extract Data")
        advice_btn = gr.Button("Get Advice")

        # Step 1: uploaded file -> structured data viewer.
        extract_btn.click(pdf_to_text, inputs=upload, outputs=extracted_view)
        # Step 2: structured data viewer -> advice viewer.
        advice_btn.click(generate_advice, inputs=extracted_view, outputs=advice_view)

    app.launch()
104
 
105
+
106
  # Run the Gradio app
107
  if __name__ == "__main__":
108
  main()