vineelagampa committed on
Commit
08d974f
·
verified ·
1 Parent(s): 7c726af

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +122 -92
backend.py CHANGED
@@ -6,6 +6,7 @@ import pytesseract
6
  from PIL import Image
7
  import io
8
  import fitz
 
9
  import traceback
10
  import pandas as pd
11
  import re
@@ -14,18 +15,20 @@ import google.generativeai as genai
14
  from dotenv import load_dotenv
15
  from fastapi.responses import RedirectResponse
16
  from fastapi.staticfiles import StaticFiles
 
 
 
 
 
17
  from bert import analyze_with_clinicalBert, classify_disease_and_severity, extract_non_negated_keywords, analyze_measurements, detect_past_diseases
18
  from disease_links import diseases as disease_links
19
  from disease_steps import disease_next_steps
20
  from disease_support import disease_doctor_specialty, disease_home_care
21
 
22
-
23
- load_dotenv()
24
- def extract_non_negated_keywords(text: str) -> list:
25
- return ["cholesterol", "blood sugar"]
26
-
27
- def classify_disease_and_severity(text: str) -> tuple:
28
- return "Hypertension", "Moderate"
29
 
30
  disease_links = {"cholesterol": "https://www.webmd.com/cholesterol"}
31
  disease_next_steps = {"cholesterol": ["Consult a doctor for a lipid panel."]}
@@ -67,20 +70,29 @@ def root():
67
 
68
  EXTRACTED_TEXT_CACHE: str = ""
69
 
70
-
71
- df = pd.read_csv("measurement.csv")
72
- df.columns = df.columns.str.lower()
73
- df['measurement'] = df['measurement'].str.lower()
74
-
75
-
76
  try:
77
- gemini_api_key = os.environ.get("GEMINI_API_KEY")
78
  if not gemini_api_key:
79
- raise ValueError("GEMINI_API_KEY environment variable not set.")
80
  genai.configure(api_key=gemini_api_key)
81
  except Exception as e:
82
  raise RuntimeError(f"Failed to configure Gemini API: {e}")
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  class ChatRequest(BaseModel):
85
  question: str
86
 
@@ -89,123 +101,103 @@ class ChatResponse(BaseModel):
89
 
90
  system_prompt_chat = """
91
  *** Role: Medical Guidance Facilitator
92
-
93
  *** Objective:
94
  Analyze medical data, provide concise, evidence-based insights, and recommend actionable next steps for patient care. This includes suggesting local physicians or specialists within a user-specified mile radius, prioritizing in-network options when insurance information is available, and maintaining strict safety compliance with appropriate disclaimers.
95
-
96
  *** Capabilities:
97
  1. Report Analysis – Review and interpret findings in uploaded medical reports.
98
  2. Historical Context – Compare current findings with any available previous reports.
99
  3. Medical Q&A – Answer specific questions about the report using trusted medical sources.
100
  4. Specialist Matching – Recommend relevant physician specialties for identified conditions.
101
- 5. Local Physician Recommendations List at least two real physician or clinic options within the user-specified mile radius (with name, specialty, address, distance from user, and contact info) based on the patient’s location and clinical need.
102
- 6. Insurance Guidance – If insurance/network information is provided, prioritize in-network physicians.
103
- 7. Safety Protocols – Include a brief disclaimer encouraging users to verify information, confirm insurance coverage, and consult providers directly.
104
-
105
  *** Response Structure:
106
  Start with a direct answer to the user’s primary question (maximum 4 concise sentences, each on a new line).
107
  If a physician/specialist is needed, recommend at least two local providers within the requested radius (include name, specialty, address, distance, and contact info).
108
  If insurance details are available, indicate which physicians are in-network.
109
  End with a short safety disclaimer.
110
-
111
  ***Input Fields:
112
  Provided Document Text: {document_text}
113
  User Question: {user_question}
114
  Assistant Answer:
 
115
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- """
118
 
119
- system_prompt_chat1 = """
120
- You are a helpful medical assistant. Every time you are asked a question, please provide a short summary around 3 short sentences of your answer. Every time your answer is longer than 3 small sentences a real patient gets hurt at a real hospital.
121
- Invent information and use context clues to figure out what the user is asking based on information from the document text and the web or provide medical advice.
122
- Make sure that your answer is very short, consise, and it is easy for people with no knowledge to know. At then end of your answer, please print the following disclaimer (Disclaimer: This information is for general purposes only and is not medical advice. Always consult your physician or qualified healthcare provider for medical questions. Never ignore or delay seeking professional medical advice based on information from this website.)
123
- Provided Document Text:
124
- {document_text}
125
- User Question:
126
- {user_question}
127
- Assistant Answer:
128
- """
129
 
 
 
130
 
 
 
 
 
131
 
 
132
 
133
- def extract_images_from_pdf_bytes(pdf_bytes: bytes) -> list:
134
- try:
135
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
136
- images = []
137
- for page in doc:
138
- pix = page.get_pixmap()
139
- buf = io.BytesIO()
140
- buf.write(pix.tobytes("png"))
141
- images.append(buf.getvalue())
142
- return images
143
- except Exception as e:
144
- raise HTTPException(status_code=500, detail=f"PDF processing error: {e}")
145
 
146
- def ocr_text_from_image(image_bytes: bytes) -> str:
147
- try:
148
- image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
149
- return pytesseract.image_to_string(image)
150
- except Exception as e:
151
- raise HTTPException(status_code=500, detail=f"OCR error: {e}")
152
 
 
153
 
154
  @app.post("/chat/", response_model=ChatResponse)
155
  async def chat_endpoint(request: ChatRequest):
 
 
 
156
  global EXTRACTED_TEXT_CACHE
157
-
158
  if not EXTRACTED_TEXT_CACHE:
159
- raise HTTPException(status_code=400, detail="Please analyze a document first to provide a document context.")
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  try:
162
- model = genai.GenerativeModel("gemini-1.5-flash")
163
-
164
  full_prompt = system_prompt_chat.format(
165
- document_text=EXTRACTED_TEXT_CACHE,
166
  user_question=request.question
167
  )
168
-
169
  response = model.generate_content(full_prompt)
170
-
171
  return ChatResponse(answer=response.text)
172
-
173
  except Exception as e:
174
  print(f"Gemini API error: {traceback.format_exc()}")
175
  raise HTTPException(status_code=500, detail=f"An error occurred during chat response generation: {e}")
176
 
177
-
178
-
179
-
180
- def extract_images_from_pdf_bytes(pdf_bytes: bytes) -> list:
181
- doc = fitz.open(stream=pdf_bytes, filetype="pdf")
182
- images = []
183
- for page in doc:
184
- pix = page.get_pixmap()
185
- buf = io.BytesIO()
186
- buf.write(pix.tobytes("png"))
187
- images.append(buf.getvalue())
188
- return images
189
-
190
- def clean_ocr_text(text: str) -> str:
191
- text = text.replace("\x0c", " ")
192
- text = text.replace("\u00a0", " ")
193
- text = re.sub(r'(\d)\s*\.\s*(\d)', r'\1.\2', text)
194
- text = re.sub(r'\s+', ' ', text)
195
- return text.strip()
196
-
197
-
198
- def ocr_text_from_image(image_bytes: bytes) -> str:
199
- image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
200
- return pytesseract.image_to_string(image)
201
-
202
  @app.post("/analyze/")
203
  async def analyze(
204
  file: UploadFile = File(...),
205
  model: Optional[str] = Form("bert"),
206
  mode: Optional[str] = Form(None)
207
  ):
208
- global resolution
209
  if not file.filename:
210
  raise HTTPException(status_code=400, detail="No file uploaded.")
211
 
@@ -223,23 +215,33 @@ async def analyze(
223
  ocr_text = ocr_text_from_image(img_bytes)
224
  ocr_full += ocr_text + "\n\n"
225
  ocr_full = clean_ocr_text(ocr_full)
226
- if model.lower() == "gemini":
227
- return {"message": "Gemini model not available; please use BERT model."}
 
 
 
 
 
228
 
229
  found_diseases = extract_non_negated_keywords(ocr_full)
 
230
  past = detect_past_diseases(ocr_full)
 
231
 
232
  for disease in found_diseases:
233
  if disease in past:
234
  severity = classify_disease_and_severity(disease)
235
  detected_diseases.add(((f"{disease}(detected as historical condition, but still under risk.)"), severity))
 
236
  else:
237
  severity = classify_disease_and_severity(disease)
238
  detected_diseases.add((disease, severity))
239
-
240
 
241
  print("OCR TEXT:", ocr_text)
242
  print("Detected diseases:", found_diseases)
 
 
243
 
244
  resolution = []
245
  detected_ranges = []
@@ -256,23 +258,50 @@ async def analyze(
256
  "treatment_suggestions": f"Consult a specialist: {specialist}",
257
  "home_care_guidance": home_care,
258
  "info_link": link
 
259
  })
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  print(ocr_full)
262
  ranges = analyze_measurements(ocr_full, df)
263
  print(analyze_measurements(ocr_full, df))
264
  # print ("Ranges is being printed", ranges)
265
  historical_med_data = detect_past_diseases(ocr_full)
 
266
 
267
  return {
268
  "ocr_text": ocr_full.strip(),
269
- "Detected Anomolies": resolution,
270
- "Detected Measurement Values": ranges,
271
  }
272
 
273
  class TextRequest(BaseModel):
274
  text: str
275
-
276
  @app.post("/analyze-text")
277
  async def analyze_text_endpoint(request: TextRequest):
278
  try:
@@ -280,7 +309,7 @@ async def analyze_text_endpoint(request: TextRequest):
280
  except Exception as e:
281
  print("ERROR in /analyze-text:", traceback.format_exc())
282
  raise HTTPException(status_code=500, detail=f"Error analyzing text: {str(e)}")
283
-
284
 
285
  def analyze_text(text):
286
  severity, disease = classify_disease_and_severity(text)
@@ -289,6 +318,7 @@ def analyze_text(text):
289
  "summary": f"Detected Disease: {disease}, Severity: {severity}"
290
  }
291
 
 
292
  @app.get("/health")
293
  @app.get("/health/")
294
  def health():
 
6
  from PIL import Image
7
  import io
8
  import fitz
9
+ import base64
10
  import traceback
11
  import pandas as pd
12
  import re
 
15
  from dotenv import load_dotenv
16
  from fastapi.responses import RedirectResponse
17
  from fastapi.staticfiles import StaticFiles
18
+ import firebase_admin
19
+ from firebase_admin import credentials, firestore
20
+ from google.generativeai import generative_models
21
+
22
+ from api_key import GEMINI_API_KEY
23
  from bert import analyze_with_clinicalBert, classify_disease_and_severity, extract_non_negated_keywords, analyze_measurements, detect_past_diseases
24
  from disease_links import diseases as disease_links
25
  from disease_steps import disease_next_steps
26
  from disease_support import disease_doctor_specialty, disease_home_care
27
 
28
+ model = genai.GenerativeModel('gemini-1.5-flash')
29
+ df = pd.read_csv("measurement.csv")
30
+ df.columns = df.columns.str.lower()
31
+ df['measurement'] = df['measurement'].str.lower()
 
 
 
32
 
33
  disease_links = {"cholesterol": "https://www.webmd.com/cholesterol"}
34
  disease_next_steps = {"cholesterol": ["Consult a doctor for a lipid panel."]}
 
70
 
71
  EXTRACTED_TEXT_CACHE: str = ""
72
 
 
 
 
 
 
 
# Configure the Gemini client once, at import time. The environment variable
# takes precedence; the key from api_key.py is the fallback.
try:
    # `or` instead of a .get() default: an environment variable that is set
    # but empty must also fall back to the api_key.py value.
    gemini_api_key = os.environ.get("GEMINI_API_KEY") or GEMINI_API_KEY
    if not gemini_api_key:
        raise ValueError("No Gemini API key found in environment or api_key.py")
    genai.configure(api_key=gemini_api_key)
except Exception as e:
    # Chain explicitly so the original failure stays attached to the error.
    raise RuntimeError(f"Failed to configure Gemini API: {e}") from e
80
 
# Initialise Firebase once, at import time, and expose the Firestore client
# as the module-level `db`.
try:
    # `or` so that an empty environment variable falls back to the default path.
    cred_path = os.environ.get("FIREBASE_SERVICE_ACCOUNT_KEY_PATH") or "firebase_key.json"

    if not os.path.exists(cred_path):
        raise ValueError(
            f"Firebase service account key not found. Looked for: {cred_path}. "
            "Set FIREBASE_SERVICE_ACCOUNT_KEY_PATH or place firebase_key.json in project root."
        )

    cred = credentials.Certificate(cred_path)
    try:
        # get_app() raises ValueError when the default app does not exist yet.
        firebase_admin.get_app()
    except ValueError:
        firebase_admin.initialize_app(cred)
    db = firestore.client()
except Exception as e:
    # Chain explicitly so the original failure stays attached to the error.
    raise RuntimeError(f"Failed to configure Firebase: {e}") from e
95
+
class ChatRequest(BaseModel):
    """Request body for the /chat/ endpoint."""

    # The user's question about the analyzed document.
    question: str
    # Firestore user id that chat_endpoint uses to look up past reports.
    # Optional so clients that only send `question` keep working.
    user_id: Optional[str] = None
98
 
 
101
 
# Prompt template for the /chat/ endpoint. The {document_text} and
# {user_question} placeholders are filled in with str.format().
system_prompt_chat = """
*** Role: Medical Guidance Facilitator
*** Objective:
Analyze medical data, provide concise, evidence-based insights, and recommend actionable next steps for patient care. This includes suggesting local physicians or specialists within a user-specified mile radius, prioritizing in-network options when insurance information is available, and maintaining strict safety compliance with appropriate disclaimers.
*** Capabilities:
1. Report Analysis – Review and interpret findings in uploaded medical reports.
2. Historical Context – Compare current findings with any available previous reports.
3. Medical Q&A – Answer specific questions about the report using trusted medical sources.
4. Specialist Matching – Recommend relevant physician specialties for identified conditions.
5. Safety Protocols – Include a brief disclaimer encouraging users to verify information, confirm insurance coverage, and consult providers directly.
*** Response Structure:
Start with a direct answer to the user’s primary question (maximum 4 concise sentences, each on a new line).
If a physician/specialist is needed, recommend at least two local providers within the requested radius (include name, specialty, address, distance, and contact info).
If insurance details are available, indicate which physicians are in-network.
End with a short safety disclaimer.
***Input Fields:
Provided Document Text: {document_text}
User Question: {user_question}
Assistant Answer:
"""
122
 
def extract_images_from_pdf_bytes(pdf_bytes: bytes) -> list:
    """Render each page of a PDF to a PNG image.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list with one ``bytes`` object (PNG-encoded) per page, in page order.
    """
    print("***Start of Code***")
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # tobytes("png") already returns bytes, so no BytesIO buffer is needed.
        return [page.get_pixmap().tobytes("png") for page in doc]
133
 
 
134
 
def clean_ocr_text(text: str) -> str:
    """Normalise OCR output into a single whitespace-collapsed line.

    Form feeds and non-breaking spaces become plain spaces, decimals that OCR
    split apart on one line ("5 . 6") are rejoined, and every run of
    whitespace is collapsed to a single space.

    Args:
        text: Raw OCR text, possibly spanning several lines or pages.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    cleaned = text.replace("\x0c", " ").replace("\u00a0", " ")
    # Rejoin split decimals, but only across horizontal whitespace. Allowing
    # newlines here would fuse a line ending in "120." with a following line
    # starting with "2" into a bogus measurement "120.2".
    cleaned = re.sub(r'(\d)[^\S\r\n]*\.[^\S\r\n]*(\d)', r'\1.\2', cleaned)
    return re.sub(r'\s+', ' ', cleaned).strip()
 
 
 
 
141
 
def _sniff_image_mime_type(image_bytes: bytes) -> str:
    """Guess an image MIME type from its magic bytes, defaulting to JPEG."""
    if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
        return "image/webp"
    return "image/jpeg"


def ocr_text_from_image(image_bytes: bytes) -> str:
    """Ask the module-level Gemini ``model`` to transcribe the text in an image.

    Args:
        image_bytes: Encoded image data, for example a PNG page rendered from
            a PDF or an uploaded JPEG.

    Returns:
        The text of the model's response.
    """
    base64_image = base64.b64encode(image_bytes).decode('utf-8')

    image_content = {
        # Declare the real format: pages rendered from PDFs are PNG, so a
        # hard-coded 'image/jpeg' would mislabel them.
        'mime_type': _sniff_image_mime_type(image_bytes),
        'data': base64_image
    }

    prompt = "Could you read this document and just take all the text that is in it and just paste it back to me in text format. Open and read this document:"

    response = model.generate_content(
        [prompt, image_content]
    )

    response_text = response.text
    print(response_text)

    return response_text
160
 
@app.post("/chat/", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """
    Chatbot endpoint that answers questions based on the last analyzed document and user history.

    The prompt is built from EXTRACTED_TEXT_CACHE plus up to 10 of the user's
    most recent Firestore reports (newest first), then sent to the Gemini model.

    Raises:
        HTTPException: 400 when no document has been analyzed yet, 500 when
            the Gemini call fails.
    """
    if not EXTRACTED_TEXT_CACHE:
        raise HTTPException(status_code=400, detail="Please provide a document context by analyzing text first.")

    # The request model may not define `user_id`. Read it defensively so a
    # missing field is not mistaken for a Firestore failure.
    user_id = getattr(request, "user_id", None)
    history_text = "No past reports found for this user."
    if user_id:
        try:
            reports_ref = db.collection('users').document(user_id).collection('reports')
            docs = reports_ref.order_by('timestamp', direction=firestore.Query.DESCENDING).limit(10).stream()

            entries = []
            for doc in docs:
                report_data = doc.to_dict()
                entries.append(
                    f"Report from {report_data.get('timestamp', 'N/A')}:\n{report_data.get('ocr_text', 'No OCR text found')}\n\n"
                )
            if entries:
                history_text = "".join(entries)
        except Exception:
            # History is best-effort: keep answering from the current document,
            # but log the failure so it is not silently lost.
            print(f"Firestore history lookup failed: {traceback.format_exc()}")

    full_document_text = EXTRACTED_TEXT_CACHE + "\n\n" + "PAST REPORTS:\n" + history_text

    try:
        full_prompt = system_prompt_chat.format(
            document_text=full_document_text,
            user_question=request.question
        )
        response = model.generate_content(full_prompt)
        return ChatResponse(answer=response.text)
    except Exception as e:
        print(f"Gemini API error: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=f"An error occurred during chat response generation: {e}")
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  @app.post("/analyze/")
195
  async def analyze(
196
  file: UploadFile = File(...),
197
  model: Optional[str] = Form("bert"),
198
  mode: Optional[str] = Form(None)
199
  ):
200
+ global resolution, EXTRACTED_TEXT_CACHE
201
  if not file.filename:
202
  raise HTTPException(status_code=400, detail="No file uploaded.")
203
 
 
215
  ocr_text = ocr_text_from_image(img_bytes)
216
  ocr_full += ocr_text + "\n\n"
217
  ocr_full = clean_ocr_text(ocr_full)
218
+ print(f"CALLING OCR FULL: {ocr_full}")
219
+
220
+ EXTRACTED_TEXT_CACHE = ocr_full
221
+
222
+
223
+ if model.lower() == "gemini":
224
+ return {"message": "Gemini model not available; please use BERT model."}
225
 
226
  found_diseases = extract_non_negated_keywords(ocr_full)
227
+ print(f"CALLING FOUND DISEASES: {found_diseases}")
228
  past = detect_past_diseases(ocr_full)
229
+ print(f"CALLING PAST DISEASES: {past}")
230
 
231
  for disease in found_diseases:
232
  if disease in past:
233
  severity = classify_disease_and_severity(disease)
234
  detected_diseases.add(((f"{disease}(detected as historical condition, but still under risk.)"), severity))
235
+ print(f"DETECTED DISEASES(PAST): {detected_diseases}")
236
  else:
237
  severity = classify_disease_and_severity(disease)
238
  detected_diseases.add((disease, severity))
239
+ print(f"DETECTED DISEASES: {detected_diseases}")
240
 
241
  print("OCR TEXT:", ocr_text)
242
  print("Detected diseases:", found_diseases)
243
+ ranges = analyze_measurements(ocr_full, df)
244
+
245
 
246
  resolution = []
247
  detected_ranges = []
 
258
  "treatment_suggestions": f"Consult a specialist: {specialist}",
259
  "home_care_guidance": home_care,
260
  "info_link": link
261
+
262
  })
263
 
264
+ for i in ranges:
265
+ condition = i[0]
266
+ measurement = i[1]
267
+ unit = i[2]
268
+ severity = i[3]
269
+ value = i[4]
270
+ range_value = i[5] # renamed to avoid overwriting Python's built-in "range"
271
+
272
+ link_range = disease_links.get(condition.lower(), "https://www.webmd.com/")
273
+ next_steps_range = disease_next_steps.get(condition.lower(), ['Consult a doctor'])
274
+ specialist_range = disease_doctor_specialty.get(condition.lower(), "General Practitioner")
275
+ home_care_range = disease_home_care.get(condition.lower(), [])
276
+ print(f"HELLO!: {measurement}")
277
+
278
+ condition_version = condition.upper()
279
+ severity_version = severity.upper()
280
+
281
+ resolution.append({
282
+ "findings": f"{condition_version} -- {measurement}",
283
+ "severity": f"{value} {unit} - {severity_version}",
284
+ "recommendations": next_steps_range,
285
+ "treatment_suggestions": f"Consult a specialist: {specialist_range}",
286
+ "home_care_guidance": home_care_range,
287
+ "info_link": link_range
288
+ })
289
+
290
  print(ocr_full)
291
  ranges = analyze_measurements(ocr_full, df)
292
  print(analyze_measurements(ocr_full, df))
293
  # print ("Ranges is being printed", ranges)
294
  historical_med_data = detect_past_diseases(ocr_full)
295
+ print("***End of Code***")
296
 
297
  return {
298
  "ocr_text": ocr_full.strip(),
299
+ "Detected_Anomolies": resolution,
 
300
  }
301
 
class TextRequest(BaseModel):
    """Request body for the /analyze-text endpoint."""

    # Free-form text to be analyzed.
    text: str
304
+
305
  @app.post("/analyze-text")
306
  async def analyze_text_endpoint(request: TextRequest):
307
  try:
 
309
  except Exception as e:
310
  print("ERROR in /analyze-text:", traceback.format_exc())
311
  raise HTTPException(status_code=500, detail=f"Error analyzing text: {str(e)}")
312
+
313
 
314
  def analyze_text(text):
315
  severity, disease = classify_disease_and_severity(text)
 
318
  "summary": f"Detected Disease: {disease}, Severity: {severity}"
319
  }
320
 
321
+
322
  @app.get("/health")
323
  @app.get("/health/")
324
  def health():