pilayar committed on
Commit
ffe98fd
·
verified ·
1 Parent(s): 9efc99e

Update app.py

Browse files

Added RAG architecture for verification

Files changed (1) hide show
  1. app.py +86 -37
app.py CHANGED
@@ -6,8 +6,53 @@ from pydantic import BaseModel, Field
6
  from PIL import Image
7
  import requests
8
  from datetime import datetime
 
 
 
 
9
 
10
- # 1. Enhanced Schema (Added NPI for the API call)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  class ProviderLicense(BaseModel):
12
  provider_name: str = Field(description="Full name of the healthcare provider")
13
  license_number: str = Field(description="The professional license number")
@@ -15,11 +60,6 @@ class ProviderLicense(BaseModel):
15
  state: str = Field(description="The state where the license was issued")
16
  expiration_date: str = Field(description="Format: YYYY-MM-DD")
17
 
18
- # 2. Page Configuration
19
- st.set_page_config(page_title="AI Credentialing Assistant", layout="wide")
20
- st.title("🩺 Provider Credentialing AI")
21
-
22
- # 3. Helper Function for NPPES
23
  def get_nppes_data(npi_number):
24
  url = "https://npiregistry.cms.hhs.gov/api/?version=2.1"
25
  params = {"number": npi_number}
@@ -30,14 +70,20 @@ def get_nppes_data(npi_number):
30
  return data["results"][0] if data.get("result_count", 0) > 0 else None
31
  return None
32
  except Exception as e:
33
- st.error(f"NPPES API Error: {e}")
34
  return None
35
 
36
- # 4. Initialize Client
 
 
 
 
 
 
 
37
  api_key = os.environ.get("GEMINI_API_KEY")
38
  client = genai.Client(api_key=api_key)
39
 
40
- # 5. Sidebar & Main Logic
41
  uploaded_file = st.sidebar.file_uploader("Upload Medical License", type=["jpg", "jpeg", "png", "pdf"])
42
 
43
  if uploaded_file:
@@ -49,12 +95,12 @@ if uploaded_file:
49
  st.image(image, use_container_width=True)
50
 
51
  with col2:
52
- st.subheader("AI Extraction & Registry Match")
53
- with st.spinner("Analyzing and Verifying..."):
54
  try:
55
- # AI Extraction
56
  response = client.models.generate_content(
57
- model="gemini-2.5-flash", # Verify model ID in your environment
58
  contents=["Extract details from this license.", image],
59
  config=types.GenerateContentConfig(
60
  response_mime_type="application/json",
@@ -63,43 +109,46 @@ if uploaded_file:
63
  )
64
  data = response.parsed
65
 
66
- # Logic: Is Valid? (Calculated, not guessed)
67
  expiry = datetime.strptime(data.expiration_date, "%Y-%m-%d").date()
68
  is_active = expiry >= datetime.today().date()
69
 
70
- # Display Results
71
  st.metric("Provider Name", data.provider_name)
72
  st.write(f"**License:** {data.license_number} ({data.state})")
73
 
74
  status_color = "green" if is_active else "red"
75
  st.markdown(f"**Status:** :{status_color}[{ 'Valid' if is_active else 'Expired'}] (Expires: {data.expiration_date})")
76
 
77
- # --- NEW: Automated NPPES Verification ---
78
  st.divider()
79
- st.subheader("Federal Registry Verification")
80
  registry_data = get_nppes_data(data.npi_number)
81
 
82
  if registry_data:
83
- # Based on your st.json, registry_data is the single record dictionary
84
- basic_info = registry_data.get('basic', {})
85
-
86
- # Check if it's an Organization (NPI-2) or Individual (NPI-1)
87
- if registry_data.get('enumeration_type') == 'NPI-2':
88
- display_name = basic_info.get('organization_name', 'Unnamed Organization')
89
- st.info(f"Verified as Organizational NPI: {display_name}")
90
- else:
91
- f_name = basic_info.get('first_name', '')
92
- l_name = basic_info.get('last_name', '')
93
- display_name = f"{f_name} {l_name}".strip()
94
- st.success(f"Verified as Individual Provider: {display_name}")
95
 
96
- st.write(f"**Registry Name:** {display_name}")
97
-
98
- # Taxonomy parsing (based on your JSON structure)
99
- taxonomies = registry_data.get('taxonomies', [])
100
- if taxonomies:
101
- st.write(f"**Primary Taxonomy:** {taxonomies[0].get('desc', 'N/A')}")
102
- st.write(f"**Associated License:** {taxonomies[0].get('license', 'N/A')}")
103
-
 
 
 
 
 
 
 
104
  except Exception as e:
105
  st.error(f"Processing Error: {e}")
 
6
  from PIL import Image
7
  import requests
8
  from datetime import datetime
9
+ import pandas as pd
10
+ from langchain.docstore.document import Document
11
+ from langchain_community.vectorstores import Chroma
12
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
13
 
14
+ # --- 1. RAG CONFIGURATION & INGESTION ---
15
@st.cache_resource
def get_vector_db(file_path):
    """Ingest the 2026 Alert List CSV and cache the resulting vector database.

    Each CSV row becomes one ``Document`` whose text lists the provider name,
    license number, action type, effective date and a summary, and whose
    metadata carries the license number and provider name.

    Args:
        file_path: Path to the alert-list CSV. The file must contain the
            columns ``Name``, ``License Number``, ``Action Type`` and
            ``Effective Date``; ``Description`` is optional.

    Returns:
        A Chroma vector store built from the rows, or ``None`` (after showing
        a Streamlit error) when ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        st.error(f"Alert List file not found at {file_path}")
        return None

    # Read every column as text: with pandas' default type inference a license
    # number such as "012345" becomes the integer 12345 (or 12345.0 when the
    # column has gaps), and the stored value no longer matches the license.
    df = pd.read_csv(file_path, dtype=str)
    df.columns = df.columns.str.strip()

    default_summary = 'Administrative disciplinary action recorded.'

    documents = []
    for _, row in df.iterrows():
        license_number = str(row['License Number']).strip()

        # Fall back to the default when the column is absent *or* the cell is
        # empty; otherwise an empty cell would be rendered as "nan".
        summary = row.get('Description')
        if pd.isna(summary):
            summary = default_summary

        content = (
            f"Provider Name: {row['Name']}\n"
            f"License Number: {license_number}\n"
            f"Action Taken: {row['Action Type']}\n"
            f"Effective Date: {row['Effective Date']}\n"
            f"Summary: {summary}"
        )
        metadata = {
            "license": license_number,
            "provider_name": row['Name'],
        }
        documents.append(Document(page_content=content, metadata=metadata))

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # Using an in-memory Chroma for the demo; use persist_directory for production
    vector_db = Chroma.from_documents(documents, embeddings)
    return vector_db
44
+
45
def check_yellow_flags(license_number, vector_db, k=5):
    """Look up a license number in the ingested alert-list vector database.

    A similarity search only returns the *nearest* records, which may belong
    to a different license, so every candidate is confirmed by an exact
    comparison against the ``license`` value stored in its metadata.

    Args:
        license_number: License number extracted from the uploaded document.
        vector_db: Vector store exposing ``similarity_search(query, k=...)``,
            or ``None`` when the alert list could not be loaded.
        k: Number of nearest records to inspect for an exact match.

    Returns:
        The ``page_content`` of the matching alert record, or ``None`` when
        there is no database, no license number, or no exact match.
    """
    # The database is None when the alert-list file was missing; treat that as
    # "nothing to report" instead of raising AttributeError.
    if vector_db is None or not license_number:
        return None

    wanted = str(license_number).strip()
    if not wanted:
        return None

    for doc in vector_db.similarity_search(wanted, k=k):
        metadata = getattr(doc, "metadata", None) or {}
        recorded = str(metadata.get("license", "")).strip()
        # Exact (case-insensitive) match on the stored license number. A
        # substring test on the page text would flag "123" for a record about
        # "12345", or match digits that appear in a date or summary.
        if recorded.casefold() == wanted.casefold():
            return doc.page_content
    return None
54
+
55
+ # --- 2. SCHEMAS & HELPERS ---
56
  class ProviderLicense(BaseModel):
57
  provider_name: str = Field(description="Full name of the healthcare provider")
58
  license_number: str = Field(description="The professional license number")
 
60
  state: str = Field(description="The state where the license was issued")
61
  expiration_date: str = Field(description="Format: YYYY-MM-DD")
62
 
 
 
 
 
 
63
  def get_nppes_data(npi_number):
64
  url = "https://npiregistry.cms.hhs.gov/api/?version=2.1"
65
  params = {"number": npi_number}
 
70
  return data["results"][0] if data.get("result_count", 0) > 0 else None
71
  return None
72
  except Exception as e:
 
73
  return None
74
 
75
+ # --- 3. PAGE SETUP & INITIALIZATION ---
76
+ st.set_page_config(page_title="AI Credentialing Assistant", layout="wide")
77
+ st.title("🩺 Provider Credentialing AI")
78
+
79
+ # Load the RAG Database (Point to your uploaded CSV)
80
+ alert_list_path = "alert-actions-2026.xlsx - Sheet1.csv"
81
+ vdb = get_vector_db(alert_list_path)
82
+
83
  api_key = os.environ.get("GEMINI_API_KEY")
84
  client = genai.Client(api_key=api_key)
85
 
86
+ # --- 4. MAIN WORKFLOW ---
87
  uploaded_file = st.sidebar.file_uploader("Upload Medical License", type=["jpg", "jpeg", "png", "pdf"])
88
 
89
  if uploaded_file:
 
95
  st.image(image, use_container_width=True)
96
 
97
  with col2:
98
+ st.subheader("AI Extraction & Risk Analysis")
99
+ with st.spinner("Extracting & Verifying..."):
100
  try:
101
+ # A. Extraction
102
  response = client.models.generate_content(
103
+ model="gemini-2.0-flash",
104
  contents=["Extract details from this license.", image],
105
  config=types.GenerateContentConfig(
106
  response_mime_type="application/json",
 
109
  )
110
  data = response.parsed
111
 
112
+ # B. Expiration Logic
113
  expiry = datetime.strptime(data.expiration_date, "%Y-%m-%d").date()
114
  is_active = expiry >= datetime.today().date()
115
 
 
116
  st.metric("Provider Name", data.provider_name)
117
  st.write(f"**License:** {data.license_number} ({data.state})")
118
 
119
  status_color = "green" if is_active else "red"
120
  st.markdown(f"**Status:** :{status_color}[{ 'Valid' if is_active else 'Expired'}] (Expires: {data.expiration_date})")
121
 
122
+ # C. Federal Verification
123
  st.divider()
124
+ st.subheader("Federal Registry (NPPES)")
125
  registry_data = get_nppes_data(data.npi_number)
126
 
127
  if registry_data:
128
+ basic = registry_data.get('basic', {})
129
+ name = basic.get('organization_name') if registry_data.get('enumeration_type') == 'NPI-2' else f"{basic.get('first_name')} {basic.get('last_name')}"
130
+ st.success(f"NPI Verified: {name}")
131
+ else:
132
+ st.warning("NPI not found in Federal Registry")
133
+
134
+ # D. NEW: RAG-based Yellow Flag Detection
135
+ st.divider()
136
+ st.subheader("⚠️ Risk Intelligence (RAG)")
 
 
 
137
 
138
+ flag_context = check_yellow_flags(data.license_number, vdb)
139
+
140
+ if flag_context:
141
+ st.error(f"YELLOW FLAG DETECTED for License {data.license_number}")
142
+ # Use Gemini to summarize the disciplinary action for the user
143
+ risk_summary = client.models.generate_content(
144
+ model="gemini-2.0-flash",
145
+ contents=[f"Based on this medical board record, summarize the risk in one sentence: {flag_context}"]
146
+ )
147
+ st.warning(risk_summary.text)
148
+ with st.expander("View Raw Alert Detail"):
149
+ st.text(flag_context)
150
+ else:
151
+ st.success("No active flags found in the 2026 Medical Board Alert List.")
152
+
153
  except Exception as e:
154
  st.error(f"Processing Error: {e}")