Spaces:

tajuarAkash
/

Heath_Insurance_Fraud_Prediction

Sleeping

App Files Files Community

Heath_Insurance_Fraud_Prediction / app.py

tajuarAkash

Update app.py

2393287 verified about 1 year ago

raw

history blame contribute delete

11.6 kB

	#new
	import streamlit as st
	import joblib
	import numpy as np
	import pandas as pd
	from sklearn.preprocessing import StandardScaler
	from huggingface_hub import hf_hub_download
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	# Streamlit re-executes this script from top to bottom on every widget
	# interaction, so the heavy model objects are built inside cached loaders and
	# reused across reruns instead of being re-created each time.
	nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"  # Hugging Face model repo for the NLP classifier


	@st.cache_resource
	def _load_nlp_components(model_name):
	    """Return the ``(tokenizer, model)`` pair for the sequence classifier.

	    Args:
	        model_name: Hugging Face repo id passed to ``from_pretrained``.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForSequenceClassification.from_pretrained(model_name)
	    return tokenizer, model


	@st.cache_resource
	def _load_random_forest(repo_id, filename):
	    """Download the serialized Random Forest and return ``(local_path, model)``.

	    Args:
	        repo_id: Hugging Face repo id that hosts the joblib file.
	        filename: Name of the joblib file inside that repo.
	    """
	    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
	    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
	    # code. Only point this at a repository you control or trust.
	    return local_path, joblib.load(local_path)


	# Load your NLP model (Hugging Face model) for fraud prediction
	nlp_tokenizer, nlp_model = _load_nlp_components(nlp_model_name)

	# Load the Random Forest model for ML-based prediction
	rf_model_path, rf_model = _load_random_forest(
	    "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
	    "random_forest_model.joblib",
	)

	# NOTE(review): the imports below repeat the ones at the top of this file and are redundant.
	import streamlit as st
	import joblib
	import numpy as np
	import pandas as pd
	from sklearn.preprocessing import StandardScaler
	from huggingface_hub import hf_hub_download
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	# The NLP tokenizer/model and the Random Forest model (nlp_model_name,
	# nlp_tokenizer, nlp_model, rf_model_path, rf_model) are already loaded
	# earlier in this script. A second, identical load used to live here; it only
	# repeated the download/deserialization work and rebound the same names, so it
	# was removed.

	# Preprocessing for the user inputs
	def preprocess_input(input_data, method="ml"):
	    """Convert the raw form values into the input expected by the chosen model.

	    Args:
	        input_data: Mapping of claim fields. The keys ``ClaimDate``,
	            ``ClaimAmount``, ``PatientAge``, ``PatientIncome``,
	            ``PatientGender``, ``ProviderSpecialty``, ``ClaimStatus``,
	            ``PatientMaritalStatus``, ``PatientEmploymentStatus``,
	            ``ProviderLocation``, ``ClaimType`` and ``ClaimSubmissionMethod``
	            are required. ``ClaimLegitimacy`` is optional and only used by the
	            ``"nlp"`` method. The mapping is not modified.
	        method: ``"ml"`` to build the scaled feature row for the Random Forest,
	            ``"nlp"`` to build the tokenized sentence for the transformer.

	    Returns:
	        For ``"ml"``: the array returned by ``StandardScaler.fit_transform`` for
	        a one-row frame. For ``"nlp"``: the tokenizer output (PyTorch tensors).
	        ``None`` for any other ``method`` value.

	    Raises:
	        KeyError: If a required key is missing from ``input_data``.
	    """
	    if method == "ml":
	        # pd.to_datetime on a single date returns a Timestamp, which has no
	        # ``.dt`` accessor; call toordinal() on it directly. The result is kept
	        # in a local so the caller's dict is left untouched.
	        claim_ordinal = pd.to_datetime(input_data['ClaimDate']).toordinal()

	        # Wrap each feature value in a list to create a one-row DataFrame
	        input_df = pd.DataFrame({
	            'ClaimDate': [claim_ordinal],
	            'ClaimAmount': [input_data['ClaimAmount']],
	            'PatientAge': [input_data['PatientAge']],
	            'PatientIncome': [input_data['PatientIncome']],
	            'PatientGender': [input_data['PatientGender']],
	            'ProviderSpecialty': [input_data['ProviderSpecialty']],
	            'ClaimStatus': [input_data['ClaimStatus']],
	            'PatientMaritalStatus': [input_data['PatientMaritalStatus']],
	            'PatientEmploymentStatus': [input_data['PatientEmploymentStatus']],
	            'ProviderLocation': [input_data['ProviderLocation']],
	            'ClaimType': [input_data['ClaimType']],
	            'ClaimSubmissionMethod': [input_data['ClaimSubmissionMethod']],
	        })

	        # Encode the two categorical columns that have a known mapping here.
	        input_df['PatientGender'] = input_df['PatientGender'].apply(
	            lambda gender: 1 if gender == 'Male' else 0
	        )
	        claim_status_mapping = {"Denied": 0, "Pending": 1, "Approved": 2}
	        input_df['ClaimStatus'] = input_df['ClaimStatus'].map(claim_status_mapping)

	        # NOTE(review): the remaining text columns (ProviderSpecialty,
	        # PatientMaritalStatus, ...) are passed on un-encoded, and the scaler is
	        # fitted on this single row rather than reusing the scaler from
	        # training. Fitting on one row centres every column on its own value,
	        # so the scaled output carries no information. The training-time
	        # encoders/scaler are not available in this file; they should be
	        # persisted with the model and loaded here instead.
	        scaler = StandardScaler()
	        input_scaled = scaler.fit_transform(input_df)

	        return input_scaled

	    elif method == "nlp":
	        # Describe the claim as a short paragraph, one sentence per group of
	        # fields, joined with single spaces.
	        sentences = [
	            f"The claim date is {input_data['ClaimDate']}, with a claim amount of {input_data['ClaimAmount']}.",
	            f"The patient is {input_data['PatientAge']} years old, and their gender is {input_data['PatientGender']}.",
	            f"The provider specialty is {input_data['ProviderSpecialty']}.",
	            f"The claim status is {input_data['ClaimStatus']}.",
	            f"The patient's income is {input_data['PatientIncome']}, marital status is {input_data['PatientMaritalStatus']}, "
	            f"and employment status is {input_data['PatientEmploymentStatus']}.",
	            f"The provider location is {input_data['ProviderLocation']}.",
	            f"The claim type is {input_data['ClaimType']}, and the claim submission method is {input_data['ClaimSubmissionMethod']}.",
	        ]

	        # ClaimLegitimacy is the value being predicted, so callers normally do
	        # not have it. Only mention it when it was supplied instead of failing
	        # with a KeyError when it is absent.
	        if 'ClaimLegitimacy' in input_data:
	            sentences.append(f"Claim legitimacy: {input_data['ClaimLegitimacy']}.")

	        input_text = " ".join(sentences)

	        # Tokenize the input text for the NLP model
	        inputs = nlp_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
	        return inputs

	    # Unknown method: nothing is produced.
	    return None


	# ---------------------------------------------------------------------------
	# Streamlit user interface.
	#
	# The widgets are declared exactly once: Streamlit rejects a script that
	# creates two widgets of the same type with identical arguments and no
	# explicit ``key``, which is what a second copy of this section would do.
	# ---------------------------------------------------------------------------

	# Title and description for the app
	st.title("Insurance Claim Fraud Detection")
	st.write(
	    "\n"
	    "This app predicts whether an insurance claim is fraudulent or legitimate based on user input.\n"
	    "You can choose between ML-based prediction or NLP-based prediction.\n"
	)

	# Radio buttons to select the prediction method
	prediction_method = st.radio("Choose Prediction Method", ("ML Prediction", "NLP Prediction"))

	# Input fields for the user (these should match the model features).
	# ClaimLegitimacy is intentionally not collected: it is the target that the
	# models predict.
	claim_date = st.date_input("Enter the claim date")
	claim_amount = st.number_input("Enter the claim amount", min_value=0)
	patient_age = st.number_input("Enter the patient's age", min_value=0)
	patient_income = st.number_input("Enter the patient's income", min_value=0)
	patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"])
	provider_specialty = st.text_input("Enter the provider specialty")
	claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"])
	patient_marital_status = st.text_input("Enter the marital status")
	patient_employment_status = st.text_input("Enter the employment status")
	provider_location = st.text_input("Enter the provider location")
	claim_type = st.text_input("Enter the claim type")
	claim_submission_method = st.text_input("Enter the claim submission method")


	def _show_verdict(is_fraud, model_label):
	    """Write the prediction sentence for the given model to the page."""
	    verdict = "fraudulent" if is_fraud else "legitimate"
	    st.write(f"This claim is predicted to be {verdict} ({model_label}).")


	# Run the selected model only when the user presses the button
	if st.button('Predict'):
	    input_data = {
	        "ClaimDate": claim_date,
	        "ClaimAmount": claim_amount,
	        "PatientAge": patient_age,
	        "PatientIncome": patient_income,
	        "PatientGender": patient_gender,
	        "ProviderSpecialty": provider_specialty,
	        "ClaimStatus": claim_status,
	        "PatientMaritalStatus": patient_marital_status,
	        "PatientEmploymentStatus": patient_employment_status,
	        "ProviderLocation": provider_location,
	        "ClaimType": claim_type,
	        "ClaimSubmissionMethod": claim_submission_method,
	    }

	    # Preprocess the input data based on the selected method
	    if prediction_method == "ML Prediction":
	        input_scaled = preprocess_input(input_data, method="ml")

	        # predict() returns one label per input row; exactly one row is passed,
	        # so read that single element explicitly. Label 1 is reported as fraud.
	        prediction = rf_model.predict(input_scaled)
	        _show_verdict(prediction[0] == 1, "ML model")

	    elif prediction_method == "NLP Prediction":
	        inputs = preprocess_input(input_data, method="nlp")

	        # Inference only: no gradients are needed
	        with torch.no_grad():
	            logits = nlp_model(**inputs).logits
	        predicted_class = torch.argmax(logits, dim=-1).item()

	        # Class index 1 is reported as fraud
	        _show_verdict(predicted_class == 1, "NLP model")