| | |
| | import streamlit as st |
| | import joblib |
| | import numpy as np |
| | import pandas as pd |
| | from sklearn.preprocessing import StandardScaler |
| | from huggingface_hub import hf_hub_download |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | import torch |
| |
|
| | |
# Hugging Face Hub repository holding the fine-tuned text classifier.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"


@st.cache_resource
def _load_nlp_components(model_name):
    """Load the tokenizer and sequence-classification model for *model_name*.

    Streamlit re-executes the whole script on every user interaction, so the
    load is wrapped in ``st.cache_resource`` to reuse the loaded objects
    across reruns instead of rebuilding them each time.
    NOTE(review): requires a Streamlit release that provides
    ``st.cache_resource`` -- confirm against the pinned version.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


nlp_tokenizer, nlp_model = _load_nlp_components(nlp_model_name)


@st.cache_resource
def _load_rf_model(repo_id, filename):
    """Fetch *filename* from the Hub repo *repo_id* and load it with joblib.

    Returns:
        A ``(local_path, model)`` tuple.
    """
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # SECURITY NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code. Only keep this if the source repository is trusted.
    return model_path, joblib.load(model_path)


rf_model_path, rf_model = _load_rf_model(
    "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
    "random_forest_model.joblib",
)
| |
|
| | |
| | |
| | import streamlit as st |
| | import joblib |
| | import numpy as np |
| | import pandas as pd |
| | from sklearn.preprocessing import StandardScaler |
| | from huggingface_hub import hf_hub_download |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| | import torch |
| |
|
| | |
# NOTE(review): the same module-level names are assigned by an identical load
# earlier in this file; one of the two loads is redundant and can be removed.

# Hugging Face Hub repository holding the fine-tuned text classifier.
nlp_model_name = "tajuarAkash/Health_Insurance_Fraud_detection_using_NLP"


@st.cache_resource
def _load_nlp_components(model_name):
    """Load the tokenizer and sequence-classification model for *model_name*.

    Streamlit re-executes the whole script on every user interaction, so the
    load is wrapped in ``st.cache_resource`` to reuse the loaded objects
    across reruns instead of rebuilding them each time.
    NOTE(review): requires a Streamlit release that provides
    ``st.cache_resource`` -- confirm against the pinned version.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


nlp_tokenizer, nlp_model = _load_nlp_components(nlp_model_name)


@st.cache_resource
def _load_rf_model(repo_id, filename):
    """Fetch *filename* from the Hub repo *repo_id* and load it with joblib.

    Returns:
        A ``(local_path, model)`` tuple.
    """
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # SECURITY NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code. Only keep this if the source repository is trusted.
    return model_path, joblib.load(model_path)


rf_model_path, rf_model = _load_rf_model(
    "tajuarAkash/Health_Insurance_Fraud_detection_using_Random_forest",
    "random_forest_model.joblib",
)
| |
|
| | |
# Column order of the single-row frame handed to the scaler / random forest.
_ML_FEATURE_COLUMNS = (
    'ClaimDate',
    'ClaimAmount',
    'PatientAge',
    'PatientIncome',
    'PatientGender',
    'ProviderSpecialty',
    'ClaimStatus',
    'PatientMaritalStatus',
    'PatientEmploymentStatus',
    'ProviderLocation',
    'ClaimType',
    'ClaimSubmissionMethod',
)

# Integer codes substituted for the claim-status labels in the ML branch.
_CLAIM_STATUS_CODES = {"Denied": 0, "Pending": 1, "Approved": 2}


def _preprocess_for_ml(input_data):
    """Build the one-row feature frame and return it scaled."""
    # pd.to_datetime on a scalar returns a Timestamp, which offers
    # .toordinal() directly; the .dt accessor only exists on Series.
    # The result is kept in a local so the caller's dict is not modified.
    claim_date_ordinal = pd.to_datetime(input_data['ClaimDate']).toordinal()

    row = {column: [input_data[column]] for column in _ML_FEATURE_COLUMNS}
    row['ClaimDate'] = [claim_date_ordinal]
    input_df = pd.DataFrame(row)

    # 'Male' -> 1, anything else -> 0.
    input_df['PatientGender'] = (input_df['PatientGender'] == 'Male').astype(int)
    # Labels missing from the mapping become NaN.
    input_df['ClaimStatus'] = input_df['ClaimStatus'].map(_CLAIM_STATUS_CODES)

    # NOTE(review): the scaler is fitted on this single row only, so it cannot
    # reproduce whatever scaling was used when the model was trained. The
    # free-text columns (e.g. ProviderSpecialty) are also still strings here,
    # which StandardScaler is expected to reject. The training-time
    # scaler/encoders are not visible in this file -- confirm and load them.
    scaler = StandardScaler()
    return scaler.fit_transform(input_df)


def _preprocess_for_nlp(input_data):
    """Render the claim as one descriptive sentence block and tokenize it."""
    claim_date = input_data['ClaimDate']
    claim_amount = input_data['ClaimAmount']
    patient_age = input_data['PatientAge']
    patient_gender = input_data['PatientGender']
    provider_specialty = input_data['ProviderSpecialty']
    claim_status = input_data['ClaimStatus']
    patient_income = input_data['PatientIncome']
    patient_marital_status = input_data['PatientMaritalStatus']
    patient_employment_status = input_data['PatientEmploymentStatus']
    provider_location = input_data['ProviderLocation']
    claim_type = input_data['ClaimType']
    claim_submission_method = input_data['ClaimSubmissionMethod']
    # Optional: not every caller collects this field, so a missing value must
    # not raise KeyError.
    claim_legitimacy = input_data.get('ClaimLegitimacy')

    input_text = (
        f"The claim date is {claim_date}, with a claim amount of {claim_amount}. "
        f"The patient is {patient_age} years old, and their gender is {patient_gender}. "
        f"The provider specialty is {provider_specialty}. The claim status is {claim_status}. "
        f"The patient's income is {patient_income}, marital status is {patient_marital_status}, "
        f"and employment status is {patient_employment_status}. The provider location is {provider_location}. "
        f"The claim type is {claim_type}, and the claim submission method is {claim_submission_method}."
    )
    if claim_legitimacy is not None:
        # NOTE(review): this sentence feeds the expected answer to the
        # classifier; confirm that is intended for inference.
        input_text += f" Claim legitimacy: {claim_legitimacy}."

    return nlp_tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )


def preprocess_input(input_data, method="ml"):
    """Convert one claim record into the input for the selected model.

    Args:
        input_data: Dict holding the claim fields 'ClaimDate', 'ClaimAmount',
            'PatientAge', 'PatientIncome', 'PatientGender',
            'ProviderSpecialty', 'ClaimStatus', 'PatientMaritalStatus',
            'PatientEmploymentStatus', 'ProviderLocation', 'ClaimType' and
            'ClaimSubmissionMethod'. 'ClaimLegitimacy' is optional and only
            read by the "nlp" method. The dict is not modified.
        method: "ml" for the scaled feature row, "nlp" for tokenized text.

    Returns:
        For "ml", the array produced by ``StandardScaler.fit_transform`` on
        the one-row feature frame. For "nlp", the tokenizer output requested
        with ``return_tensors="pt"``.

    Raises:
        KeyError: If a required claim field is missing from *input_data*.
        ValueError: If *method* is neither "ml" nor "nlp".
    """
    if method == "ml":
        return _preprocess_for_ml(input_data)
    if method == "nlp":
        return _preprocess_for_nlp(input_data)
    raise ValueError(f"Unknown preprocessing method: {method!r} (expected 'ml' or 'nlp')")
| |
|
| |
|
| | |
# --- Streamlit page: collect one claim and show the selected model's verdict ---
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** or **NLP-based prediction**.
""")

# Every widget gets an explicit key. This script builds the same set of
# widgets (same type, label and options) more than once, and without distinct
# keys Streamlit rejects the repeats as duplicate elements.
prediction_method = st.radio(
    "Choose Prediction Method",
    ("ML Prediction", "NLP Prediction"),
    key="form1_prediction_method",
)

# Claim fields entered by the user.
claim_date = st.date_input("Enter the claim date", key="form1_claim_date")
claim_amount = st.number_input("Enter the claim amount", min_value=0, key="form1_claim_amount")
patient_age = st.number_input("Enter the patient's age", min_value=0, key="form1_patient_age")
patient_income = st.number_input("Enter the patient's income", min_value=0, key="form1_patient_income")
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"], key="form1_patient_gender")
provider_specialty = st.text_input("Enter the provider specialty", key="form1_provider_specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"], key="form1_claim_status")
patient_marital_status = st.text_input("Enter the marital status", key="form1_marital_status")
patient_employment_status = st.text_input("Enter the employment status", key="form1_employment_status")
provider_location = st.text_input("Enter the provider location", key="form1_provider_location")
claim_type = st.text_input("Enter the claim type", key="form1_claim_type")
claim_submission_method = st.text_input("Enter the claim submission method", key="form1_submission_method")

if st.button('Predict', key="form1_predict"):
    # NOTE(review): this form does not collect a 'ClaimLegitimacy' value;
    # confirm the NLP preprocessing does not require it.
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
    }

    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")
        prediction = rf_model.predict(input_scaled)

        # One row was submitted, so read that row's label explicitly instead
        # of comparing the whole result array to 1.
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")

    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")
| |
|
| |
|
| |
|
| |
|
| |
|
| | |
# --- Streamlit page: collect one claim (including its legitimacy label) and
# --- show the selected model's verdict
st.title("Insurance Claim Fraud Detection")
st.write("""
This app predicts whether an insurance claim is fraudulent or legitimate based on user input.
You can choose between **ML-based prediction** or **NLP-based prediction**.
""")

# Every widget gets an explicit key. This script builds the same set of
# widgets (same type, label and options) more than once, and without distinct
# keys Streamlit rejects the repeats as duplicate elements.
prediction_method = st.radio(
    "Choose Prediction Method",
    ("ML Prediction", "NLP Prediction"),
    key="form2_prediction_method",
)

# Claim fields entered by the user.
claim_date = st.date_input("Enter the claim date", key="form2_claim_date")
claim_amount = st.number_input("Enter the claim amount", min_value=0, key="form2_claim_amount")
patient_age = st.number_input("Enter the patient's age", min_value=0, key="form2_patient_age")
patient_income = st.number_input("Enter the patient's income", min_value=0, key="form2_patient_income")
patient_gender = st.selectbox("Select patient's gender", ["Male", "Female"], key="form2_patient_gender")
provider_specialty = st.text_input("Enter the provider specialty", key="form2_provider_specialty")
claim_status = st.selectbox("Claim status", ["Denied", "Pending", "Approved"], key="form2_claim_status")
patient_marital_status = st.text_input("Enter the marital status", key="form2_marital_status")
patient_employment_status = st.text_input("Enter the employment status", key="form2_employment_status")
provider_location = st.text_input("Enter the provider location", key="form2_provider_location")
claim_type = st.text_input("Enter the claim type", key="form2_claim_type")
claim_submission_method = st.text_input("Enter the claim submission method", key="form2_submission_method")
# NOTE(review): this asks the user for the very label the app is meant to
# predict -- confirm it is really wanted as an input.
claim_legitimacy = st.selectbox("Claim legitimacy", ["Fraud", "Legitimate"], key="form2_claim_legitimacy")

if st.button('Predict', key="form2_predict"):
    input_data = {
        "ClaimDate": claim_date,
        "ClaimAmount": claim_amount,
        "PatientAge": patient_age,
        "PatientIncome": patient_income,
        "PatientGender": patient_gender,
        "ProviderSpecialty": provider_specialty,
        "ClaimStatus": claim_status,
        "PatientMaritalStatus": patient_marital_status,
        "PatientEmploymentStatus": patient_employment_status,
        "ProviderLocation": provider_location,
        "ClaimType": claim_type,
        "ClaimSubmissionMethod": claim_submission_method,
        "ClaimLegitimacy": claim_legitimacy,
    }

    if prediction_method == "ML Prediction":
        input_scaled = preprocess_input(input_data, method="ml")
        prediction = rf_model.predict(input_scaled)

        # One row was submitted, so read that row's label explicitly instead
        # of comparing the whole result array to 1.
        if prediction[0] == 1:
            st.write("This claim is predicted to be **fraudulent** (ML model).")
        else:
            st.write("This claim is predicted to be **legitimate** (ML model).")

    elif prediction_method == "NLP Prediction":
        inputs = preprocess_input(input_data, method="nlp")

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            logits = nlp_model(**inputs).logits
            predicted_class = torch.argmax(logits, dim=-1).item()

        if predicted_class == 1:
            st.write("This claim is predicted to be **fraudulent** (NLP model).")
        else:
            st.write("This claim is predicted to be **legitimate** (NLP model).")
| |
|