Thuongtruong committed on
Commit
149b3e1
·
verified ·
1 Parent(s): f1ae445

Upload 5 files

Browse files
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Provider Fraud Detection App 🕵️‍♂️
2
+
3
+ Upload a CSV file to detect fraudulent healthcare providers using an Autoencoder model.
4
+ Trained with TensorFlow and deployed with Streamlit on Hugging Face Spaces.
5
+
6
+ ## Instructions:
7
+ - Upload a CSV file with numeric columns
8
+ - The model will predict anomalies
9
+ - Download results with fraud score + labels
autoencoder_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d247f33b73abaef641c5fdf9b4d8eef3798e2abf630a28aa9bd21f9c1148859b
3
+ size 25848
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ tensorflow==2.11.0
2
+ pandas
3
+ numpy
4
+ joblib
5
+ streamlit
6
+ protobuf==3.19.6
scaler_autoencoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a7019770b83397ae976d7969b0e307631e0398c1f498f08b19af364ea249a5d
3
+ size 2311
streamlit_app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app that scores uploaded provider data with a trained autoencoder.

The app loads a Keras autoencoder and a fitted scaler from disk, lets the user
upload a CSV file, computes a per-row reconstruction error ("fraud score") and
flags the rows whose score lies above a percentile of the uploaded batch.

The numeric helpers only need numpy/pandas. Streamlit, joblib and TensorFlow
are imported lazily inside ``main`` / ``load_artifacts`` so the helpers can be
imported and unit-tested without those packages installed.
"""
import os

# NOTE(review): presumably these point Streamlit at a writable directory and
# disable usage metrics on the hosting platform -- confirm that both variables
# are actually honoured by the deployed Streamlit version.
os.environ['STREAMLIT_HOME'] = '/tmp'
os.environ['STREAMLIT_METRICS_ENABLED'] = 'false'

import numpy as np
import pandas as pd

MODEL_PATH = "autoencoder_model.h5"
SCALER_PATH = "scaler_autoencoder.pkl"
ID_COLUMN = "ProviderID"
FRAUD_PERCENTILE = 95


def load_artifacts():
    """Load the trained autoencoder and its fitted scaler from disk.

    Returns:
        A ``(model, scaler)`` tuple.
    """
    import joblib
    from tensorflow.keras.models import load_model

    # Only inference is performed, so the training configuration (optimizer,
    # loss, metrics) does not need to be restored.
    model = load_model(MODEL_PATH, compile=False)
    scaler = joblib.load(SCALER_PATH)
    return model, scaler


def prepare_features(df, expected_columns=None):
    """Build the numeric feature matrix that is fed to the scaler.

    Args:
        df: The uploaded data.
        expected_columns: Column names the scaler was fitted on, in order.
            When given, exactly these columns are selected (non-numeric
            values are coerced to NaN), so the layout never depends on the
            contents of the upload. When ``None``, the numeric columns of
            ``df`` are used and columns that are entirely NaN or constant
            are dropped.

    Returns:
        A DataFrame in which +/-inf and NaN values have been replaced by the
        mean of their column.

    Raises:
        ValueError: If an expected column is missing, if an expected column
            has no usable numeric value, or if no feature column remains.
    """
    if expected_columns is not None:
        expected = list(expected_columns)
        missing = [name for name in expected if name not in df.columns]
        if missing:
            raise ValueError(
                "Missing required column(s): " + ", ".join(map(str, missing))
            )
        features = df.loc[:, expected].apply(pd.to_numeric, errors="coerce")
        features = features.replace([np.inf, -np.inf], np.nan)
    else:
        features = df.select_dtypes(include=[np.number])
        features = features.replace([np.inf, -np.inf], np.nan)
        features = features.dropna(axis=1, how="all")
        features = features.loc[:, features.nunique() > 1]

    if features.shape[1] == 0:
        raise ValueError("No usable numeric columns were found.")

    features = features.fillna(features.mean())

    # A column without a single valid value has no mean and stays NaN, which
    # would turn every score into NaN further down.
    unusable = [str(name) for name in features.columns[features.isna().all()]]
    if unusable:
        raise ValueError(
            "Column(s) without any numeric value: " + ", ".join(unusable)
        )
    return features


def reconstruction_error(original, reconstructed):
    """Return the mean squared error of each row between two 2-D arrays."""
    original = np.asarray(original, dtype=float)
    reconstructed = np.asarray(reconstructed, dtype=float)
    return np.mean(np.square(original - reconstructed), axis=1)


def flag_anomalies(scores, percentile=FRAUD_PERCENTILE):
    """Flag the scores that lie strictly above the given percentile.

    NOTE: the threshold is derived from the scores themselves, so roughly
    ``100 - percentile`` percent of every batch is flagged regardless of how
    anomalous the batch is overall.

    Returns:
        A ``(threshold, mask)`` tuple where ``mask`` is a boolean array.
    """
    scores = np.asarray(scores, dtype=float)
    threshold = np.percentile(scores, percentile)
    return threshold, scores > threshold


def build_results(df, scores, flags):
    """Assemble the result table: identifier, fraud score and fraud flag.

    The ``ProviderID`` column of ``df`` is used as identifier when present,
    otherwise the row index of ``df``.
    """
    ids = df[ID_COLUMN] if ID_COLUMN in df.columns else df.index
    return pd.DataFrame({
        'ProviderID': np.asarray(ids),
        'fraud_score': np.asarray(scores),
        'is_fraud': np.asarray(flags),
    })


def main():
    """Render the app: upload a CSV, score it, preview and offer a download."""
    import streamlit as st

    # The script is re-executed on every interaction; cache the model and the
    # scaler when the installed Streamlit provides st.cache_resource.
    cache = getattr(st, "cache_resource", None)
    get_artifacts = cache(load_artifacts) if cache is not None else load_artifacts
    model, scaler = get_artifacts()

    st.title("🔍 Provider Fraud Detection App")
    st.markdown("Upload a new dataset to detect potential fraudulent providers.")

    uploaded_file = st.file_uploader("📤 Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the CSV file: {exc}")
        return
    if df.empty:
        st.error("❌ The uploaded file contains no rows.")
        return
    st.success("✅ File uploaded successfully!")

    # Preprocessing: align the upload with the columns the scaler was fitted
    # on (when the scaler recorded them) instead of guessing from the data.
    try:
        features = prepare_features(
            df, getattr(scaler, "feature_names_in_", None)
        )
    except ValueError as exc:
        st.error(f"❌ {exc}")
        return

    expected_count = getattr(scaler, "n_features_in_", None)
    if expected_count is not None and features.shape[1] != expected_count:
        st.error(
            f"❌ Expected {expected_count} feature columns "
            f"but found {features.shape[1]}."
        )
        return

    # Scaling
    X_scaled = scaler.transform(features)

    # Autoencoder prediction and per-row reconstruction error
    reconstructions = model.predict(X_scaled)
    mse = reconstruction_error(X_scaled, reconstructions)

    # Threshold
    threshold, is_fraud = flag_anomalies(mse, FRAUD_PERCENTILE)

    # Build the results
    result_df = build_results(df, mse, is_fraud)

    st.markdown("### 📋 Detection Results Preview")
    st.dataframe(result_df.head(10))

    st.markdown(
        f"🔴 **Threshold ({FRAUD_PERCENTILE}th percentile):** {threshold:.6f}"
    )
    st.metric("⚠️ Fraudulent Providers Detected", int(is_fraud.sum()))

    # Download the results file
    csv = result_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "📥 Download Results",
        data=csv,
        file_name="fraud_detection_results.csv",
        mime="text/csv",
    )


# `streamlit run` executes the script with __name__ == "__main__".
if __name__ == "__main__":
    main()