Spaces:

Thuongtruong
/

streamlit_app

Configuration error

App Files Files Community

Thuongtruong committed on May 27, 2025

Commit

149b3e1

verified ·

1 Parent(s): f1ae445

Upload 5 files

Browse files

Files changed (5) hide show

README.md +9 -0
autoencoder_model.h5 +3 -0
requirements.txt +6 -0
scaler_autoencoder.pkl +3 -0
streamlit_app.py +62 -0

README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# Provider Fraud Detection App 🕵️‍♂️
+Upload a CSV file to detect fraudulent healthcare providers using an Autoencoder model.
+Trained with TensorFlow and deployed with Streamlit on Hugging Face Spaces.
+## Instructions:
+- Upload a CSV file with numeric columns
+- The model will predict anomalies
+- Download results with fraud score + labels

autoencoder_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d247f33b73abaef641c5fdf9b4d8eef3798e2abf630a28aa9bd21f9c1148859b
+size 25848

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+tensorflow==2.11.0
+pandas
+numpy
+joblib
+streamlit
+protobuf==3.19.6

scaler_autoencoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a7019770b83397ae976d7969b0e307631e0398c1f498f08b19af364ea249a5d
+size 2311

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import streamlit as st
+import os
+os.environ['STREAMLIT_HOME'] = '/tmp'
+os.environ['STREAMLIT_METRICS_ENABLED'] = 'false'
+import pandas as pd
+import numpy as np
+import joblib
+from tensorflow.keras.models import load_model
+# Load model và scaler đã huấn luyện
+model = load_model("autoencoder_model.h5")
+scaler = joblib.load("scaler_autoencoder.pkl")
+st.title("🔍 Provider Fraud Detection App")
+st.markdown("Upload a new dataset to detect potential fraudulent providers.")
+uploaded_file = st.file_uploader("📤 Upload CSV file", type=["csv"])
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    st.success("✅ File uploaded successfully!")
+    # Dự phòng giữ ID
+    if 'ProviderID' in df.columns:
+        id_col = df['ProviderID']
+    else:
+        id_col = df.index
+    # Tiền xử lý
+    df_processed = df.select_dtypes(include=[np.number])
+    df_processed.replace([np.inf, -np.inf], np.nan, inplace=True)
+    df_processed.dropna(axis=1, how='all', inplace=True)
+    df_processed = df_processed.loc[:, df_processed.nunique() > 1]
+    df_processed = df_processed.fillna(df_processed.mean())
+    # Chuẩn hóa
+    X_scaled = scaler.transform(df_processed)
+    # Dự đoán với autoencoder
+    reconstructions = model.predict(X_scaled)
+    mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)
+    # Threshold
+    threshold = np.percentile(mse, 95)
+    is_fraud = mse > threshold
+    # Tạo kết quả
+    result_df = pd.DataFrame({
+        'ProviderID': id_col,
+        'fraud_score': mse,
+        'is_fraud': is_fraud
+    })
+    st.markdown("### 📋 Detection Results Preview")
+    st.dataframe(result_df.head(10))
+    st.markdown(f"🔴 **Threshold (95th percentile):** {threshold:.6f}")
+    st.metric("⚠️ Fraudulent Providers Detected", is_fraud.sum())
+    # Tải file kết quả
+    csv = result_df.to_csv(index=False).encode("utf-8")
+    st.download_button("📥 Download Results", data=csv, file_name="fraud_detection_results.csv", mime="text/csv")