Spaces:

Roberta2024
/

AblationStudy_

Runtime error

App Files Files Community

AblationStudy_ / app.py

Roberta2024

Create app.py

59beeed verified over 1 year ago

raw

history blame contribute delete

2.65 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.ensemble import RandomForestClassifier
	from xgboost import XGBClassifier
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	import numpy as np

	# Function to process data and return feature importances
	def calculate_importances(file):
	    """Train three tree-based classifiers and tabulate their feature importances.

	    The CSV is read with pandas, the ``'target'`` column is used as the label
	    and every other column as a feature. A random forest, an XGBoost
	    classifier and a single decision tree are fitted on a 75% training split,
	    and their ``feature_importances_`` are collected side by side.

	    Args:
	        file: Anything ``pd.read_csv`` accepts (path or file-like object).

	    Returns:
	        A ``(file_name, preview)`` tuple: the path of the Excel workbook
	        written with the full table, and the first five rows of that table.

	    Raises:
	        KeyError: If the CSV has no ``'target'`` column.
	    """
	    heart_df = pd.read_csv(file)

	    # Fail early with an actionable message instead of the opaque KeyError
	    # that DataFrame.drop would raise for a missing label column.
	    if 'target' not in heart_df.columns:
	        raise KeyError(
	            "uploaded CSV must contain a 'target' column; "
	            f"found columns: {list(heart_df.columns)}"
	        )

	    X = heart_df.drop('target', axis=1)
	    y = heart_df['target']

	    # Only the training portion is needed; the held-out 25% is discarded.
	    X_train, _, y_train, _ = train_test_split(
	        X, y, test_size=0.25, random_state=42
	    )

	    # Column label in the output table -> model to fit. Insertion order
	    # fixes the column order of the resulting table.
	    models = {
	        'Random Forest': RandomForestClassifier(random_state=42),
	        'XGBoost': XGBClassifier(
	            use_label_encoder=False, eval_metric='logloss', random_state=42
	        ),
	        'CART': DecisionTreeClassifier(random_state=42),
	    }

	    # One row per feature, one column per model.
	    table = {'Feature': X.columns}
	    for label, model in models.items():
	        model.fit(X_train, y_train)
	        table[label] = model.feature_importances_
	    importance_df = pd.DataFrame(table)

	    # NOTE(review): the output path is fixed and relative to the working
	    # directory, so concurrent callers would overwrite each other's file --
	    # confirm whether this matters for the deployment.
	    file_name = 'feature_importances.xlsx'
	    importance_df.to_excel(file_name, index=False)

	    return file_name, importance_df.head()

	# Streamlit interface: upload a CSV, show a preview of the computed feature
	# importances, and offer the full table as an Excel download.
	st.title("Feature Importance Calculation")

	# File upload
	uploaded_file = st.file_uploader("Upload heart.csv file", type=['csv'])

	if uploaded_file is not None:
	    try:
	        # Process the file and get results
	        excel_file, preview_df = calculate_importances(uploaded_file)
	    except (KeyError, ValueError) as exc:
	        # KeyError covers a missing 'target' column. ValueError covers pandas
	        # parse failures (its parser exceptions subclass ValueError) and,
	        # presumably, model-fitting failures on unsuitable data.
	        st.error(f"Could not process the uploaded file: {exc}")
	    else:
	        # Display a preview of the DataFrame
	        st.write("Feature Importances (Preview):")
	        st.dataframe(preview_df)

	        # Offer the generated workbook for download. The file is .xlsx, so
	        # the OOXML spreadsheet media type is used rather than the legacy
	        # .xls type "application/vnd.ms-excel".
	        with open(excel_file, "rb") as file:
	            st.download_button(
	                label="Download Excel File",
	                data=file,
	                file_name=excel_file,
	                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	            )