Spaces:

Rajusunnam
/

Cancer_Predictions

Sleeping

App Files Files Community

Cancer_Predictions / app.py

Rajusunnam

Rename app (2).py to app.py

cf95905 verified about 1 year ago

raw

history blame contribute delete

4 kB

	import streamlit as st
	import pandas as pd
	import warnings
	from sklearn.pipeline import Pipeline
	from sklearn.svm import SVC
	from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.impute import SimpleImputer
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.compose import ColumnTransformer

	# Install a blanket "ignore" filter so no warning is reported
	warnings.simplefilter("ignore")

	# Page heading followed by the attribution line
	page_title = "Cancer Prediction App"
	attribution = "### Powered by Innomatics Research Lab"
	st.title(page_title)
	st.markdown(attribution)

	# Dataset loader, wrapped in Streamlit's data cache
	@st.cache_data
	def load_data():
	    """Read the cancer prediction CSV and return it as a DataFrame.

	    The file name is relative, so it is resolved against the current
	    working directory of the running process.
	    """
	    csv_name = 'cancer_prediction_data (2).csv'
	    frame = pd.read_csv(csv_name)
	    return frame

	data = load_data()

	# Define feature categories (declared before validation so they can be checked too)
	numerical_features = ['Age', 'Tumor_Size']
	categorical_features = ['Gender', 'Tumor_Grade', 'Symptoms_Severity', 'Family_History',
	                        'Smoking_History', 'Alcohol_Consumption', 'Exercise_Frequency']

	# Ensure target column exists
	target_col = 'Cancer_Present'
	if target_col not in data.columns:
	    st.error(f"Target column '{target_col}' not found in data!")
	    st.stop()

	# Ensure every feature column exists as well; otherwise the column selection
	# during model fitting would fail later with an unhandled exception.
	missing_features = [
	    column
	    for column in numerical_features + categorical_features
	    if column not in data.columns
	]
	if missing_features:
	    st.error(f"Feature column(s) not found in data: {', '.join(missing_features)}")
	    st.stop()

	# Split features and target
	X = data.drop(columns=[target_col])
	y = data[target_col]

	# Preprocessing pipeline
	def create_preprocessing_pipeline():
	    """Build an unfitted ColumnTransformer for the model's input columns.

	    Columns listed in ``numerical_features`` are mean-imputed and then
	    standardised. Columns listed in ``categorical_features`` are imputed with
	    the most frequent value and then one-hot encoded; the encoder is
	    configured with ``handle_unknown='ignore'``.
	    """
	    num_steps = [
	        ('imputer', SimpleImputer(strategy='mean')),
	        ('scaler', StandardScaler()),
	    ]
	    num_branch = Pipeline(num_steps)

	    cat_steps = [
	        ('imputer', SimpleImputer(strategy='most_frequent')),
	        ('encoder', OneHotEncoder(handle_unknown='ignore')),
	    ]
	    cat_branch = Pipeline(cat_steps)

	    transformers = [
	        ('num', num_branch, numerical_features),
	        ('cat', cat_branch, categorical_features),
	    ]
	    return ColumnTransformer(transformers)

	# Sidebar - Select Algorithm
	st.sidebar.header("Model Selection")
	algorithm = st.sidebar.radio("Choose an Algorithm", ["SVM", "Random Forest", "Gradient Boosting"])

	# Candidate estimators keyed by their sidebar label; only the selected one is fitted
	model_dict = {
	    "SVM": SVC(),
	    "Random Forest": RandomForestClassifier(),
	    "Gradient Boosting": GradientBoostingClassifier(),
	}

	# Hold-out split; the fixed seed yields the same partition on every script rerun
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


	@st.cache_resource
	def train_pipeline(algorithm_name):
	    """Fit the preprocessing + classifier pipeline for one algorithm.

	    Streamlit re-executes the whole script on every widget interaction, so
	    without caching the model would be refitted each time a slider or
	    selectbox changes. The cache key is the algorithm name only; the
	    module-level train/test split is read as-is and is not part of the key.

	    Args:
	        algorithm_name: A key of ``model_dict``.

	    Returns:
	        A ``(fitted_pipeline, test_accuracy)`` tuple, where the accuracy is
	        the pipeline's score on the hold-out split.
	    """
	    fitted = Pipeline([
	        ('preprocessing', create_preprocessing_pipeline()),
	        ('classifier', model_dict[algorithm_name]),
	    ])
	    fitted.fit(X_train, y_train)
	    return fitted, fitted.score(X_test, y_test)


	# Train (or fetch the cached) model for the selected algorithm
	pipeline, accuracy = train_pipeline(algorithm)
	# Keep `preprocess` bound to the preprocessing step of the pipeline in use
	preprocess = pipeline.named_steps['preprocessing']
	st.sidebar.write(f"{algorithm} Accuracy: {accuracy * 100:.2f}%")

	# Sidebar - User input
	def user_input_features():
	    """Collect one patient's feature values from sidebar widgets.

	    Widgets are created in the same order as the returned columns.

	    Returns:
	        A single-row DataFrame with the columns Age, Tumor_Size, Gender,
	        Tumor_Grade, Symptoms_Severity, Family_History, Smoking_History,
	        Alcohol_Consumption and Exercise_Frequency.
	    """
	    # NOTE(review): the option labels below are hard-coded; confirm they match
	    # the category values present in the training CSV.
	    sidebar = st.sidebar
	    record = {}
	    record['Age'] = sidebar.slider("Age", 0, 120, 50)
	    record['Tumor_Size'] = sidebar.slider("Tumor Size", 0.0, 100.0, 5.0)
	    record['Gender'] = sidebar.selectbox("Gender", ["Male", "Female"])
	    record['Tumor_Grade'] = sidebar.selectbox("Tumor Grade", ["Low", "Medium", "High"])
	    record['Symptoms_Severity'] = sidebar.selectbox("Symptoms Severity", ["Mild", "Moderate", "Severe"])
	    record['Family_History'] = sidebar.selectbox("Family History", ["Yes", "No"])
	    record['Smoking_History'] = sidebar.selectbox("Smoking History", ["Current Smoker", "Non-Smoker"])
	    record['Alcohol_Consumption'] = sidebar.selectbox("Alcohol Consumption", ["Low", "Moderate", "High"])
	    record['Exercise_Frequency'] = sidebar.selectbox(
	        "Exercise Frequency", ["Never", "Rarely", "Occasionally", "Often"]
	    )

	    # Wrap each scalar in a one-element list so the frame has exactly one row
	    return pd.DataFrame({column: [value] for column, value in record.items()})

	# Collect the patient's values from the sidebar and echo them in the main area
	st.sidebar.markdown("### Patient Data Input")
	input_df = user_input_features()
	st.subheader("User Input Data")
	st.write(input_df)

	# Prediction
	if st.button("Predict Cancer Presence"):
	    prediction = pipeline.predict(input_df)
	    # NOTE(review): assumes the positive class in the target column is encoded
	    # as 1 - confirm against the dataset.
	    result = "Cancer Detected" if prediction[0] == 1 else "No Cancer Detected"
	    # st.subheader already renders a heading, so the text must not carry its
	    # own "###" markdown prefix (it would be displayed literally).
	    st.subheader(f"Prediction: {result}")