Spaces:

varshitha22
/

Cancer_Prediction

Sleeping

App Files Files Community

Cancer_Prediction / cancer.py

varshitha22

Update cancer.py

d287fb3 verified 10 months ago

raw

history blame contribute delete

5.19 kB

	import streamlit as st
	import pandas as pd
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.compose import ColumnTransformer
	from sklearn.model_selection import train_test_split
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.ensemble import RandomForestClassifier
	from xgboost import XGBClassifier

	# Load dataset
	def load_data(path='cancer_prediction_data (2).csv'):
	    """Read the cancer dataset from a CSV file into a DataFrame.

	    Args:
	        path: Location of the CSV file. The default is the file name the
	            app has always used, so existing ``load_data()`` calls behave
	            exactly as before.

	    Returns:
	        The ``pandas.DataFrame`` produced by ``pd.read_csv(path)``.
	    """
	    return pd.read_csv(path)

	# Data Preprocessing
	def preprocess_data(df):
	    """Split ``df`` into train/test sets and build the feature transformer.

	    Args:
	        df: DataFrame holding the ``Cancer_Present`` target column together
	            with the feature columns listed below.

	    Returns:
	        A 2-tuple ``(split, preprocess)``. ``split`` is the 4-element result
	        of ``train_test_split`` (x_train, x_test, y_train, y_test) using a
	        20% test share and ``random_state=23``; ``preprocess`` is the
	        unfitted ``ColumnTransformer``.
	    """
	    numeric_cols = ['Age', 'Tumor_Size']
	    ordinal_cols = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
	    nominal_cols = ['Gender', 'Family_History', 'Smoking_History']

	    # Numeric features: fill gaps with the column mean, then standardise.
	    numeric_steps = Pipeline([
	        ('imputer', SimpleImputer(strategy='mean')),
	        ('scaler', StandardScaler()),
	    ])

	    # Ordinal features: fill gaps with the mode, then integer-encode.
	    # Categories unseen during fit are encoded as -1.
	    # NOTE(review): no explicit `categories` are passed, so the encoder
	    # chooses the category order itself -- confirm that this matches the
	    # intended Low/Medium/High style ranking.
	    ordinal_steps = Pipeline([
	        ('imputer', SimpleImputer(strategy='most_frequent')),
	        ('encoder', OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)),
	    ])

	    # Nominal features: fill gaps with the mode, then one-hot encode;
	    # unseen categories are ignored at transform time.
	    nominal_steps = Pipeline([
	        ('imputer', SimpleImputer(strategy='most_frequent')),
	        ('encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore')),
	    ])

	    preprocess = ColumnTransformer(
	        [
	            ('num', numeric_steps, numeric_cols),
	            ('ord', ordinal_steps, ordinal_cols),
	            ('nom', nominal_steps, nominal_cols),
	        ],
	        remainder='passthrough',
	    )

	    features = df.drop(columns='Cancer_Present')
	    target = df['Cancer_Present']
	    split = train_test_split(features, target, test_size=0.2, random_state=23)
	    return split, preprocess

	# Train Model
	def train_model(x_train, y_train, preprocess, model_name):
	    """Fit a preprocessing + classifier pipeline for the chosen model.

	    Args:
	        x_train: Training features passed to ``Pipeline.fit``.
	        y_train: Training labels passed to ``Pipeline.fit``.
	        preprocess: Transformer used as the pipeline's ``'preprocessor'``
	            step.
	        model_name: One of ``'Decision Tree'``, ``'Logistic Regression'``,
	            ``'KNN'``, ``'Random Forest'`` or ``'XGBoost'``.

	    Returns:
	        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
	        ``'classifier'``.

	    Raises:
	        KeyError: If ``model_name`` is not one of the supported names.
	    """
	    # Map names to classes (not instances) so that only the estimator that
	    # was actually requested gets constructed.
	    model_classes = {
	        'Decision Tree': DecisionTreeClassifier,
	        'Logistic Regression': LogisticRegression,
	        'KNN': KNeighborsClassifier,
	        'Random Forest': RandomForestClassifier,
	        'XGBoost': XGBClassifier,
	    }
	    if model_name not in model_classes:
	        # Same exception type as a plain dict lookup, but with a message
	        # that tells the caller which names are accepted.
	        raise KeyError(
	            f"Unknown model {model_name!r}; expected one of {sorted(model_classes)}"
	        )

	    pipeline = Pipeline([
	        ('preprocessor', preprocess),
	        ('classifier', model_classes[model_name]()),
	    ])
	    pipeline.fit(x_train, y_train)
	    return pipeline

	# Streamlit UI
	st.set_page_config(page_title='Cancer Prediction App', layout='wide')

	# Sidebar: pick a classifier and train it on demand. The fitted pipeline and
	# the training features are kept in session state so that they survive the
	# script re-run triggered by later widget interactions.
	with st.sidebar:
	    st.markdown("### Select Machine Learning Model")
	    model_name = st.radio("Choose a Model", ['Decision Tree', 'Logistic Regression', 'KNN', 'Random Forest', 'XGBoost'])
	    if st.button("Train Model"):
	        try:
	            df = load_data()
	        except FileNotFoundError:
	            # Report a missing dataset in the UI instead of crashing the
	            # app with a raw traceback.
	            st.error("Training data file not found. Please make sure the dataset CSV is available.")
	        else:
	            (x_train, x_test, y_train, y_test), preprocess = preprocess_data(df)
	            model = train_model(x_train, y_train, preprocess, model_name)
	            accuracy = model.score(x_test, y_test)
	            st.session_state['trained_model'] = model
	            # Stored so the prediction form can reproduce the column layout.
	            st.session_state['x_train'] = x_train
	            st.success(f"Model Trained Successfully! Accuracy: {accuracy:.2f}")

	# Page header followed by the two-column patient details form.
	st.title("🎗️ Cancer Prediction")

	st.markdown(
	    """<style>.big-font {font-size:20px !important;}</style>
	<p class="big-font">Provide patient details below to predict cancer presence:</p>""",
	    unsafe_allow_html=True,
	)

	left_col, right_col = st.columns(2)

	# Left column: age, tumour size and the clinical gradings.
	with left_col:
	    age = st.slider("Age", 18, 100, 30)
	    tumor_size = st.slider("Tumor Size", 1.0, 10.0, 5.0)
	    tumor_grade = st.selectbox("Tumor Grade", ['High', 'Low', 'Medium'])
	    symptoms_severity = st.selectbox("Symptoms Severity", ['Mild', 'Moderate', 'Severe'])

	# Right column: lifestyle and background factors.
	with right_col:
	    smoking_history = st.selectbox("Smoking History", ['Never Smoker', 'Former Smoker', 'Current Smoker'])
	    alcohol_consumption = st.selectbox("Alcohol Consumption", ['Low','Moderate','High'])
	    exercise_frequency = st.selectbox("Exercise Frequency", ['Rarely', 'Occasionally', 'Regularly','Never'])
	    gender = st.selectbox("Gender", [0, 1])
	    family_history = st.selectbox("Family History", ["No", "Yes"])

	# A single row holding the current widget values, in the order above.
	patient_row = [
	    age,
	    tumor_size,
	    tumor_grade,
	    symptoms_severity,
	    smoking_history,
	    alcohol_consumption,
	    exercise_frequency,
	    gender,
	    family_history,
	]
	input_data = [patient_row]

	# Prediction: runs only when the button is pressed and a model has already
	# been trained during this session.
	if st.button("Predict Cancer Presence"):
	    if 'trained_model' not in st.session_state:
	        st.error("Please train a model first!")
	    else:
	        model = st.session_state['trained_model']
	        x_train = st.session_state['x_train']

	        # Key every widget value by its dataset column name rather than by
	        # position, so a CSV whose column order differs from the form's
	        # order cannot silently mislabel features.
	        patient = {
	            'Age': age,
	            'Tumor_Size': tumor_size,
	            'Tumor_Grade': tumor_grade,
	            'Symptoms_Severity': symptoms_severity,
	            'Smoking_History': smoking_history,
	            'Alcohol_Consumption': alcohol_consumption,
	            'Exercise_Frequency': exercise_frequency,
	            'Gender': gender,
	            'Family_History': family_history,
	        }
	        # Reorder to the training layout; a training column that the form
	        # does not supply raises KeyError here instead of being guessed.
	        input_df = pd.DataFrame([patient])[list(x_train.columns)]

	        # Make sure the numeric inputs are numeric before scaling.
	        for col in ['Age', 'Tumor_Size']:
	            input_df[col] = pd.to_numeric(input_df[col], errors='coerce')

	        # The pipeline applies its 'preprocessor' step and then the
	        # 'classifier' step, so a single predict call covers both.
	        prediction = model.predict(input_df)

	        if prediction[0] == 1:
	            st.markdown("<h3 style='color: red;'>Cancer Prediction: Positive 🟥</h3>", unsafe_allow_html=True)
	            st.write("Unfortunately, the model predicts the presence of cancer. Please consult a doctor for further advice.")
	        else:
	            st.markdown("<h3 style='color: green;'>Cancer Prediction: Negative 🟩</h3>", unsafe_allow_html=True)
	            st.write("Good news! The model predicts that there is no cancer detected. Stay healthy!")