Spaces:

prahalya
/

Cancer_Prediction

Sleeping

App Files Files Community

Cancer_Prediction / app.py

prahalya

Upload 3 files

b9ef127 verified 10 months ago

raw

history blame contribute delete

4.3 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.compose import ColumnTransformer
	from sklearn.model_selection import train_test_split
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.svm import SVC
	from sklearn.linear_model import LogisticRegression
	from sklearn.neighbors import KNeighborsClassifier

	# Load dataset
	def load_data(path='cancer_prediction_data (2).csv'):
	    """Read the cancer prediction dataset from a CSV file.

	    Args:
	        path: Location of the CSV file. Defaults to the dataset file
	            shipped next to this app, so existing ``load_data()`` calls
	            keep working unchanged.

	    Returns:
	        A ``pandas.DataFrame`` holding the parsed CSV contents.

	    Raises:
	        FileNotFoundError: If no file exists at ``path``.
	    """
	    # The path is a parameter (not a literal in the body) so the loader can
	    # be pointed at another dataset or a small fixture file.
	    return pd.read_csv(path)

	# Data Preprocessing
	def preprocess_data(df):
	    """Split the dataset and build the (unfitted) feature preprocessor.

	    The ``Cancer_Present`` column is used as the target; every other
	    column of ``df`` is treated as a feature. The rows are split 80/20
	    into train and test sets with a fixed random seed.

	    Args:
	        df: DataFrame containing the feature columns named below plus
	            the ``Cancer_Present`` target column.

	    Returns:
	        A 5-tuple ``(X_train, X_test, y_train, y_test, preprocess)``
	        where ``preprocess`` is a ``ColumnTransformer`` that has not
	        been fitted yet.
	    """
	    target_column = 'Cancer_Present'
	    numeric_columns = ['Age', 'Tumor_Size']
	    ordinal_columns = ['Tumor_Grade', 'Symptoms_Severity', 'Alcohol_Consumption', 'Exercise_Frequency']
	    nominal_columns = ['Gender', 'Family_History', 'Smoking_History']

	    def _categorical_pipeline(encoder):
	        # Both categorical branches fill gaps with the most frequent
	        # value and differ only in the encoder applied afterwards.
	        return Pipeline(steps=[
	            ('imputer', SimpleImputer(strategy='most_frequent')),
	            ('encoder', encoder),
	        ])

	    # Numeric features: mean-impute, then standardise.
	    numeric_branch = Pipeline(steps=[
	        ('imputer', SimpleImputer(strategy='mean')),
	        ('scaler', StandardScaler()),
	    ])
	    # Ordinal features: categories unseen during fit are encoded as -1.
	    ordinal_branch = _categorical_pipeline(
	        OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
	    )
	    # Nominal features: one-hot encoded; unseen categories are ignored.
	    nominal_branch = _categorical_pipeline(
	        OneHotEncoder(sparse_output=False, handle_unknown='ignore')
	    )

	    # Columns not listed in any branch are passed through unchanged.
	    column_transformer = ColumnTransformer(
	        transformers=[
	            ('num', numeric_branch, numeric_columns),
	            ('ord', ordinal_branch, ordinal_columns),
	            ('nom', nominal_branch, nominal_columns),
	        ],
	        remainder='passthrough',
	    )

	    features = df.drop(columns=target_column)
	    labels = df[target_column]
	    split_parts = train_test_split(features, labels, test_size=0.2, random_state=23)
	    return (*split_parts, column_transformer)

	# Train Models
	def train_model(X_train, y_train, preprocess, model_name):
	    """Fit a preprocessing + classifier pipeline for the selected model.

	    Args:
	        X_train: Training feature table.
	        y_train: Training target values.
	        preprocess: Transformer used as the pipeline's first step.
	        model_name: One of ``'Decision Tree'``, ``'SVM'``,
	            ``'Logistic Regression'`` or ``'KNN'``.

	    Returns:
	        The fitted ``Pipeline`` with steps ``'preprocessor'`` and
	        ``'classifier'``.

	    Raises:
	        KeyError: If ``model_name`` is not one of the names above.
	    """
	    # Map each display name to its estimator class; the chosen class is
	    # instantiated with default hyper-parameters.
	    classifier_classes = {
	        'Decision Tree': DecisionTreeClassifier,
	        'SVM': SVC,
	        'Logistic Regression': LogisticRegression,
	        'KNN': KNeighborsClassifier,
	    }
	    classifier = classifier_classes[model_name]()

	    fitted_pipeline = Pipeline(steps=[
	        ('preprocessor', preprocess),
	        ('classifier', classifier),
	    ])
	    # Pipeline.fit returns the pipeline itself.
	    return fitted_pipeline.fit(X_train, y_train)

	# Streamlit UI
	st.title("Cancer Prediction Using Machine Learning")
	df = load_data()
	X_train, X_test, y_train, y_test, preprocess = preprocess_data(df)

	model_name = st.selectbox("Select Model", ['Decision Tree', 'SVM', 'Logistic Regression', 'KNN'])

	if st.button("Train Model"):
	    model = train_model(X_train, y_train, preprocess, model_name)
	    accuracy = model.score(X_test, y_test)
	    st.write(f"Model Accuracy: {accuracy:.2f}")
	    # Keep the fitted pipeline in session state so the prediction button
	    # below can find it (looked up via 'trained_model' in st.session_state).
	    st.session_state['trained_model'] = model
	    st.success("Model trained successfully!")

	# Prediction Section
	st.header("Make a Prediction")
	age = st.number_input("Age", min_value=18, max_value=100, value=30)
	tumor_size = st.number_input("Tumor Size", min_value=1.0, max_value=10.0, value=5.0)
	tumor_grade = st.selectbox("Tumor Grade", [1, 2, 3])
	symptoms_severity = st.selectbox("Symptoms Severity", [1, 2, 3])
	smoking_history = st.selectbox("Smoking History", [0, 1, 2])
	alcohol_consumption = st.selectbox("Alcohol Consumption", [0, 1, 2, 3])
	exercise_frequency = st.selectbox("Exercise Frequency", [0, 1, 2, 3])
	gender = st.selectbox("Gender", [0, 1])
	family_history = st.selectbox("Family History", [0, 1])

	# Key every form value by the column name the preprocessor selects on.
	# A positional list labelled with X_train.columns would silently put
	# values in the wrong columns whenever the CSV column order differs
	# from the order the widgets are listed in.
	# NOTE(review): the widgets offer integer codes; confirm the CSV stores
	# these columns with the same coding.
	input_features = {
	    'Age': age,
	    'Tumor_Size': tumor_size,
	    'Tumor_Grade': tumor_grade,
	    'Symptoms_Severity': symptoms_severity,
	    'Smoking_History': smoking_history,
	    'Alcohol_Consumption': alcohol_consumption,
	    'Exercise_Frequency': exercise_frequency,
	    'Gender': gender,
	    'Family_History': family_history,
	}

	if st.button("Predict Cancer Presence"):
	    if 'trained_model' in st.session_state:
	        model = st.session_state['trained_model']

	        # Training columns the form has no widget for cannot be filled in;
	        # report them instead of failing inside pandas or scikit-learn.
	        unfilled = [str(col) for col in X_train.columns if col not in input_features]
	        if unfilled:
	            st.error("No input available for training column(s): " + ", ".join(unfilled))
	        else:
	            # Build the single-row frame by name, ordered like the training data.
	            input_df = pd.DataFrame([input_features], columns=X_train.columns)

	            # Pipeline.predict runs the fitted preprocessor and then the
	            # classifier, i.e. the same two steps as calling them by hand.
	            prediction = model.predict(input_df)

	            st.write("Cancer Prediction:", "Positive" if prediction[0] == 1 else "Negative")
	    else:
	        st.error("Please train a model first!")