Spaces:

tjwrld
/

mlonline

Sleeping

App Files Files Community

mlonline / app.py

tjwrld

Update app.py

4943d49 verified over 1 year ago

raw

history blame contribute delete

25.2 kB

	import os
	import time
	import warnings
	from sklearn import metrics, preprocessing
	from sklearn.calibration import LabelEncoder
	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	from sklearn.linear_model import LogisticRegression
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
	from sklearn.pipeline import Pipeline
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.metrics import confusion_matrix
	from sklearn import tree
	from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
	from sklearn.ensemble import RandomForestRegressor
	from sklearn import svm
	from sklearn.naive_bayes import GaussianNB
	from joblib import dump, load
	from matplotlib import pyplot as plt
	from sklearn.tree import plot_tree
	import sweetviz as sv
	from pathlib import Path
	import hashlib
	import google.generativeai as genai
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
	from sklearn.pipeline import Pipeline
	from streamlit_extras.metric_cards import style_metric_cards
	from streamlit_extras.colored_header import colored_header

	# Page-level Streamlit settings: browser tab title and icon, wide layout,
	# and the text shown under the app menu's "About" entry.
	st.set_page_config(
	    layout="wide",
	    page_title="Codeless-ML",
	    page_icon=":📈:",
	    menu_items={'About': "# Under Construction"},
	)
	def _sample_row_count(total_rows):
	    """Return how many leading rows of an uploaded dataset go into the AI sample file.
	
	    Larger datasets keep a smaller fraction: more than 1000 rows keeps 1/12,
	    more than 500 keeps 1/8, more than 100 keeps 1/5, anything smaller keeps
	    1/2.  At least one row is kept whenever the dataset is non-empty.
	    """
	    if total_rows > 1000:
	        divisor = 12
	    elif total_rows > 500:
	        divisor = 8
	    elif total_rows > 100:
	        divisor = 5
	    else:
	        divisor = 2
	    # max(1, ...) keeps very small datasets from producing an empty sample;
	    # min(...) keeps an empty dataset at zero rows.
	    return min(total_rows, max(1, total_rows // divisor))
	
	
	def _clean_text(series):
	    """Lower-case a pandas string Series and drop every character that is neither a word character nor whitespace."""
	    # Raw string: '\w' and '\s' are regex classes, not Python string escapes.
	    return series.str.lower().str.replace(r'[^\w\s]', '', regex=True)
	
	
	def _wait_for_files_active(files):
	    """Poll each uploaded Gemini file every 10 seconds until it leaves the PROCESSING state.
	
	    Raises:
	        Exception: if a file ends in any state other than ACTIVE.
	    """
	    for pending in files:
	        while pending.state.name == "PROCESSING":
	            time.sleep(10)
	            pending = genai.get_file(pending.name)
	        if pending.state.name != "ACTIVE":
	            raise Exception(f"File {pending.name} failed to process")
	
	
	def _load_example_dataset(path):
	    """Read a bundled example CSV and display it; return the DataFrame, or None when the file is missing."""
	    if not os.path.exists(path):
	        # Name the file that is actually missing.
	        st.error(f"File '{os.path.basename(path)}' does not exist.")
	        return None
	    df = pd.read_csv(path)
	    st.subheader("CSV Data")
	    st.write(df)
	    return df
	
	
	def _build_tabular_model(option):
	    """Return a fresh, unfitted estimator for the selected algorithm label, or None for an unknown label."""
	    # Explicit dispatch table instead of eval() on the selectbox value.
	    factories = {
	        "LinearRegression": LinearRegression,
	        "LogisticRegression": LogisticRegression,
	        "DecisionTreeClassifier": lambda: tree.DecisionTreeClassifier(max_depth=5),
	        "DecisionTreeRegressor": lambda: tree.DecisionTreeRegressor(max_depth=5),
	        "RandomForestClassifier": lambda: RandomForestClassifier(max_depth=5),
	        "RandomForestRegressor": lambda: RandomForestRegressor(max_depth=5),
	        "SVC": svm.SVC,
	        "Gaussian Naive Bayes": GaussianNB,
	    }
	    factory = factories.get(option)
	    return factory() if factory is not None else None
	
	
	def _render_text_workflow(df):
	    """Render the text-dataset flow: pick text columns, train a bag-of-words pipeline, predict."""
	    cleaned_questions = None
	    answer = None
	    with st.container():
	        left_column, right_column, col3 = st.columns(3)
	
	        with left_column:
	            st.subheader("Training Column 1 E.g. Questions:")
	            training_col1 = list(df.columns)
	            options2 = st.multiselect(
	                'Select training column (only one)',
	                training_col1,
	                key="tefirst"
	            )
	
	            if options2:
	                st.write("Training Columns:")
	                questions = df[options2]
	                # Join the selected columns row-wise into one string, then clean it.
	                cleaned_questions = _clean_text(
	                    questions.astype(str).agg(' '.join, axis=1)
	                ).values.flatten()
	                st.write(cleaned_questions)
	
	        with right_column:
	            st.subheader("Training Column 2 E.g. Answers")
	            tel1 = [col for col in training_col1 if col not in options2]
	
	            options = st.multiselect(
	                'Select training column (only one)',
	                tel1,
	                key="te2"
	            )
	
	            if options:
	                st.write("Test Column:")
	                answers = df[options]
	                answer = _clean_text(
	                    answers.astype(str).agg(' '.join, axis=1)
	                ).values.flatten()
	                st.write(answer)
	
	        with col3:
	            st.subheader("Enter n_estimators")
	            n_estimators = st.number_input("Select Value (default: 50)", value=50, min_value=10, max_value=500)
	
	            st.subheader("Random State")
	            randomstate = st.number_input("Select Value (default: 10)", value=10, min_value=1, max_value=50)
	
	    st.divider()
	
	    # Stays None unless a pipeline was fitted successfully; everything that
	    # needs a trained model is gated on it.
	    pipeline = None
	    with st.container():
	        st.subheader("Select an Algorithm(Training Can Take Time!!)")
	        options1 = st.selectbox(
	            'Select From DropDown',
	            ('None', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier')
	        )
	
	        st.write('You selected:', options1)
	        st.divider()
	
	        max_depth = 3
	        model = None
	        step_name = None
	        if options1 == "DecisionTreeClassifier":
	            model = DecisionTreeClassifier(max_depth=5, random_state=randomstate)
	            step_name = 'classifier'
	        elif options1 == "RandomForestClassifier":
	            model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=randomstate)
	            step_name = 'classifier'
	        elif options1 == "GradientBoostingClassifier":
	            model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=0.1, max_depth=max_depth, random_state=randomstate)
	            step_name = 'gbc'
	
	        if model is not None:
	            if cleaned_questions is None or answer is None:
	                st.warning("Select a training column and an answer column before training a model")
	            else:
	                try:
	                    pipeline = Pipeline([
	                        ('bow', CountVectorizer()),
	                        ('tfidf', TfidfTransformer()),
	                        (step_name, model)
	                    ]).fit(cleaned_questions, answer)
	                except Exception:
	                    pipeline = None
	                    st.error("Model Not Selected Properly")
	
	    if pipeline is not None:
	        col1, col2 = st.columns(2)
	        col1.metric(label="Model trained successfully.", value=options1)
	        # Score is computed on the same data the pipeline was fitted on.
	        col2.metric(label="Model score:", value=pipeline.score(cleaned_questions, answer), delta=None)
	        try:
	            st.caption("")
	            st.divider()
	            st.subheader("Predict")
	            with st.container():
	                left_column1, right_column1 = st.columns([3, 1])
	                with left_column1:
	                    gg = st.text_input("Enter Values Here", placeholder="Enter")
	                with right_column1:
	                    bu1 = st.button("Predict")
	                if bu1:
	                    prediction = pipeline.predict([gg])[0]
	                    with st.chat_message("assistant"):
	                        st.write(f"Model Predicted: {prediction}")
	        except Exception:
	            st.error("Model Not Selected Properly")
	
	    st.divider()
	
	    with st.container():
	        left_column, right_column = st.columns([1, 1])
	        with left_column:
	            st.subheader("Download Trained Model")
	        with right_column:
	            # The button is rendered but a click triggers no action here.
	            st.button("Download")
	    st.divider()
	
	
	def _render_tabular_workflow(df):
	    """Render the tabular flow: label-encode columns, pick features/target, train, score, predict."""
	    with st.container():
	        st.subheader("Handle Labelled Data")
	        on1 = st.toggle('Activate feature', key="check2")
	        if on1:
	            left_column, right_column = st.columns(2)
	            with left_column:
	                lab = list(df.columns)
	                options1 = st.multiselect(
	                    'Select only labelled columns',
	                    lab,
	                    key="third"
	                )
	                if options1:
	                    le = preprocessing.LabelEncoder()
	                    # apply() calls fit_transform once per selected column.
	                    df[options1] = df[options1].apply(le.fit_transform)
	            with right_column:
	                st.write(df)
	    st.divider()
	
	    # Feature matrix / target stay None until the user selects columns.
	    x = None
	    y = None
	    with st.container():
	        left_column, right_column, col3 = st.columns(3)
	        with left_column:
	            st.subheader("Training Column Names")
	            training_col = list(df.columns)
	            options2 = st.multiselect(
	                'Select training columns',
	                training_col,
	                key="first"
	            )
	
	            if options2:
	                st.write("Training Columns:")
	                x = df[options2]
	                st.write(x)
	
	        with right_column:
	            st.subheader("Test Column Name")
	            # Offer only the columns not already chosen as features.
	            tel = [col for col in training_col if col not in options2]
	            options = st.multiselect(
	                'Select training columns',
	                tel,
	                key="second"
	            )
	            if options:
	                st.write("Test Column:")
	                y = np.ravel(df[options])
	                st.write(y)
	
	        with col3:
	            st.subheader("Enter Weight")
	            # Passed to train_test_split as test_size.
	            test_size = st.number_input("Select Value (default: 0.25)", value=0.25, min_value=0.1, max_value=0.9)
	
	            st.subheader("Random State")
	            random_state = st.number_input("Select Value (default: 3)", value=3, min_value=1, max_value=10)
	
	    st.divider()
	    st.subheader("Select an Algorithm")
	    option = st.selectbox(
	        'Select From DropDown',
	        ('None', 'LinearRegression', 'LogisticRegression', 'DecisionTreeClassifier', 'DecisionTreeRegressor', 'RandomForestClassifier', 'RandomForestRegressor', 'SVC', 'Gaussian Naive Bayes'))
	
	    st.write('You selected:', option)
	
	    st.divider()
	
	    with st.container():
	        st.subheader("Model")
	
	        # Stays None unless training succeeded.
	        fitted = None
	        X_train = X_test = y_test = None
	        if option != 'None':
	            if x is None or y is None:
	                st.warning("Select training and test columns before training a model")
	            else:
	                try:
	                    X_train, X_test, y_train, y_test = train_test_split(
	                        x, y, test_size=test_size, random_state=random_state
	                    )
	                    fitted = _build_tabular_model(option).fit(X_train, y_train)
	                except Exception:
	                    fitted = None
	                    st.error("Something went wrong We think model is not selected properly")
	
	        if fitted is not None:
	            try:
	                col1, col2 = st.columns(2)
	                col1.metric(label="Model trained successfully.", value=option)
	                col2.metric(label="Model score:", value=fitted.score(X_test, y_test), delta=None)
	                style_metric_cards()
	                st.caption("Tip: You can change the Score Using Weight")
	
	                st.divider()
	
	                st.subheader("Predict")
	                str1 = ", ".join(options2)
	                st.write("Enter the following values separated by commas -->" + " " + str1)
	                with st.container():
	                    left_column, right_column = st.columns([3, 1])
	                    with left_column:
	                        raw_values = st.text_input("Enter Values Here", placeholder="Enter")
	                    with right_column:
	                        if st.button("Predict"):
	                            values = list(map(float, raw_values.split(',')))
	                            # Reuse the training column names so the input
	                            # frame lines up with the fitted features.
	                            values_df = pd.DataFrame([values], columns=X_train.columns)
	                            prediction = fitted.predict(values_df)[0]
	                            st.write(f"Model Predicted: {prediction}")
	
	                st.divider()
	                with st.container():
	                    left_column, right_column = st.columns([1, 1])
	                    with left_column:
	                        st.subheader("Download Trained Model")
	                    with right_column:
	                        # The button is rendered but a click triggers no action here.
	                        st.button("Download")
	
	            except Exception:
	                st.error("Something went wrong Select Parameters Correctly")
	
	    st.divider()
	
	
	@st.experimental_fragment
	def main():
	    """Render the Codeless-ML page.
	
	    Injects the page CSS, loads a dataset (an uploaded CSV or one of the two
	    example files), then renders, in order: the Gemini suggestion section,
	    the Sweetviz report section, describe()/null-count tables, optional null
	    handling, and either the text-dataset or the tabular training workflow.
	    Nothing after the dataset section is rendered until a DataFrame is loaded.
	    """
	    warnings.filterwarnings("ignore")
	    hide_st_style = """
	<style>
	MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	# header {visibility: hidden;}
	</style>
	"""
	    st.markdown(hide_st_style, unsafe_allow_html=True)
	
	    page_bg_img = '''
	<style>
	[data-testid = "stAppViewContainer"] {
	background-image: url("https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRvIlgLZYLc2E7gKTaK1kETo-amo5l6mrWQlh7owYgWXw&s");
	background-size: cover;
	}
	[data-testid = "stFileUploaderDropzone"]{
	# background-image: url("https://cdn.dribbble.com/users/1076456/screenshots/17355064/media/1486af881edbacd6db74d34b8e8190f4.png?resize=1000x750&vertical=center");
	background-size: cover;
	}
	[data-testid = "stHeader"] {
	background-color : rgba(0,0,0,0);
	}
	[data-testid = "stToolbar"] {
	right: 2rem;
	}
	</style>
	'''
	
	    st.markdown(page_bg_img, unsafe_allow_html=True)
	
	    custom_css = '''
	<style>
	[data-testid = "stDataFrameResizable"] {
	width: 1800px !important;
	height: 320px !important;
	}
	</style>
	'''
	
	    st.markdown(custom_css, unsafe_allow_html=True)
	
	    st.title("Codeless Machine Learning..")
	    colored_header(
	        label="Train Your Model Online",
	        description="We currently support csv file format only",
	        color_name="violet-70",
	    )
	
	    st.text("Add Dataset in CSV format only")
	
	    st.divider()
	
	    upload_dir = "./uploads"
	    os.makedirs(upload_dir, exist_ok=True)
	
	    # Pre-bind so the name exists even if the uploader widget raises.
	    uploaded_file = None
	    try:
	        uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'], accept_multiple_files=False)
	    except Exception:
	        st.warning("Only Csv format supported")
	
	    left_column, right_column = st.columns([0.4, 1])
	    with left_column:
	        st.text("Use Example Dataset")
	    with right_column:
	        on0 = st.toggle('Yes!! (drug200.csv)', key="check0")
	        onn1 = st.toggle('Yes!! (mentalhealth.csv)', key="checkk1")
	    st.divider()
	
	    df = None
	    # CSV handed to Gemini: the example file itself, or the reduced sample
	    # written for an uploaded dataset.
	    ai_csv_path = None
	    if on0:
	        ai_csv_path = "./drug200.csv"
	        df = _load_example_dataset(ai_csv_path)
	    elif onn1:
	        ai_csv_path = "./mentalhealth.csv"
	        df = _load_example_dataset(ai_csv_path)
	    elif uploaded_file is not None:
	        file_path = os.path.join(upload_dir, "new.csv")
	        # "wb" truncates any previous upload.
	        with open(file_path, "wb") as f:
	            f.write(uploaded_file.getbuffer())
	
	        df = pd.read_csv(file_path)
	
	        ai_csv_path = os.path.join(upload_dir, "half.csv")
	        df.iloc[:_sample_row_count(len(df))].to_csv(ai_csv_path, index=False)
	
	        st.subheader("CSV Data")
	        st.write(df)
	
	    # Every section below reads df, so stop when no dataset could be loaded
	    # (nothing uploaded, or an example file is missing).
	    if df is None:
	        return
	
	    st.divider()
	    with st.container():
	        st.subheader("Get Ai Suggestion")
	        left_column, right_column = st.columns([0.4, 1])
	        with left_column:
	            st.write("This will take few seconds :->")
	        with right_column:
	            agree23 = st.button('Get Suggestion', key="check23")
	
	        if agree23:
	            try:
	                # The Gemini key is read from the OPENAI_API_KEY variable.
	                genai.configure(api_key=os.environ["OPENAI_API_KEY"])
	
	                generation_config = {
	                    "temperature": 1,
	                    "top_p": 0.95,
	                    "top_k": 64,
	                    "max_output_tokens": 5000,
	                    "response_mime_type": "text/plain",
	                }
	
	                model = genai.GenerativeModel(
	                    model_name="gemini-1.5-flash",
	                    generation_config=generation_config,
	                )
	
	                files = [genai.upload_file(ai_csv_path, mime_type="text/csv")]
	                _wait_for_files_active(files)
	
	                chat_session = model.start_chat(
	                    history=[
	                        {
	                            "role": "user",
	                            "parts": [files[0]],
	                        },
	                    ]
	                )
	                response = chat_session.send_message("Summarize the dataset and suggest which among these machine learning model to use.(LinearRegression,LogisticRegression,DecisionTreeClassifier,DecisionTreeRegressor,RandomForestClassifier,RandomForestRegressor,SVC,Gaussian Naive Bayes)")
	                with st.chat_message("assistant"):
	                    st.write(response.text)
	            except Exception:
	                st.error("Oops, something went wrong here")
	
	    st.divider()
	
	    with st.container():
	        st.subheader("Visualise Data")
	        left_column, right_column = st.columns([0.4, 1])
	        with left_column:
	            st.write("This will take few seconds :->")
	        with right_column:
	            agree22 = st.button('Visualise', key="check22")
	
	        if agree22:
	            try:
	                report_file = 'report.html'
	
	                report = sv.analyze(df)
	                # The report is embedded below, so do not ask the machine
	                # running the app to open a browser window.
	                report.show_html(report_file, layout='vertical', open_browser=False)
	
	                st.write("Exploratory Data Analysis with Sweetviz")
	                with open(report_file, 'r', encoding='utf-8') as html_file:
	                    source_code = html_file.read()
	                st.components.v1.html(source_code, height=1000, scrolling=True)
	            except Exception:
	                st.error("Oops Something went wrong here")
	
	    st.divider()
	
	    with st.container():
	        left_column, right_column = st.columns(2)
	        with left_column:
	            st.subheader("Data Description:")
	            st.write(df.describe())
	
	        with right_column:
	            st.subheader("Null Values:")
	            null_values = df.isnull().sum()
	            st.write(null_values)
	
	    st.divider()
	    with st.container():
	        st.subheader("Handle Null Values if you have")
	        on = st.toggle('Activate feature', key="check1")
	        if on:
	            left_column, right_column, col3 = st.columns(3)
	            with left_column:
	                genre = st.selectbox(
	                    "Select Options To Handle Null Values",
	                    ["None", "dropna", "fillna", "ffill", "bfill"]
	                )
	                null_handlers = {
	                    "dropna": lambda frame: frame.dropna(),
	                    # numeric_only: column means only exist for numeric
	                    # columns; other columns are left unfilled.
	                    "fillna": lambda frame: frame.fillna(frame.mean(numeric_only=True)),
	                    "ffill": lambda frame: frame.ffill(),
	                    "bfill": lambda frame: frame.bfill(),
	                }
	                if genre in null_handlers:
	                    df = null_handlers[genre](df)
	                    st.write(df)
	
	            with right_column:
	                st.subheader("Null Values Now:")
	                null_values = df.isnull().sum()
	                st.write(null_values)
	            with col3:
	                st.subheader("Present Columns:")
	                st.dataframe(df.columns)
	
	    st.divider()
	
	    with st.container():
	        st.subheader("Turn This On If You Have Fully Text Dataset")
	        st.write("To Create a Simple Model choose only 1 Row")
	        text_mode = st.toggle('Activate feature', key="te1")
	        if text_mode:
	            _render_text_workflow(df)
	
	    if not text_mode:
	        _render_tabular_workflow(df)

	# Run the page only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()