# mlonline / app.py
# Author avatar: tjwrld's picture
# Commit: "Update app.py"
# Revision: 4943d49 (verified)
import os
import time
import warnings
from sklearn import metrics, preprocessing
from sklearn.calibration import LabelEncoder
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from joblib import dump, load
from matplotlib import pyplot as plt
from sklearn.tree import plot_tree
import sweetviz as sv
from pathlib import Path
import hashlib
import google.generativeai as genai
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.colored_header import colored_header
# Global page configuration, issued before any other Streamlit command in
# this file. ``page_icon`` must be a bare emoji (or a ``:shortcode:``); the
# previous value ":📈:" was neither, so the favicon could not be resolved.
st.set_page_config(
    page_title="Codeless-ML",
    page_icon="📈",
    layout="wide",
    menu_items={
        'About': "# Under Construction",
    },
)
# ``st.experimental_fragment`` was renamed to ``st.fragment``; prefer the
# stable name when the installed Streamlit has it and fall back otherwise.
_fragment = getattr(st, "fragment", None) or st.experimental_fragment

_UPLOAD_DIR = "./uploads"
_EXAMPLE_DRUG_CSV = "./drug200.csv"
_EXAMPLE_MENTAL_HEALTH_CSV = "./mentalhealth.csv"

# The CSS snippets are kept flush-left on purpose: they are passed to
# ``st.markdown`` verbatim.
_HIDE_CHROME_CSS = """
<style>
MainMenu {visibility: hidden;}
footer {visibility: hidden;}
# header {visibility: hidden;}
</style>
"""

_PAGE_BACKGROUND_CSS = '''
<style>
[data-testid = "stAppViewContainer"] {
background-image: url("https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRvIlgLZYLc2E7gKTaK1kETo-amo5l6mrWQlh7owYgWXw&s");
background-size: cover;
}
[data-testid = "stFileUploaderDropzone"]{
# background-image: url("https://cdn.dribbble.com/users/1076456/screenshots/17355064/media/1486af881edbacd6db74d34b8e8190f4.png?resize=1000x750&vertical=center");
background-size: cover;
}
[data-testid = "stHeader"] {
background-color : rgba(0,0,0,0);
}
[data-testid = "stToolbar"] {
right: 2rem;
}
</style>
'''

_DATAFRAME_CSS = '''
<style>
[data-testid = "stDataFrameResizable"] {
width: 1800px !important;
height: 320px !important;
}
</style>
'''

_AI_PROMPT = "Summarize the dataset and suggest which among these machine learning model to use.(LinearRegression,LogisticRegression,DecisionTreeClassifier,DecisionTreeRegressor,RandomForestClassifier,RandomForestRegressor,SVC,Gaussian Naive Bayes)"

# Null-handling strategies offered in the UI, keyed by their selectbox label.
_NULL_STRATEGIES = {
    "dropna": lambda frame: frame.dropna(),
    # numeric_only=True: averaging text columns raises in pandas >= 2.0.
    "fillna": lambda frame: frame.fillna(frame.mean(numeric_only=True)),
    "ffill": lambda frame: frame.ffill(),
    "bfill": lambda frame: frame.bfill(),
}

# Estimator factories for the tabular workflow, keyed by their selectbox
# label. This replaces the former ``eval(option + '()')`` call.
_TABULAR_MODEL_FACTORIES = {
    "LinearRegression": LinearRegression,
    "LogisticRegression": LogisticRegression,
    "DecisionTreeClassifier": lambda: tree.DecisionTreeClassifier(max_depth=5),
    "DecisionTreeRegressor": lambda: tree.DecisionTreeRegressor(max_depth=5),
    "RandomForestClassifier": lambda: RandomForestClassifier(max_depth=5),
    "RandomForestRegressor": lambda: RandomForestRegressor(max_depth=5),
    "SVC": svm.SVC,
    "Gaussian Naive Bayes": GaussianNB,
}


def _log_exception(context):
    """Log the exception currently being handled, with its traceback."""
    import logging

    logging.getLogger(__name__).exception(context)


def _sample_row_count(total_rows):
    """Return how many leading rows of an upload are sent to the AI helper.

    Larger files are sampled more aggressively. A non-empty file always
    yields at least one row.
    """
    if total_rows > 1000:
        divisor = 12
    elif total_rows > 500:
        divisor = 8
    elif total_rows > 100:
        divisor = 5
    else:
        divisor = 2
    return max(1, total_rows // divisor) if total_rows else 0


def _inject_styles():
    """Inject the custom CSS used to theme the page."""
    for css in (_HIDE_CHROME_CSS, _PAGE_BACKGROUND_CSS, _DATAFRAME_CSS):
        st.markdown(css, unsafe_allow_html=True)


def _render_header():
    """Render the page title and introductory banner."""
    st.title("Codeless Machine Learning..")
    colored_header(
        label="Train Your Model Online",
        description="We currently support csv file format only",
        color_name="violet-70",
    )
    st.text("Add Dataset in CSV format only")
    st.divider()


def _load_example_dataset(path):
    """Read and display a bundled example CSV; return None if it is missing."""
    if not os.path.exists(path):
        st.error(f"File '{os.path.basename(path)}' does not exist.")
        return None
    df = pd.read_csv(path)
    st.subheader("CSV Data")
    st.write(df)
    return df


def _load_uploaded_dataset(uploaded_file):
    """Persist an uploaded CSV, display it and write a reduced sample copy.

    Returns:
        ``(df, sample_path)``, or ``(None, None)`` when the file cannot be
        parsed as CSV. ``sample_path`` is the smaller file that is later
        sent to the AI helper.
    """
    full_path = os.path.join(_UPLOAD_DIR, "new.csv")
    sample_path = os.path.join(_UPLOAD_DIR, "half.csv")
    # Mode "wb" truncates, so a stale file from a previous run is replaced.
    with open(full_path, "wb") as handle:
        handle.write(uploaded_file.getbuffer())
    try:
        df = pd.read_csv(full_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        _log_exception("Uploaded file could not be parsed as CSV")
        st.error("Could not read the uploaded file as CSV.")
        return None, None
    df.iloc[:_sample_row_count(len(df))].to_csv(sample_path, index=False)
    st.subheader("CSV Data")
    st.write(df)
    return df, sample_path


def _render_dataset_picker():
    """Render the upload widget and example toggles, then load the dataset.

    Returns:
        ``(df, ai_csv_path)``. ``df`` is None when no dataset is available.
        ``ai_csv_path`` is the CSV file to hand to the AI helper.
    """
    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    try:
        uploaded_file = st.file_uploader(
            "Choose a CSV file", type=['csv'], accept_multiple_files=False
        )
    except Exception:
        _log_exception("File uploader failed")
        st.warning("Only Csv format supported")
        uploaded_file = None

    label_col, toggle_col = st.columns([0.4, 1])
    with label_col:
        st.text("Use Example Dataset")
    with toggle_col:
        use_drug = st.toggle('Yes!! (drug200.csv)', key="check0")
        use_mental_health = st.toggle('Yes!! (mentalhealth.csv)', key="checkk1")
    st.divider()

    # The example toggles take precedence over an uploaded file.
    if use_drug:
        return _load_example_dataset(_EXAMPLE_DRUG_CSV), _EXAMPLE_DRUG_CSV
    if use_mental_health:
        return (
            _load_example_dataset(_EXAMPLE_MENTAL_HEALTH_CSV),
            _EXAMPLE_MENTAL_HEALTH_CSV,
        )
    if uploaded_file is None:
        return None, None
    return _load_uploaded_dataset(uploaded_file)


def _wait_for_files_active(files, poll_seconds=10, timeout_seconds=300):
    """Block until every uploaded Gemini file has left the PROCESSING state.

    Raises:
        TimeoutError: a file is still processing after ``timeout_seconds``.
        Exception: a file finished in a state other than ACTIVE.
    """
    deadline = time.monotonic() + timeout_seconds
    for file in files:
        while file.state.name == "PROCESSING":
            if time.monotonic() > deadline:
                raise TimeoutError(f"File {file.name} is still processing")
            time.sleep(poll_seconds)
            file = genai.get_file(file.name)
        if file.state.name != "ACTIVE":
            raise Exception(f"File {file.name} failed to process")


def _render_ai_suggestion(csv_path):
    """Render the 'Get Ai Suggestion' section and query Gemini on demand."""
    st.subheader("Get Ai Suggestion")
    note_col, button_col = st.columns([0.4, 1])
    with note_col:
        st.write("This will take few seconds :->")
    with button_col:
        clicked = st.button('Get Suggestion', key="check23")
    if not clicked:
        return

    try:
        # The key is read from OPENAI_API_KEY even though it is passed to
        # the Gemini client; the variable name is kept unchanged here.
        genai.configure(api_key=os.environ["OPENAI_API_KEY"])
        if csv_path is None or not os.path.exists(csv_path):
            raise FileNotFoundError(csv_path)

        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 5000,
            "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=generation_config,
        )
        files = [genai.upload_file(csv_path, mime_type="text/csv")]
        _wait_for_files_active(files)
        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [files[0]],
                },
            ]
        )
        response = chat_session.send_message(_AI_PROMPT)
        with st.chat_message("assistant"):
            st.write(response.text)
    except Exception:
        _log_exception("AI suggestion failed")
        st.error("Oops, something went wrong here")


def _render_visualisation(df):
    """Render the 'Visualise Data' section and embed a Sweetviz report."""
    st.subheader("Visualise Data")
    note_col, button_col = st.columns([0.4, 1])
    with note_col:
        st.write("This will take few seconds :->")
    with button_col:
        clicked = st.button('Visualise', key="check22")
    if not clicked:
        return

    try:
        report_file = 'report.html'
        if os.path.exists(report_file):
            os.remove(report_file)
        report = sv.analyze(df)
        # The report is embedded below, so no browser is opened on the host
        # that runs this script.
        report.show_html(report_file, layout='vertical', open_browser=False)
        st.write("Exploratory Data Analysis with Sweetviz")
        with open(report_file, 'r', encoding='utf-8') as html_file:
            source_code = html_file.read()
        st.components.v1.html(source_code, height=1000, scrolling=True)
    except Exception:
        _log_exception("Sweetviz report failed")
        st.error("Oops Something went wrong here")


def _render_summary(df):
    """Show ``describe()`` output and per-column null counts side by side."""
    describe_col, nulls_col = st.columns(2)
    with describe_col:
        st.subheader("Data Description:")
        st.write(df.describe())
    with nulls_col:
        st.subheader("Null Values:")
        st.write(df.isnull().sum())


def _render_null_handling(df):
    """Render the null-handling controls and return the resulting frame."""
    st.subheader("Handle Null Values if you have")
    if not st.toggle('Activate feature', key="check1"):
        return df

    strategy_col, nulls_col, columns_col = st.columns(3)
    with strategy_col:
        strategy = st.selectbox(
            "Select Options To Handle Null Values",
            ["None", "dropna", "fillna", "ffill", "bfill"],
        )
        if strategy in _NULL_STRATEGIES:
            df = _NULL_STRATEGIES[strategy](df)
            st.write(df)
    with nulls_col:
        st.subheader("Null Values Now:")
        st.write(df.isnull().sum())
    with columns_col:
        st.subheader("Present Columns:")
        st.dataframe(df.columns)
    return df


def _clean_text(series):
    """Lower-case a string Series and strip non-word, non-space characters."""
    return series.str.lower().str.replace(r'[^\w\s]', '', regex=True)


def _join_and_clean(frame):
    """Join the selected columns row-wise into one cleaned text array."""
    joined = frame.astype(str).agg(' '.join, axis=1)
    return _clean_text(joined).values.flatten()


def _build_text_pipeline(choice, n_estimators, random_state):
    """Return an unfitted bag-of-words pipeline for ``choice``, or None."""
    max_depth = 3
    if choice == "DecisionTreeClassifier":
        step = (
            'classifier',
            DecisionTreeClassifier(max_depth=5, random_state=random_state),
        )
    elif choice == "RandomForestClassifier":
        step = (
            'classifier',
            RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                random_state=random_state,
            ),
        )
    elif choice == "GradientBoostingClassifier":
        step = (
            'gbc',
            GradientBoostingClassifier(
                n_estimators=n_estimators,
                learning_rate=0.1,
                max_depth=max_depth,
                random_state=random_state,
            ),
        )
    else:
        return None
    return Pipeline([
        ('bow', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        step,
    ])


def _render_download_stub():
    """Render the 'Download Trained Model' row."""
    label_col, button_col = st.columns([1, 1])
    with label_col:
        st.subheader("Download Trained Model")
    with button_col:
        # NOTE(review): the button has no action attached in this file.
        st.button("Download")


def _render_text_results(pipeline, choice, questions, answers):
    """Show the fitted text model's score and the free-text predict form."""
    name_col, score_col = st.columns(2)
    name_col.metric(label="Model trained successfully.", value=choice)
    # The score is computed on the training data itself.
    score_col.metric(
        label="Model score:", value=pipeline.score(questions, answers), delta=None
    )
    try:
        st.caption("")
        st.divider()
        st.subheader("Predict")
        with st.container():
            input_col, button_col = st.columns([3, 1])
            with input_col:
                user_text = st.text_input("Enter Values Here", placeholder="Enter")
            with button_col:
                predict_clicked = st.button("Predict")
            if predict_clicked:
                prediction = pipeline.predict([user_text])[0]
                with st.chat_message("assistant"):
                    st.write(f"Model Predicted: {prediction}")
    except Exception:
        _log_exception("Text prediction failed")
        st.error("Model Not Selected Properly")


def _render_text_workflow(df):
    """Render the text-dataset workflow.

    Returns:
        True when the text workflow toggle is on, in which case the caller
        skips the tabular workflow.
    """
    st.subheader("Turn This On If You Have Fully Text Dataset")
    st.write("To Create a Simple Model choose only 1 Row")
    if not st.toggle('Activate feature', key="te1"):
        return False

    cleaned_questions = None
    cleaned_answers = None
    with st.container():
        question_col, answer_col, param_col = st.columns(3)
        with question_col:
            st.subheader("Training Column 1 E.g. Questions:")
            all_columns = list(df.columns)
            question_columns = st.multiselect(
                'Select training column (only one)',
                all_columns,
                key="tefirst",
            )
            if question_columns:
                st.write("Training Columns:")
                cleaned_questions = _join_and_clean(df[question_columns])
                st.write(cleaned_questions)
        with answer_col:
            st.subheader("Training Column 2 E.g. Answers")
            remaining = [c for c in all_columns if c not in question_columns]
            answer_columns = st.multiselect(
                'Select training column (only one)',
                remaining,
                key="te2",
            )
            if answer_columns:
                st.write("Test Column:")
                cleaned_answers = _join_and_clean(df[answer_columns])
                st.write(cleaned_answers)
        with param_col:
            st.subheader("Enter n_estimators")
            n_estimators = st.number_input(
                "Select Value (default: 50)", value=50, min_value=10, max_value=500
            )
            st.subheader("Random State")
            random_state = st.number_input(
                "Select Value (default: 10)", value=10, min_value=1, max_value=50
            )
    st.divider()

    with st.container():
        st.subheader("Select an Algorithm(Training Can Take Time!!)")
        choice = st.selectbox(
            'Select From DropDown',
            (
                'None',
                'DecisionTreeClassifier',
                'RandomForestClassifier',
                'GradientBoostingClassifier',
            ),
        )
        st.write('You selected:', choice)
        st.divider()

        fitted = None
        # Train only once both columns and an algorithm are chosen.
        ready = cleaned_questions is not None and cleaned_answers is not None
        if ready and choice != 'None':
            try:
                pipeline = _build_text_pipeline(choice, n_estimators, random_state)
                fitted = pipeline.fit(cleaned_questions, cleaned_answers)
            except Exception:
                _log_exception("Text model training failed")
                st.error("Model Not Selected Properly")
        if fitted is not None:
            _render_text_results(fitted, choice, cleaned_questions, cleaned_answers)
    st.divider()
    with st.container():
        _render_download_stub()
    return True


def _render_label_encoding(df):
    """Optionally label-encode user-selected columns of ``df`` in place."""
    st.subheader("Handle Labelled Data")
    if not st.toggle('Activate feature', key="check2"):
        return
    select_col, preview_col = st.columns(2)
    with select_col:
        labelled = st.multiselect(
            'Select only labelled columns',
            list(df.columns),
            key="third",
        )
        if labelled:
            encoder = preprocessing.LabelEncoder()
            # The encoder is refit for each column by ``apply``.
            df[labelled] = df[labelled].apply(encoder.fit_transform)
    with preview_col:
        st.write(df)


def _render_split_controls(df):
    """Render feature/target selectors and the split parameters.

    Returns:
        ``(features, target, test_size, random_state)``. ``features`` and
        ``target`` are None until the user has selected them.
    """
    features = None
    target = None
    feature_col, target_col, param_col = st.columns(3)
    with feature_col:
        st.subheader("Training Column Names")
        all_columns = list(df.columns)
        feature_names = st.multiselect(
            'Select training columns',
            all_columns,
            key="first",
        )
        if feature_names:
            st.write("Training Columns:")
            features = df[feature_names]
            st.write(features)
    with target_col:
        st.subheader("Test Column Name")
        remaining = [c for c in all_columns if c not in feature_names]
        target_names = st.multiselect(
            'Select training columns',
            remaining,
            key="second",
        )
        if target_names:
            st.write("Test Column:")
            target = np.ravel(df[target_names])
            st.write(target)
    with param_col:
        st.subheader("Enter Weight")
        test_size = st.number_input(
            "Select Value (default: 0.25)", value=0.25, min_value=0.1, max_value=0.9
        )
        st.subheader("Random State")
        random_state = st.number_input(
            "Select Value (default: 3)", value=3, min_value=1, max_value=10
        )
    return features, target, test_size, random_state


def _render_tabular_results(model, option, feature_names, X_test, y_test):
    """Show the held-out score and the comma-separated predict form."""
    name_col, score_col = st.columns(2)
    name_col.metric(label="Model trained successfully.", value=option)
    score_col.metric(
        label="Model score:", value=model.score(X_test, y_test), delta=None
    )
    style_metric_cards()
    st.caption("Tip: You can change the Score Using Weight")
    st.divider()
    st.subheader("Predict")
    st.write(
        "Enter the following values separated by commas -->"
        + " "
        + ", ".join(feature_names)
    )
    with st.container():
        input_col, button_col = st.columns([3, 1])
        with input_col:
            raw_values = st.text_input("Enter Values Here", placeholder="Enter")
        with button_col:
            if st.button("Predict"):
                values = [float(item) for item in raw_values.split(',')]
                values_df = pd.DataFrame([values], columns=feature_names)
                prediction = model.predict(values_df)[0]
                st.write(f"Model Predicted: {prediction}")
    st.divider()
    with st.container():
        _render_download_stub()


def _render_tabular_workflow(df):
    """Render label encoding, split, training and prediction for tabular data."""
    with st.container():
        _render_label_encoding(df)
    st.divider()
    with st.container():
        features, target, test_size, random_state = _render_split_controls(df)
    st.divider()
    st.subheader("Select an Algorithm")
    option = st.selectbox(
        'Select From DropDown',
        (
            'None',
            'LinearRegression',
            'LogisticRegression',
            'DecisionTreeClassifier',
            'DecisionTreeRegressor',
            'RandomForestClassifier',
            'RandomForestRegressor',
            'SVC',
            'Gaussian Naive Bayes',
        ),
    )
    st.write('You selected:', option)
    st.divider()

    with st.container():
        st.subheader("Model")
        fitted = None
        X_test = y_test = None
        if features is None or target is None or option == 'None':
            st.error("Something went wrong We think model is not selected properly")
        else:
            try:
                X_train, X_test, y_train, y_test = train_test_split(
                    features, target, test_size=test_size, random_state=random_state
                )
                fitted = _TABULAR_MODEL_FACTORIES[option]().fit(X_train, y_train)
            except Exception:
                _log_exception("Tabular model training failed")
                st.error(
                    "Something went wrong We think model is not selected properly"
                )
        if fitted is not None:
            try:
                _render_tabular_results(
                    fitted, option, list(features.columns), X_test, y_test
                )
            except Exception:
                _log_exception("Tabular prediction failed")
                st.error("Something went wrong Select Parameters Correctly")
    st.divider()


@_fragment
def main():
    """Render the whole Codeless-ML page.

    The page is drawn top to bottom: styling and header, dataset selection,
    AI suggestion, Sweetviz visualisation, summary statistics, null
    handling, then either the text workflow or the tabular workflow.
    Nothing below the dataset picker is shown until a dataset has loaded.
    """
    warnings.filterwarnings("ignore")
    _inject_styles()
    _render_header()

    df, ai_csv_path = _render_dataset_picker()
    if df is None:
        # Nothing selected, or the chosen file was missing or unreadable.
        return

    st.divider()
    with st.container():
        _render_ai_suggestion(ai_csv_path)
    st.divider()
    with st.container():
        _render_visualisation(df)
    st.divider()
    with st.container():
        _render_summary(df)
    st.divider()
    with st.container():
        df = _render_null_handling(df)
    st.divider()
    with st.container():
        text_mode = _render_text_workflow(df)
    st.divider()
    if not text_mode:
        _render_tabular_workflow(df)
# Build the UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()