import os
import time
import warnings
from sklearn import metrics, preprocessing
from sklearn.calibration import LabelEncoder
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from joblib import dump, load
from matplotlib import pyplot as plt
from sklearn.tree import plot_tree
import sweetviz as sv
from pathlib import Path
import hashlib
import google.generativeai as genai
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.colored_header import colored_header
# Configure the browser-tab title, icon, page layout and the "About" menu entry.
_page_settings = dict(
    page_title="Codeless-ML",
    page_icon=":📈:",
    layout="wide",
    menu_items={'About': "# Under Construction"},
)
st.set_page_config(**_page_settings)
# Newer Streamlit releases expose ``st.fragment``; older ones only have
# ``st.experimental_fragment``. Use whichever this installation provides.
_fragment = getattr(st, "fragment", None) or st.experimental_fragment

# Selectable estimators for tabular data, keyed by the label shown in the
# dropdown. Each value is a zero-argument factory returning a fresh estimator.
_TABULAR_MODEL_FACTORIES = {
    "LinearRegression": LinearRegression,
    "LogisticRegression": LogisticRegression,
    "DecisionTreeClassifier": lambda: tree.DecisionTreeClassifier(max_depth=5),
    "DecisionTreeRegressor": lambda: tree.DecisionTreeRegressor(max_depth=5),
    "RandomForestClassifier": lambda: RandomForestClassifier(max_depth=5),
    "RandomForestRegressor": lambda: RandomForestRegressor(max_depth=5),
    "SVC": svm.SVC,
    "Gaussian Naive Bayes": GaussianNB,
}


def _apply_page_styles():
    """Inject the page's raw HTML/CSS snippets via ``st.markdown``.

    All three snippets currently contain only a newline, so they act as
    placeholders for custom styling.
    """
    hide_st_style = """
"""
    st.markdown(hide_st_style, unsafe_allow_html=True)
    page_bg_img = '''
'''
    st.markdown(page_bg_img, unsafe_allow_html=True)
    custom_css = '''
'''
    st.markdown(custom_css, unsafe_allow_html=True)


def _sample_row_count(n_rows):
    """Return how many leading rows of an upload go into the reduced sample.

    Larger files keep a smaller fraction: 1/12 above 1000 rows, 1/8 above 500,
    1/5 above 100 and 1/2 otherwise. At least one row is always requested.
    """
    if n_rows > 1000:
        divisor = 12
    elif n_rows > 500:
        divisor = 8
    elif n_rows > 100:
        divisor = 5
    else:
        divisor = 2
    return max(1, n_rows // divisor)


def _load_example(path):
    """Read a bundled example CSV, display it and return it (``None`` if missing)."""
    if not os.path.exists(path):
        st.error(f"File '{os.path.basename(path)}' does not exist.")
        return None
    df = pd.read_csv(path)
    st.subheader("CSV Data")
    st.write(df)
    return df


def _load_upload(uploaded_file, upload_dir):
    """Save an uploaded CSV, write a reduced sample next to it and display it.

    Returns:
        ``(df, sample_path)`` on success, ``(None, None)`` if the file cannot
        be parsed as CSV.
    """
    file_path = os.path.join(upload_dir, "new.csv")
    # Mode "wb" truncates any previous upload, so no explicit delete is needed.
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    try:
        df = pd.read_csv(file_path)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return None, None
    # Only the first rows are written to half.csv; that file is what gets
    # uploaded for the AI suggestion.
    sample_path = os.path.join(upload_dir, "half.csv")
    df.iloc[:_sample_row_count(len(df))].to_csv(sample_path, index=False)
    st.subheader("CSV Data")
    st.write(df)
    return df, sample_path


def _select_dataset(upload_dir):
    """Render the upload / example-dataset controls and load the chosen data.

    Returns:
        ``(df, suggestion_csv)`` where ``suggestion_csv`` is the path of the
        file sent to the AI helper. Both are ``None`` when nothing is loaded.
    """
    os.makedirs(upload_dir, exist_ok=True)
    try:
        uploaded_file = st.file_uploader(
            "Choose a CSV file", type=['csv'], accept_multiple_files=False
        )
    except Exception:
        # Keep the page usable (example datasets still work) if the uploader fails.
        uploaded_file = None
        st.warning("Only Csv format supported")
    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.text("Use Example Dataset")
    with right_column:
        use_drug = st.toggle('Yes!! (drug200.csv)', key="check0")
        use_mental = st.toggle('Yes!! (mentalhealth.csv)', key="checkk1")
    st.divider()
    # The example toggles take precedence over an uploaded file.
    if use_drug:
        path = "./drug200.csv"
        return _load_example(path), path
    if use_mental:
        path = "./mentalhealth.csv"
        return _load_example(path), path
    if uploaded_file is not None:
        return _load_upload(uploaded_file, upload_dir)
    return None, None


def _upload_to_gemini(path, mime_type=None):
    """Upload ``path`` through ``genai.upload_file`` and return the file handle."""
    return genai.upload_file(path, mime_type=mime_type)


def _wait_for_files_active(files):
    """Poll each uploaded file until it leaves the PROCESSING state.

    Raises:
        RuntimeError: if a file ends in any state other than ACTIVE.
    """
    for file in files:
        while file.state.name == "PROCESSING":
            time.sleep(10)
            file = genai.get_file(file.name)
        if file.state.name != "ACTIVE":
            raise RuntimeError(f"File {file.name} failed to process")


def _render_ai_suggestion(csv_path):
    """Render the "Get Ai Suggestion" section and query the model on demand.

    Args:
        csv_path: Path of the CSV file to upload for summarisation.
    """
    st.subheader("Get Ai Suggestion")
    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.write("This will take few seconds :->")
    with right_column:
        clicked = st.button('Get Suggestion', key="check23")
    if not clicked:
        return
    if csv_path is None or not os.path.exists(csv_path):
        st.error("The dataset file for the AI suggestion could not be found.")
        return
    # NOTE(review): the key is read from OPENAI_API_KEY although it is passed to
    # the Gemini client -- confirm the variable name with the deployment.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("API key is not configured (set the OPENAI_API_KEY environment variable).")
        return
    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 5000,
        "response_mime_type": "text/plain",
    }
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=generation_config,
        )
        files = [_upload_to_gemini(csv_path, mime_type="text/csv")]
        _wait_for_files_active(files)
        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [files[0]],
                },
            ]
        )
        response = chat_session.send_message("Summarize the dataset and suggest which among these machine learning model to use.(LinearRegression,LogisticRegression,DecisionTreeClassifier,DecisionTreeRegressor,RandomForestClassifier,RandomForestRegressor,SVC,Gaussian Naive Bayes)")
        with st.chat_message("assistant"):
            st.write(response.text)
    except Exception as exc:
        # UI boundary: report the failure instead of crashing the page.
        st.error("Oops, something went wrong here")
        st.caption(f"Details: {exc}")


def _render_visualisation(df):
    """Render the "Visualise Data" section and embed a Sweetviz report on demand."""
    st.subheader("Visualise Data")
    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.write("This will take few seconds :->")
    with right_column:
        clicked = st.button('Visualise', key="check22")
    if not clicked:
        return
    report_file = 'report.html'
    try:
        report = sv.analyze(df)
        # The report is embedded below, so do not also open a browser window
        # on the machine running the app.
        report.show_html(report_file, layout='vertical', open_browser=False)
        with open(report_file, 'r', encoding='utf-8') as html_file:
            source_code = html_file.read()
    except Exception as exc:
        st.error("Oops Something went wrong here")
        st.caption(f"Details: {exc}")
        return
    st.write("Exploratory Data Analysis with Sweetviz")
    st.components.v1.html(source_code, height=1000, scrolling=True)


def _render_summary(df):
    """Show ``df.describe()`` and the per-column null counts side by side."""
    left_column, right_column = st.columns(2)
    with left_column:
        st.subheader("Data Description:")
        st.write(df.describe())
    with right_column:
        st.subheader("Null Values:")
        st.write(df.isnull().sum())


def _render_null_handling(df):
    """Render the optional null-handling controls and return the resulting frame."""
    st.subheader("Handle Null Values if you have")
    if not st.toggle('Activate feature', key="check1"):
        return df
    left_column, right_column, col3 = st.columns(3)
    with left_column:
        strategy = st.selectbox(
            "Select Options To Handle Null Values",
            ["None", "dropna", "fillna", "ffill", "bfill"]
        )
        if strategy == "dropna":
            df = df.dropna()
        elif strategy == "fillna":
            # Column means only exist for numeric columns; without
            # numeric_only=True current pandas raises on text columns.
            df = df.fillna(df.mean(numeric_only=True))
        elif strategy == "ffill":
            df = df.ffill()
        elif strategy == "bfill":
            df = df.bfill()
        if strategy != "None":
            st.write(df)
    with right_column:
        st.subheader("Null Values Now:")
        st.write(df.isnull().sum())
    with col3:
        st.subheader("Present Columns:")
        st.dataframe(df.columns)
    return df


def _clean_text(series):
    """Lower-case a string Series and strip every non-word, non-space character."""
    return series.str.lower().str.replace(r'[^\w\s]', '', regex=True)


def _joined_clean_text(df, columns):
    """Join ``columns`` row-wise with spaces and return the cleaned text as a 1-D array."""
    joined = df[columns].astype(str).agg(' '.join, axis=1)
    return _clean_text(joined).values.flatten()


def _render_model_download(model, key):
    """Render a "Download Trained Model" row offering ``model`` as a joblib file."""
    import io

    st.divider()
    left_column, right_column = st.columns([1, 1])
    with left_column:
        st.subheader("Download Trained Model")
    with right_column:
        buffer = io.BytesIO()
        dump(model, buffer)
        st.download_button(
            "Download",
            data=buffer.getvalue(),
            file_name="trained_model.joblib",
            mime="application/octet-stream",
            key=key,
        )


def _render_text_workflow(df):
    """Train and query a bag-of-words / TF-IDF text classifier on chosen columns."""
    cleaned_questions = None
    answer = None
    left_column, right_column, col3 = st.columns(3)
    with left_column:
        st.subheader("Training Column 1 E.g. Questions:")
        all_columns = list(df.columns)
        question_cols = st.multiselect(
            'Select training column (only one)',
            all_columns,
            key="tefirst"
        )
        if question_cols:
            st.write("Training Columns:")
            cleaned_questions = _joined_clean_text(df, question_cols)
            st.write(cleaned_questions)
    with right_column:
        st.subheader("Training Column 2 E.g. Answers")
        remaining = [col for col in all_columns if col not in question_cols]
        answer_cols = st.multiselect(
            'Select training column (only one)',
            remaining,
            key="te2"
        )
        if answer_cols:
            st.write("Test Column:")
            answer = _joined_clean_text(df, answer_cols)
            st.write(answer)
    with col3:
        st.subheader("Enter n_estimators")
        n_estimators = st.number_input(f"Select Value (default: 50)", value=50, min_value=10, max_value=500)
        st.subheader("Random State")
        randomstate = st.number_input(f"Select Value (default: 10)", value=10, min_value=1, max_value=50)
    st.divider()
    st.subheader("Select an Algorithm(Training Can Take Time!!)")
    choice = st.selectbox(
        'Select From DropDown',
        ('None', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier')
    )
    st.write('You selected:', choice)
    st.divider()
    if choice == 'None':
        return
    if cleaned_questions is None or answer is None:
        st.info("Select both training columns before choosing an algorithm.")
        return

    max_depth = 3
    if choice == "DecisionTreeClassifier":
        step_name = 'classifier'
        estimator = DecisionTreeClassifier(max_depth=5, random_state=randomstate)
    elif choice == "RandomForestClassifier":
        step_name = 'classifier'
        estimator = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=randomstate)
    else:
        step_name = 'gbc'
        estimator = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=0.1, max_depth=max_depth, random_state=randomstate)
    pipeline = Pipeline([
        ('bow', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        (step_name, estimator)
    ])
    try:
        pipeline.fit(cleaned_questions, answer)
        # The score is computed on the same rows the model was fitted on.
        score = pipeline.score(cleaned_questions, answer)
    except ValueError as exc:
        st.error(f"Training failed: {exc}")
        return

    col1, col2 = st.columns(2)
    col1.metric(label="Model trained successfully.", value=choice)
    col2.metric(label="Model score:", value=score, delta=None)
    st.caption("")
    st.divider()
    st.subheader("Predict")
    left_column1, right_column1 = st.columns([3, 1])
    with left_column1:
        query = st.text_input("Enter Values Here", placeholder="Enter")
    with right_column1:
        predict_clicked = st.button("Predict")
    if predict_clicked:
        if not query.strip():
            st.error("Model Not Selected Properly")
        else:
            # Apply the same cleaning that the training text went through.
            cleaned_query = _clean_text(pd.Series([query])).tolist()
            prediction = pipeline.predict(cleaned_query)[0]
            with st.chat_message("assistant"):
                st.write(f"Model Predicted: {prediction}")
    _render_model_download(pipeline, key="download_text_model")
    st.divider()


def _render_tabular_workflow(df):
    """Label-encode, split, train, score and query a model on tabular columns."""
    with st.container():
        st.subheader("Handle Labelled Data")
        if st.toggle('Activate feature', key="check2"):
            left_column, right_column = st.columns(2)
            with left_column:
                label_cols = st.multiselect(
                    'Select only labelled columns',
                    list(df.columns),
                    key="third"
                )
                if label_cols:
                    encoder = preprocessing.LabelEncoder()
                    # ``apply`` refits the encoder on each selected column in turn.
                    df[label_cols] = df[label_cols].apply(encoder.fit_transform)
            with right_column:
                st.write(df)
    st.divider()

    x = None
    y = None
    left_column, right_column, col3 = st.columns(3)
    with left_column:
        st.subheader("Training Column Names")
        all_columns = list(df.columns)
        feature_cols = st.multiselect(
            'Select training columns',
            all_columns,
            key="first"
        )
        if feature_cols:
            st.write("Training Columns:")
            x = df[feature_cols]
            st.write(x)
    with right_column:
        st.subheader("Test Column Name")
        candidates = [col for col in all_columns if col not in feature_cols]
        target_cols = st.multiselect(
            'Select test column',
            candidates,
            key="second"
        )
        if len(target_cols) > 1:
            # Flattening several columns would give more labels than rows.
            st.error("Select exactly one test column.")
        elif target_cols:
            st.write("Test Column:")
            y = np.ravel(df[target_cols])
            st.write(y)
    with col3:
        st.subheader("Enter Weight")
        test_size = st.number_input(f"Select Value (default: 0.25)", value=0.25, min_value=0.1, max_value=0.9)
        st.subheader("Random State")
        random_state = st.number_input(f"Select Value (default: 3)", value=3, min_value=1, max_value=10)
    st.divider()
    st.subheader("Select an Algorithm")
    option = st.selectbox(
        'Select From DropDown',
        ('None', *_TABULAR_MODEL_FACTORIES))
    st.write('You selected:', option)
    st.divider()
    if option == 'None':
        return
    if x is None or y is None:
        st.info("Select the training columns and one test column before choosing an algorithm.")
        return

    st.subheader("Model")
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=random_state
        )
        fitted = _TABULAR_MODEL_FACTORIES[option]().fit(X_train, y_train)
        score = fitted.score(X_test, y_test)
    except (ValueError, TypeError) as exc:
        st.error("Something went wrong We think model is not selected properly")
        st.caption(f"Details: {exc}")
        return

    col1, col2 = st.columns(2)
    col1.metric(label="Model trained successfully.", value=option)
    col2.metric(label="Model score:", value=score, delta=None)
    style_metric_cards()
    st.caption("Tip: You can change the Score Using Weight")
    st.divider()
    st.subheader("Predict")
    st.write("Enter the following values separated by commas -->" + " " + ", ".join(feature_cols))
    left_column, right_column = st.columns([3, 1])
    with left_column:
        raw_values = st.text_input("Enter Values Here", placeholder="Enter")
    with right_column:
        if st.button("Predict"):
            try:
                values = [float(part) for part in raw_values.split(',')]
                if len(values) != len(feature_cols):
                    raise ValueError(
                        f"expected {len(feature_cols)} values, got {len(values)}"
                    )
                values_df = pd.DataFrame([values], columns=X_train.columns)
                prediction = fitted.predict(values_df)[0]
            except ValueError as exc:
                st.error("Something went wrong Select Parameters Correctly")
                st.caption(f"Details: {exc}")
            else:
                st.write(f"Model Predicted: {prediction}")
    _render_model_download(fitted, key="download_tabular_model")
    st.divider()


@_fragment
def main():
    """Render the Codeless-ML page.

    Loads a CSV (uploaded or one of the bundled examples), then offers an AI
    suggestion, a Sweetviz report, summary statistics and null handling,
    followed by either the text-classification or the tabular training flow.
    Nothing below the dataset picker is rendered until a dataset has loaded.
    """
    warnings.filterwarnings("ignore")
    _apply_page_styles()
    st.title("Codeless Machine Learning..")
    colored_header(
        label="Train Your Model Online",
        description="We currently support csv file format only",
        color_name="violet-70",
    )
    st.text("Add Dataset in CSV format only")
    st.divider()

    df, suggestion_csv = _select_dataset("./uploads")
    if df is None:
        return

    st.divider()
    with st.container():
        _render_ai_suggestion(suggestion_csv)
    st.divider()
    with st.container():
        _render_visualisation(df)
    st.divider()
    with st.container():
        _render_summary(df)
    st.divider()
    with st.container():
        df = _render_null_handling(df)
    st.divider()
    with st.container():
        st.subheader("Turn This On If You Have Fully Text Dataset")
        st.write("To Create a Simple Model choose only 1 Row")
        text_mode = st.toggle('Activate feature', key="te1")
    if text_mode:
        _render_text_workflow(df)
    else:
        _render_tabular_workflow(df)
# Script entry point: render the page only when this file is executed directly.
if __name__ == "__main__":
    main()