|
|
import os |
|
|
import time |
|
|
import warnings |
|
|
from sklearn import metrics, preprocessing |
|
|
from sklearn.calibration import LabelEncoder |
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.linear_model import LinearRegression |
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.feature_extraction.text import CountVectorizer |
|
|
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer |
|
|
from sklearn.pipeline import Pipeline |
|
|
from sklearn.tree import DecisionTreeClassifier |
|
|
from sklearn.metrics import confusion_matrix |
|
|
from sklearn import tree |
|
|
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier |
|
|
from sklearn.ensemble import RandomForestRegressor |
|
|
from sklearn import svm |
|
|
from sklearn.naive_bayes import GaussianNB |
|
|
from joblib import dump, load |
|
|
from matplotlib import pyplot as plt |
|
|
from sklearn.tree import plot_tree |
|
|
import sweetviz as sv |
|
|
from pathlib import Path |
|
|
import hashlib |
|
|
import google.generativeai as genai |
|
|
from sklearn.feature_extraction.text import CountVectorizer |
|
|
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer |
|
|
from sklearn.pipeline import Pipeline |
|
|
from streamlit_extras.metric_cards import style_metric_cards |
|
|
from streamlit_extras.colored_header import colored_header |
|
|
|
|
|
# Page-level Streamlit settings: browser tab title and icon, wide layout,
# and the text shown under the "About" entry of the app menu.
st.set_page_config(
    **{
        "page_title": "Codeless-ML",
        "page_icon": ":📈:",
        "layout": "wide",
        "menu_items": {'About': "# Under Construction"},
    }
)
|
|
def _inject_page_styles():
    """Inject the app's CSS overrides (chrome visibility, background, table size)."""
    hide_st_style = (
        "<style>\n"
        "MainMenu {visibility: hidden;}\n"
        "footer {visibility: hidden;}\n"
        "# header {visibility: hidden;}\n"
        "</style>\n"
    )
    st.markdown(hide_st_style, unsafe_allow_html=True)

    page_bg_img = (
        "<style>\n"
        '[data-testid = "stAppViewContainer"] {\n'
        'background-image: url("https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRvIlgLZYLc2E7gKTaK1kETo-amo5l6mrWQlh7owYgWXw&s");\n'
        "background-size: cover;\n"
        "}\n"
        '[data-testid = "stFileUploaderDropzone"]{\n'
        '# background-image: url("https://cdn.dribbble.com/users/1076456/screenshots/17355064/media/1486af881edbacd6db74d34b8e8190f4.png?resize=1000x750&vertical=center");\n'
        "background-size: cover;\n"
        "}\n"
        '[data-testid = "stHeader"] {\n'
        "background-color : rgba(0,0,0,0);\n"
        "}\n"
        '[data-testid = "stToolbar"] {\n'
        "right: 2rem;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(page_bg_img, unsafe_allow_html=True)

    custom_css = (
        "<style>\n"
        '[data-testid = "stDataFrameResizable"] {\n'
        "width: 1800px !important;\n"
        "height: 320px !important;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(custom_css, unsafe_allow_html=True)


def _sample_row_count(n_rows):
    """Return how many leading rows go into the reduced CSV sent to the AI."""
    # Thresholds are checked from largest to smallest so every branch is
    # reachable (the original `> 500 or < 1000` test was always true).
    if n_rows > 1000:
        divisor = 12
    elif n_rows > 500:
        divisor = 8
    elif n_rows > 100:
        divisor = 5
    else:
        divisor = 2
    if n_rows <= 0:
        return 0
    # Keep at least one row so the reduced file is never header-only.
    return max(1, n_rows // divisor)


def _load_example_dataset(csv_path):
    """Read and display a bundled example CSV; return None if it is missing."""
    if not os.path.exists(csv_path):
        st.error(f"File '{os.path.basename(csv_path)}' does not exist.")
        return None
    df = pd.read_csv(csv_path)
    st.subheader("CSV Data")
    st.write(df)
    return df


def _load_uploaded_dataset(uploaded_file, upload_dir):
    """Persist an uploaded CSV, display it, and write its reduced copy.

    Returns:
        (df, sample_path): the full DataFrame and the path of the reduced
        CSV ("half.csv"), or (None, None) when the upload cannot be parsed.
    """
    file_path = os.path.join(upload_dir, "new.csv")
    # "wb" truncates any previous upload, so no explicit removal is needed.
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    try:
        df = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return None, None

    sample_path = os.path.join(upload_dir, "half.csv")
    df.iloc[:_sample_row_count(len(df))].to_csv(sample_path, index=False)

    st.subheader("CSV Data")
    st.write(df)
    return df, sample_path


def _wait_for_files_active(files):
    """Poll Gemini until each uploaded file has left the PROCESSING state."""
    for file in files:
        while file.state.name == "PROCESSING":
            time.sleep(10)
            file = genai.get_file(file.name)
        if file.state.name != "ACTIVE":
            raise RuntimeError(f"File {file.name} failed to process")


def _render_ai_suggestion(csv_path):
    """Render the "Get Ai Suggestion" section and query Gemini on demand."""
    st.subheader("Get Ai Suggestion")
    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.write("This will take few seconds :->")
    with right_column:
        requested = st.button('Get Suggestion', key="check23")

    if not requested:
        return

    # NOTE(review): the Gemini key is read from OPENAI_API_KEY, exactly as the
    # original code did; confirm the deployment really stores it under that name.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("Oops, something went wrong here: OPENAI_API_KEY is not set")
        return
    if not csv_path or not os.path.exists(csv_path):
        st.error("Oops, something went wrong here: dataset file not found")
        return

    try:
        genai.configure(api_key=api_key)
        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 5000,
            "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=generation_config,
        )
        files = [genai.upload_file(csv_path, mime_type="text/csv")]
        _wait_for_files_active(files)

        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [files[0]],
                },
            ]
        )
        response = chat_session.send_message("Summarize the dataset and suggest which among these machine learning model to use.(LinearRegression,LogisticRegression,DecisionTreeClassifier,DecisionTreeRegressor,RandomForestClassifier,RandomForestRegressor,SVC,Gaussian Naive Bayes)")
        with st.chat_message("assistant"):
            st.write(response.text)
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        st.error(f"Oops, something went wrong here: {exc}")


def _render_sweetviz_report(df):
    """Render the "Visualise Data" section and embed a Sweetviz report on demand."""
    st.subheader("Visualise Data")
    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.write("This will take few seconds :->")
    with right_column:
        requested = st.button('Visualise', key="check22")

    if not requested:
        return

    report_file = 'report.html'
    try:
        if os.path.exists(report_file):
            os.remove(report_file)

        report = sv.analyze(df)
        # The report is embedded below, so do not also open a browser window
        # on the machine running the server.
        report.show_html(report_file, layout='vertical', open_browser=False)

        st.write("Exploratory Data Analysis with Sweetviz")
        with open(report_file, 'r', encoding='utf-8') as html_file:
            source_code = html_file.read()
        st.components.v1.html(source_code, height=1000, scrolling=True)
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        st.error(f"Oops Something went wrong here: {exc}")


def _render_null_handling(df):
    """Render the null-handling section; return the (possibly cleaned) DataFrame."""
    st.subheader("Handle Null Values if you have")
    if not st.toggle('Activate feature', key="check1"):
        return df

    left_column, right_column, col3 = st.columns(3)
    with left_column:
        genre = st.selectbox(
            "Select Options To Handle Null Values",
            ["None", "dropna", "fillna", "ffill", "bfill"]
        )
        if genre == "dropna":
            df = df.dropna()
        elif genre == "fillna":
            # numeric_only=True: averaging text columns raises in pandas 2.x.
            df = df.fillna(df.mean(numeric_only=True))
        elif genre == "ffill":
            df = df.ffill()
        elif genre == "bfill":
            df = df.bfill()
        if genre != "None":
            st.write(df)

    with right_column:
        st.subheader("Null Values Now:")
        st.write(df.isnull().sum())
    with col3:
        st.subheader("Present Columns:")
        st.dataframe(df.columns)
    return df


def _render_model_download(model, file_name):
    """Offer the fitted model as a joblib-serialised file download."""
    import io  # local import: `io` is not among this file's top-level imports

    left_column, right_column = st.columns([1, 1])
    with left_column:
        st.subheader("Download Trained Model")
    with right_column:
        buffer = io.BytesIO()
        dump(model, buffer)
        st.download_button(
            "Download",
            data=buffer.getvalue(),
            file_name=file_name,
            mime="application/octet-stream",
        )


def _clean_text(series):
    """Lower-case a string Series and strip everything but word chars and whitespace."""
    return series.str.lower().str.replace(r'[^\w\s]', '', regex=True)


def _render_text_workflow(df):
    """Render the text-dataset flow: pick columns, train a TF-IDF pipeline, predict."""
    cleaned_questions = None
    answer = None

    with st.container():
        left_column, right_column, col3 = st.columns(3)

        with left_column:
            st.subheader("Training Column 1 E.g. Questions:")
            training_col1 = list(df.columns)
            options2 = st.multiselect(
                'Select training column (only one)',
                training_col1,
                key="tefirst"
            )
            if options2:
                st.write("Training Columns:")
                questions = df[options2]
                # Selected columns are joined row-wise into one text per row.
                cleaned_questions = _clean_text(questions.astype(str).agg(' '.join, axis=1)).values.flatten()
                st.write(cleaned_questions)

        with right_column:
            st.subheader("Training Column 2 E.g. Answers")
            tel1 = [col for col in training_col1 if col not in options2]
            options = st.multiselect(
                'Select training column (only one)',
                tel1,
                key="te2"
            )
            if options:
                st.write("Test Column:")
                answers = df[options]
                answer = _clean_text(answers.astype(str).agg(' '.join, axis=1)).values.flatten()
                st.write(answer)

        with col3:
            st.subheader("Enter n_estimators")
            n_estimators = st.number_input("Select Value (default: 50)", value=50, min_value=10, max_value=500)
            st.subheader("Random State")
            randomstate = st.number_input("Select Value (default: 10)", value=10, min_value=1, max_value=50)

    st.divider()

    pipe = None
    with st.container():
        st.subheader("Select an Algorithm(Training Can Take Time!!)")
        options1 = st.selectbox(
            'Select From DropDown',
            ('None', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier')
        )
        st.write('You selected:', options1)
        st.divider()

        max_depth = 3
        model = None
        step_name = None
        if options1 == "DecisionTreeClassifier":
            model = DecisionTreeClassifier(max_depth=5, random_state=randomstate)
            step_name = 'classifier'
        elif options1 == "RandomForestClassifier":
            model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=randomstate)
            step_name = 'classifier'
        elif options1 == "GradientBoostingClassifier":
            model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=0.1, max_depth=max_depth, random_state=randomstate)
            step_name = 'gbc'

        if model is not None:
            if cleaned_questions is None or answer is None:
                st.info("Select a training column and an answer column to train the model.")
            else:
                try:
                    pipe = Pipeline([
                        ('bow', CountVectorizer()),
                        ('tfidf', TfidfTransformer()),
                        (step_name, model)
                    ]).fit(cleaned_questions, answer)
                except Exception as exc:  # surface the failure instead of hiding it
                    st.error(f"Model training failed: {exc}")
                    pipe = None

        if pipe is not None:
            col1, col2 = st.columns(2)
            col1.metric(label="Model trained successfully.", value=options1)
            # NOTE: this score is computed on the training data itself.
            col2.metric(label="Model score:", value=pipe.score(cleaned_questions, answer), delta=None)
            try:
                st.caption("")
                st.divider()
                st.subheader("Predict")
                with st.container():
                    left_column1, right_column1 = st.columns([3, 1])
                    with left_column1:
                        gg = st.text_input("Enter Values Here", placeholder="Enter")
                    with right_column1:
                        bu1 = st.button("Predict")
                    if bu1:
                        prediction = pipe.predict([gg])[0]
                        with st.chat_message("assistant"):
                            st.write(f"Model Predicted: {prediction}")
            except Exception:
                st.error("Model Not Selected Properly")

    st.divider()

    if pipe is not None:
        with st.container():
            _render_model_download(pipe, "text_model.joblib")
        st.divider()


def _build_tabular_model(option):
    """Map an algorithm name from the dropdown to a fresh, unfitted estimator."""
    # Explicit table instead of eval(): only known estimators can be built.
    factories = {
        'LinearRegression': LinearRegression,
        'LogisticRegression': LogisticRegression,
        'DecisionTreeClassifier': lambda: tree.DecisionTreeClassifier(max_depth=5),
        'DecisionTreeRegressor': lambda: tree.DecisionTreeRegressor(max_depth=5),
        'RandomForestClassifier': lambda: RandomForestClassifier(max_depth=5),
        'RandomForestRegressor': lambda: RandomForestRegressor(max_depth=5),
        'SVC': svm.SVC,
        'Gaussian Naive Bayes': GaussianNB,
    }
    factory = factories.get(option)
    return factory() if factory is not None else None


def _render_tabular_workflow(df):
    """Render the tabular flow: encode labels, pick columns, train, score, predict."""
    with st.container():
        st.subheader("Handle Labelled Data")
        if st.toggle('Activate feature', key="check2"):
            left_column, right_column = st.columns(2)
            with left_column:
                label_columns = st.multiselect(
                    'Select only labelled columns',
                    list(df.columns),
                    key="third"
                )
                if label_columns:
                    le = preprocessing.LabelEncoder()
                    df[label_columns] = df[label_columns].apply(le.fit_transform)
            with right_column:
                st.write(df)
    st.divider()

    x = None
    y = None
    with st.container():
        left_column, right_column, col3 = st.columns(3)
        with left_column:
            st.subheader("Training Column Names")
            training_col = list(df.columns)
            options2 = st.multiselect(
                'Select training columns',
                training_col,
                key="first"
            )
            if options2:
                st.write("Training Columns:")
                x = df[options2]
                st.write(x)

        with right_column:
            st.subheader("Test Column Name")
            tel = [col for col in training_col if col not in options2]
            options = st.multiselect(
                'Select target column',
                tel,
                key="second"
            )
            if len(options) > 1:
                # Flattening several columns would not line up with the rows of x.
                st.warning("Select exactly one target column")
            elif options:
                st.write("Test Column:")
                y = np.ravel(df[options])
                st.write(y)

        with col3:
            st.subheader("Enter Weight")
            f = st.number_input("Select Value (default: 0.25)", value=0.25, min_value=0.1, max_value=0.9)
            st.subheader("Random State")
            g = st.number_input("Select Value (default: 3)", value=3, min_value=1, max_value=10)

    st.divider()
    st.subheader("Select an Algorithm")
    option = st.selectbox(
        'Select From DropDown',
        ('None', 'LinearRegression', 'LogisticRegression', 'DecisionTreeClassifier', 'DecisionTreeRegressor', 'RandomForestClassifier', 'RandomForestRegressor', 'SVC', 'Gaussian Naive Bayes'))
    st.write('You selected:', option)
    st.divider()

    with st.container():
        st.subheader("Model")
        fitted = None
        if option == 'None':
            st.info("Select an algorithm to train a model.")
        elif x is None or y is None:
            st.info("Select the training columns and one target column first.")
        else:
            try:
                # f is the test-set fraction, g seeds the split.
                X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=f, random_state=g)
                fitted = _build_tabular_model(option).fit(X_train, y_train)
            except Exception as exc:  # UI boundary: report the failure instead of crashing
                st.error(f"Something went wrong We think model is not selected properly: {exc}")
                fitted = None

        if fitted is not None:
            try:
                col1, col2 = st.columns(2)
                col1.metric(label="Model trained successfully.", value=option)
                col2.metric(label="Model score:", value=fitted.score(X_test, y_test), delta=None)
                style_metric_cards()
                st.caption("Tip: You can change the Score Using Weight")

                st.divider()

                st.subheader("Predict")
                str1 = ", ".join(options2)
                st.write("Enter the following values separated by commas -->" + " " + str1)
                with st.container():
                    left_column, right_column = st.columns([3, 1])
                    with left_column:
                        raw_values = st.text_input("Enter Values Here", placeholder="Enter")
                    with right_column:
                        predict_clicked = st.button("Predict")
                    if predict_clicked:
                        try:
                            values = [float(v) for v in raw_values.split(',')]
                        except ValueError:
                            st.error("Enter numeric values separated by commas")
                        else:
                            if len(values) != len(X_train.columns):
                                st.error(f"Expected {len(X_train.columns)} values, got {len(values)}")
                            else:
                                values_df = pd.DataFrame([values], columns=X_train.columns)
                                prediction = fitted.predict(values_df)[0]
                                st.write(f"Model Predicted: {prediction}")

                st.divider()
                with st.container():
                    _render_model_download(fitted, "model.joblib")
            except Exception as exc:  # UI boundary: report the failure instead of crashing
                st.error(f"Something went wrong Select Parameters Correctly: {exc}")

    st.divider()


@st.experimental_fragment
def main():
    """Render the Codeless-ML page.

    Flow: inject CSS, let the user upload a CSV or pick a bundled example,
    then show AI suggestion, Sweetviz visualisation, description / null
    counts, null handling, and finally either the text-dataset workflow or
    the tabular workflow. Nothing after the dataset picker is rendered
    until a dataset has actually been loaded.
    """
    warnings.filterwarnings("ignore")
    _inject_page_styles()

    st.title("Codeless Machine Learning..")
    colored_header(
        label="Train Your Model Online",
        description="We currently support csv file format only",
        color_name="violet-70",
    )
    st.text("Add Dataset in CSV format only")
    st.divider()

    upload_dir = "./uploads"
    os.makedirs(upload_dir, exist_ok=True)

    try:
        uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'], accept_multiple_files=False)
    except Exception:
        st.warning("Only Csv format supported")
        uploaded_file = None  # keep the name bound for the checks below

    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.text("Use Example Dataset")
    with right_column:
        on0 = st.toggle('Yes!! (drug200.csv)', key="check0")
        onn1 = st.toggle('Yes!! (mentalhealth.csv)', key="checkk1")

    st.divider()

    # An example toggle takes precedence over an uploaded file.
    df = None
    ai_csv_path = None
    if on0:
        ai_csv_path = "./drug200.csv"
        df = _load_example_dataset(ai_csv_path)
    elif onn1:
        ai_csv_path = "./mentalhealth.csv"
        df = _load_example_dataset(ai_csv_path)
    elif uploaded_file is not None:
        df, ai_csv_path = _load_uploaded_dataset(uploaded_file, upload_dir)

    if df is None:
        # No dataset (nothing chosen, example missing, or unreadable upload).
        return

    st.divider()
    with st.container():
        _render_ai_suggestion(ai_csv_path)

    st.divider()
    with st.container():
        _render_sweetviz_report(df)

    st.divider()
    with st.container():
        left_column, right_column = st.columns(2)
        with left_column:
            st.subheader("Data Description:")
            st.write(df.describe())
        with right_column:
            st.subheader("Null Values:")
            st.write(df.isnull().sum())

    st.divider()
    with st.container():
        df = _render_null_handling(df)

    st.divider()

    with st.container():
        st.subheader("Turn This On If You Have Fully Text Dataset")
        st.write("To Create a Simple Model choose only 1 Row")
        text_mode = st.toggle('Activate feature', key="te1")
        if text_mode:
            _render_text_workflow(df)

    if not text_mode:
        _render_tabular_workflow(df)
|
|
|
|
|
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()