import hashlib
import os
import time
import warnings
from pathlib import Path

import google.generativeai as genai
import numpy as np
import pandas as pd
import streamlit as st
import sweetviz as sv
from joblib import dump, load
from matplotlib import pyplot as plt
from sklearn import metrics, preprocessing
from sklearn import svm
from sklearn import tree
from sklearn.calibration import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from streamlit_extras.colored_header import colored_header
from streamlit_extras.metric_cards import style_metric_cards

st.set_page_config(
    page_title="Codeless-ML",
    page_icon=":📈:",
    layout="wide",
    menu_items={
        'About': "# Under Construction"
    }
)

# Directory where the uploaded CSV and its down-sampled copy are written.
_UPLOAD_DIR = "./uploads"

# Zero-argument factories for the tabular estimators offered in the UI.
# An explicit table replaces the former ``eval(option + '()')`` so that a
# widget value is never executed as code.
_TABULAR_MODEL_FACTORIES = {
    'LinearRegression': LinearRegression,
    'LogisticRegression': LogisticRegression,
    'DecisionTreeClassifier': lambda: tree.DecisionTreeClassifier(max_depth=5),
    'DecisionTreeRegressor': lambda: tree.DecisionTreeRegressor(max_depth=5),
    'RandomForestClassifier': lambda: RandomForestClassifier(max_depth=5),
    'RandomForestRegressor': lambda: RandomForestRegressor(max_depth=5),
    'SVC': svm.SVC,
    'Gaussian Naive Bayes': GaussianNB,
}


def _rows_to_keep(n_rows):
    """Return how many leading rows of an uploaded dataset go into the AI sample.

    Larger datasets keep a smaller fraction: more than 1000 rows keeps 1/12,
    501-1000 keeps 1/8, 101-500 keeps 1/5, and anything smaller keeps 1/2.
    """
    if n_rows > 1000:
        divisor = 12
    elif n_rows > 500:
        divisor = 8
    elif n_rows > 100:
        divisor = 5
    else:
        divisor = 2
    return n_rows // divisor


def _clean_text(series):
    """Lower-case a string Series and strip everything but word chars and whitespace."""
    return series.str.lower().str.replace(r'[^\w\s]', '', regex=True)


def _show_dataset(df):
    """Render the loaded DataFrame under the "CSV Data" heading."""
    st.subheader("CSV Data")
    st.write(df)


def _load_example(path):
    """Read and display a bundled example CSV; return None (with an error) if missing."""
    if not os.path.exists(path):
        st.error(f"File '{os.path.basename(path)}' does not exist.")
        return None
    df = pd.read_csv(path)
    _show_dataset(df)
    return df


def _load_upload(uploaded_file):
    """Persist an uploaded CSV, write its down-sampled copy, and display it.

    Returns:
        ``(df, sample_path)`` where ``sample_path`` is the reduced CSV later
        sent to the AI-suggestion step, or ``(None, None)`` if the upload
        cannot be parsed as CSV.
    """
    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    file_path = os.path.join(_UPLOAD_DIR, "new.csv")
    # Opening with "wb" truncates any previous upload, so no explicit removal.
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    try:
        df = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return None, None
    sample_path = os.path.join(_UPLOAD_DIR, "half.csv")
    df.iloc[:_rows_to_keep(len(df))].to_csv(sample_path, index=False)
    _show_dataset(df)
    return df, sample_path


def _upload_to_gemini(path, mime_type=None):
    """Upload a local file through the Gemini file API and return its handle."""
    return genai.upload_file(path, mime_type=mime_type)


def _wait_for_files_active(files):
    """Poll each uploaded file until it leaves PROCESSING; raise if not ACTIVE."""
    for file in files:
        while file.state.name == "PROCESSING":
            time.sleep(10)
            file = genai.get_file(file.name)
        if file.state.name != "ACTIVE":
            raise Exception(f"File {file.name} failed to process")


def _render_ai_suggestion(dataset_path):
    """Render the "Get Ai Suggestion" section and query Gemini on button press.

    Args:
        dataset_path: CSV file uploaded to Gemini (the example file itself, or
            the down-sampled copy of a user upload).
    """
    with st.container():
        st.subheader("Get Ai Suggestion")
        left_column, right_column = st.columns([0.4, 1])
        with left_column:
            st.write("This will take few seconds :->")
        with right_column:
            requested = st.button('Get Suggestion', key="check23")
        if not requested:
            return
        # NOTE: the Gemini key is read from OPENAI_API_KEY; the variable name
        # is kept as-is so existing deployments keep working.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            st.error("Oops, something went wrong here: OPENAI_API_KEY is not set")
            return
        if not os.path.exists(dataset_path):
            st.error(f"Oops, something went wrong here: '{dataset_path}' not found")
            return
        try:
            genai.configure(api_key=api_key)
            # Generation configuration
            generation_config = {
                "temperature": 1,
                "top_p": 0.95,
                "top_k": 64,
                "max_output_tokens": 5000,
                "response_mime_type": "text/plain",
            }
            model = genai.GenerativeModel(
                model_name="gemini-1.5-flash",
                generation_config=generation_config,
            )
            files = [_upload_to_gemini(dataset_path, mime_type="text/csv")]
            _wait_for_files_active(files)
            chat_session = model.start_chat(
                history=[
                    {
                        "role": "user",
                        "parts": [files[0]],
                    },
                ]
            )
            response = chat_session.send_message("Summarize the dataset and suggest which among these machine learning model to use.(LinearRegression,LogisticRegression,DecisionTreeClassifier,DecisionTreeRegressor,RandomForestClassifier,RandomForestRegressor,SVC,Gaussian Naive Bayes)")
            with st.chat_message("assistant"):
                st.write(response.text)
        except Exception as exc:
            # UI boundary: report the failure instead of crashing the page.
            st.error(f"Oops, something went wrong here: {exc}")


def _render_visualisation(df):
    """Render the "Visualise Data" section; build and embed a Sweetviz report on demand."""
    with st.container():
        st.subheader("Visualise Data")
        left_column, right_column = st.columns([0.4, 1])
        with left_column:
            st.write("This will take few seconds :->")
        with right_column:
            requested = st.button('Visualise', key="check22")
        if not requested:
            return
        try:
            report_file = 'report.html'
            if os.path.exists(report_file):
                os.remove(report_file)
            report = sv.analyze(df)
            report.show_html(report_file, layout='vertical', open_browser=True)
            st.write("Exploratory Data Analysis with Sweetviz")
            with open(report_file, 'r', encoding='utf-8') as html_file:
                source_code = html_file.read()
            st.components.v1.html(source_code, height=1000, scrolling=True)
        except Exception as exc:
            st.error(f"Oops Something went wrong here: {exc}")


def _render_overview(df):
    """Show ``df.describe()`` and per-column null counts side by side."""
    with st.container():
        left_column, right_column = st.columns(2)
        with left_column:
            st.subheader("Data Description:")
            st.write(df.describe())
        with right_column:
            st.subheader("Null Values:")
            st.write(df.isnull().sum())


def _render_null_handling(df):
    """Render the null-handling controls and return the (possibly new) DataFrame."""
    with st.container():
        st.subheader("Handle Null Values if you have")
        if st.toggle('Activate feature', key="check1"):
            left_column, right_column, col3 = st.columns(3)
            with left_column:
                genre = st.selectbox(
                    "Select Options To Handle Null Values",
                    ["None", "dropna", "fillna", "ffill", "bfill"]
                )
                if genre == "dropna":
                    df = df.dropna()
                elif genre == "fillna":
                    # numeric_only: the mean of text columns is undefined and
                    # raises on recent pandas; those columns are left as-is.
                    df = df.fillna(df.mean(numeric_only=True))
                elif genre == "ffill":
                    df = df.ffill()
                elif genre == "bfill":
                    df = df.bfill()
                if genre != "None":
                    st.write(df)
            with right_column:
                st.subheader("Null Values Now:")
                st.write(df.isnull().sum())
            with col3:
                st.subheader("Present Columns:")
                st.dataframe(df.columns)
    return df


def _build_text_classifier(name, n_estimators, random_state, max_depth=3):
    """Return ``(pipeline_step_name, estimator)`` for a text-mode algorithm name."""
    if name == "DecisionTreeClassifier":
        return 'classifier', DecisionTreeClassifier(max_depth=5, random_state=random_state)
    if name == "RandomForestClassifier":
        return 'classifier', RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth, random_state=random_state)
    if name == "GradientBoostingClassifier":
        return 'gbc', GradientBoostingClassifier(
            n_estimators=n_estimators, learning_rate=0.1,
            max_depth=max_depth, random_state=random_state)
    raise ValueError(f"Unsupported algorithm: {name}")


def _render_text_workflow(df):
    """Render the text-dataset workflow (column choice, training, prediction).

    Returns:
        True when the text-dataset toggle is on (the tabular workflow is then
        skipped by the caller), False otherwise.
    """
    with st.container():
        st.subheader("Turn This On If You Have Fully Text Dataset")
        st.write("To Create a Simple Model choose only 1 Row")
        if not st.toggle('Activate feature', key="te1"):
            return False

        cleaned_questions = None
        answer = None
        with st.container():
            left_column, right_column, col3 = st.columns(3)
            with left_column:
                st.subheader("Training Column 1 E.g. Questions:")
                training_col1 = list(df.columns)
                options2 = st.multiselect(
                    'Select training column (only one)',
                    training_col1,
                    key="tefirst"
                )
                if options2:
                    st.write("Training Columns:")
                    # Join the selected columns row-wise, then normalise the text.
                    cleaned_questions = _clean_text(
                        df[options2].astype(str).agg(' '.join, axis=1)
                    ).values.flatten()
                    st.write(cleaned_questions)
            with right_column:
                st.subheader("Training Column 2 E.g. Answers")
                tel1 = [col for col in training_col1 if col not in options2]
                options = st.multiselect(
                    'Select training column (only one)',
                    tel1,
                    key="te2"
                )
                if options:
                    st.write("Test Column:")
                    answer = _clean_text(
                        df[options].astype(str).agg(' '.join, axis=1)
                    ).values.flatten()
                    st.write(answer)
            with col3:
                st.subheader("Enter n_estimators")
                n_estimators = st.number_input(
                    "Select Value (default: 50)", value=50, min_value=10, max_value=500)
                st.subheader("Random State")
                randomstate = st.number_input(
                    "Select Value (default: 10)", value=10, min_value=1, max_value=50)
        st.divider()

        with st.container():
            st.subheader("Select an Algorithm(Training Can Take Time!!)")
            options1 = st.selectbox(
                'Select From DropDown',
                ('None', 'DecisionTreeClassifier', 'RandomForestClassifier', 'GradientBoostingClassifier')
            )
            st.write('You selected:', options1)
            st.divider()

            pipe = None
            if options1 != 'None':
                if cleaned_questions is None or answer is None:
                    st.warning("Select both training columns before choosing an algorithm.")
                else:
                    try:
                        step_name, estimator = _build_text_classifier(
                            options1, n_estimators, randomstate)
                        pipe = Pipeline([
                            ('bow', CountVectorizer()),
                            ('tfidf', TfidfTransformer()),
                            (step_name, estimator)
                        ])
                        pipe.fit(cleaned_questions, answer)
                    except Exception as exc:
                        pipe = None
                        st.error(f"Model training failed: {exc}")

            # Results are shown only after a successful fit, for every
            # algorithm (previously GradientBoostingClassifier never got here).
            if pipe is not None:
                col1, col2 = st.columns(2)
                col1.metric(label="Model trained successfully.", value=options1)
                col2.metric(label="Model score:", value=pipe.score(cleaned_questions, answer), delta=None)
                st.caption("")
                st.divider()
                st.subheader("Predict")
                with st.container():
                    left_column1, right_column1 = st.columns([3, 1])
                    with left_column1:
                        query = st.text_input("Enter Values Here", placeholder="Enter")
                    with right_column1:
                        predict_clicked = st.button("Predict")
                    if predict_clicked:
                        try:
                            prediction = pipe.predict([query])[0]
                            with st.chat_message("assistant"):
                                st.write(f"Model Predicted: {prediction}")
                        except Exception:
                            st.error("Model Not Selected Properly")
                st.divider()
                with st.container():
                    left_column, right_column = st.columns([1, 1])
                    with left_column:
                        st.subheader("Download Trained Model")
                    with right_column:
                        # TODO: the download action is not implemented yet.
                        if st.button("Download"):
                            pass
    return True


def _render_tabular_workflow(df):
    """Render the tabular workflow: label encoding, split, training, prediction.

    Selected label columns are encoded in place on ``df``.
    """
    with st.container():
        st.subheader("Handle Labelled Data")
        if st.toggle('Activate feature', key="check2"):
            left_column, right_column = st.columns(2)
            with left_column:
                label_columns = st.multiselect(
                    'Select only labelled columns',
                    list(df.columns),
                    key="third"
                )
                if label_columns:
                    le = preprocessing.LabelEncoder()
                    df[label_columns] = df[label_columns].apply(le.fit_transform)
            with right_column:
                st.write(df)
    st.divider()

    # Split train/test
    x = None
    y = None
    with st.container():
        left_column, right_column, col3 = st.columns(3)
        with left_column:
            st.subheader("Training Column Names")
            training_col = list(df.columns)
            options2 = st.multiselect(
                'Select training columns',
                training_col,
                key="first"
            )
            if options2:
                st.write("Training Columns:")
                x = df[options2]
                st.write(x)
        with right_column:
            st.subheader("Test Column Name")
            # Only columns not already used as features may be the target.
            tel = [col for col in training_col if col not in options2]
            options = st.multiselect(
                'Select training columns',
                tel,
                key="second"
            )
            if options:
                st.write("Test Column:")
                y = np.ravel(df[options])
                st.write(y)
        with col3:
            st.subheader("Enter Weight")
            test_size = st.number_input(
                "Select Value (default: 0.25)", value=0.25, min_value=0.1, max_value=0.9)
            st.subheader("Random State")
            random_state = st.number_input(
                "Select Value (default: 3)", value=3, min_value=1, max_value=10)
    st.divider()

    st.subheader("Select an Algorithm")
    option = st.selectbox(
        'Select From DropDown',
        ('None', *_TABULAR_MODEL_FACTORIES))
    st.write('You selected:', option)
    st.divider()

    with st.container():
        fitted = None
        # Nothing is trained (and no error is shown) until an algorithm is chosen.
        if option != 'None':
            st.subheader("Model")
            if x is None or y is None:
                st.error("Something went wrong We think model is not selected properly")
            else:
                try:
                    X_train, X_test, y_train, y_test = train_test_split(
                        x, y, test_size=test_size, random_state=random_state)
                    fitted = _TABULAR_MODEL_FACTORIES[option]().fit(X_train, y_train)
                except Exception as exc:
                    fitted = None
                    st.error(f"Something went wrong We think model is not selected properly: {exc}")

        if fitted is not None:
            try:
                col1, col2 = st.columns(2)
                col1.metric(label="Model trained successfully.", value=option)
                col2.metric(label="Model score:", value=fitted.score(X_test, y_test), delta=None)
                style_metric_cards()
                st.caption("Tip: You can change the Score Using Weight")
                st.divider()
                st.subheader("Predict")
                str1 = ", ".join(options2)  # feature column names, in input order
                st.write("Enter the following values separated by commas -->" + " " + str1)
                with st.container():
                    left_column, right_column = st.columns([3, 1])
                    with left_column:
                        raw_values = st.text_input("Enter Values Here", placeholder="Enter")
                    with right_column:
                        if st.button("Predict"):
                            values = list(map(float, raw_values.split(',')))
                            values_df = pd.DataFrame([values], columns=X_train.columns)
                            prediction = fitted.predict(values_df)[0]
                            st.write(f"Model Predicted: {prediction}")
                st.divider()
                with st.container():
                    left_column, right_column = st.columns([1, 1])
                    with left_column:
                        st.subheader("Download Trained Model")
                    with right_column:
                        # TODO: the download action is not implemented yet.
                        if st.button("Download"):
                            pass
            except Exception:
                st.error("Something went wrong Select Parameters Correctly")
    st.divider()


@st.experimental_fragment
def main():
    """Render the Codeless-ML page.

    Loads a dataset (uploaded CSV or one of two bundled examples), then shows
    the AI-suggestion, visualisation, description and null-handling sections,
    followed by either the text workflow or the tabular workflow. Nothing
    beyond the loader is rendered until a dataset has been read successfully.
    """
    # Set up Streamlit page
    warnings.filterwarnings("ignore")
    hide_st_style = """ """
    st.markdown(hide_st_style, unsafe_allow_html=True)
    page_bg_img = ''' '''
    st.markdown(page_bg_img, unsafe_allow_html=True)
    custom_css = ''' '''
    st.markdown(custom_css, unsafe_allow_html=True)
    st.title("Codeless Machine Learning..")
    colored_header(
        label="Train Your Model Online",
        description="We currently support csv file format only",
        color_name="violet-70",
    )
    st.text("Add Dataset in CSV format only")
    st.divider()

    os.makedirs(_UPLOAD_DIR, exist_ok=True)
    # Bound up front so a failing uploader cannot leave the name undefined.
    uploaded_file = None
    try:
        uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'], accept_multiple_files=False)
    except Exception:
        st.warning("Only Csv format supported")

    left_column, right_column = st.columns([0.4, 1])
    with left_column:
        st.text("Use Example Dataset")
    with right_column:
        on0 = st.toggle('Yes!! (drug200.csv)', key="check0")
        onn1 = st.toggle('Yes!! (mentalhealth.csv)', key="checkk1")
    st.divider()

    df = None
    dataset_path = None  # file handed to the AI-suggestion step
    if on0:
        dataset_path = "./drug200.csv"  # Adjust the path as necessary
        df = _load_example(dataset_path)
    elif onn1:
        dataset_path = "./mentalhealth.csv"  # Adjust the path as necessary
        df = _load_example(dataset_path)
    elif uploaded_file is not None:
        df, dataset_path = _load_upload(uploaded_file)

    # Every section below needs a DataFrame; a missing example file or an
    # unreadable upload must not fall through to them.
    if df is None:
        return

    st.divider()
    _render_ai_suggestion(dataset_path)
    st.divider()
    _render_visualisation(df)
    st.divider()
    _render_overview(df)
    st.divider()
    df = _render_null_handling(df)
    st.divider()
    text_mode = _render_text_workflow(df)
    st.divider()
    if not text_mode:
        _render_tabular_workflow(df)


if __name__ == "__main__":
    main()