Spaces:

singhtech
/

AutoTabML

Sleeping

App Files Files Community

singhtech commited on Jun 11, 2024

Commit

715848f

verified ·

1 Parent(s): 74387ba

Upload 3 files

Browse files

Code Modularization

Files changed (3) hide show

app.py +283 -0
autotabml_agents.py +90 -0
autotabml_tasks.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,283 @@

+import streamlit as st
+import pandas as pd
+import os
+from crewai import Crew
+from langchain_groq import ChatGroq
+import streamlit_ace as st_ace
+import traceback
+import contextlib
+import io
+from crewai_tools import FileReadTool
+import matplotlib.pyplot as plt
+import glob
+from dotenv import load_dotenv
+from autotabml_agents import initialize_agents
+from autotabml_tasks import create_tasks
+TEMP_DIR = "temp_dir"
+OUTPUT_DIR = "Output_dir"
+# Ensure the temporary directory exists
+if not os.path.exists(TEMP_DIR):
+    os.makedirs(TEMP_DIR)
+# Ensure the Output directory exits
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
+# Function to save uploaded file
+def save_uploaded_file(uploaded_file):
+    file_path = os.path.join(TEMP_DIR, uploaded_file.name)
+    with open(file_path, 'wb') as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+# load the .env file
+load_dotenv()
+# Set up Groq API key
+groq_api_key = os.environ.get("GROQ_API_KEY")  # os.environ["GROQ_API_KEY"] =
+def main():
+    # Set custom CSS for UI
+    set_custom_css()
+    # Initialize session state for edited code
+    if 'edited_code' not in st.session_state:
+        st.session_state['edited_code'] = ""
+    # Initialize session state for whether the initial code is generated
+    if 'code_generated' not in st.session_state:
+        st.session_state['code_generated'] = False
+    # Header with futuristic design
+    st.markdown("""
+        <div class="header">
+            <h1>AutoTabML</h1>
+            <p>Automated Machine Learning Code Generation for Tabluar Data</p>
+        </div>
+    """, unsafe_allow_html=True)
+    # Sidebar for customization options
+    st.sidebar.title('LLM Model')
+    model = st.sidebar.selectbox(
+        'Model',
+        ["llama3-70b-8192"]
+    )
+    # Initialize LLM
+    llm = initialize_llm(model)
+    # User inputs
+    user_question = st.text_area("Describe your ML problem:", key="user_question")
+    uploaded_file = st.file_uploader("Upload a sample .csv of your data", key="uploaded_file")
+    try:
+        file_name = uploaded_file.name
+    except:
+        file_name = "dataset.csv"
+    # Initialize agents
+    agents = initialize_agents(llm,file_name)
+    # Process uploaded file
+    if uploaded_file:
+        try:
+            file_path = save_uploaded_file(uploaded_file)
+            df = pd.read_csv(uploaded_file)
+            st.write("Data successfully uploaded:")
+            st.dataframe(df.head())
+            data_upload = True
+        except Exception as e:
+            st.error(f"Error reading the file: {e}")
+            data_upload = False
+    else:
+        df = None
+        data_upload = False
+    # Process button
+    if st.button('Process'):
+        tasks = create_tasks("Process",user_question,file_name, data_upload, df, None, st.session_state['edited_code'], None, agents)
+        with st.spinner('Processing...'):
+            crew = Crew(
+                agents=list(agents.values()),
+                tasks=tasks,
+                verbose=2
+            )
+            result = crew.kickoff()
+            if result:  # Only call st_ace if code has a valid value
+                code = result.strip("```")
+                try:
+                    filt_idx = code.index("```")
+                    code = code[:filt_idx]
+                except:
+                    pass
+                st.session_state['edited_code'] = code
+                st.session_state['code_generated'] = True
+        st.session_state['edited_code'] = st_ace.st_ace(
+            value=st.session_state['edited_code'],
+            language='python',
+            theme='monokai',
+            keybinding='vscode',
+            min_lines=20,
+            max_lines=50
+        )
+    if st.session_state['code_generated']:
+        # Show options for modification, debugging, and running the code
+        suggestion = st.text_area("Suggest modifications to the generated code (optional):", key="suggestion")
+        if st.button('Modify'):
+            if st.session_state['edited_code'] and suggestion:
+                tasks = create_tasks("Modify",user_question,file_name, data_upload, df, suggestion, st.session_state['edited_code'], None, agents)
+                with st.spinner('Modifying code...'):
+                    crew = Crew(
+                        agents=list(agents.values()),
+                        tasks=tasks,
+                        verbose=2
+                    )
+                    result = crew.kickoff()
+                    if result:  # Only call st_ace if code has a valid value
+                        code = result.strip("```")
+                        try:
+                            filter_idx = code.index("```")
+                            code = code[:filter_idx]
+                        except:
+                            pass
+                        st.session_state['edited_code'] = code
+                st.write("Modified code:")
+                st.session_state['edited_code']= st_ace.st_ace(
+                    value=st.session_state['edited_code'],
+                    language='python',
+                    theme='monokai',
+                    keybinding='vscode',
+                    min_lines=20,
+                    max_lines=50
+                )
+        debugger = st.text_area("Paste error message here for debugging (optional):", key="debugger")
+        if st.button('Debug'):
+            if st.session_state['edited_code'] and debugger:
+                tasks = create_tasks("Debug",user_question,file_name, data_upload, df, None, st.session_state['edited_code'], debugger, agents)
+                with st.spinner('Debugging code...'):
+                    crew = Crew(
+                        agents=list(agents.values()),
+                        tasks=tasks,
+                        verbose=2
+                    )
+                    result = crew.kickoff()
+                    if result:  # Only call st_ace if code has a valid value
+                        code = result.strip("```")
+                        try:
+                            filter_idx = code.index("```")
+                            code = code[:filter_idx]
+                        except:
+                            pass
+                        st.session_state['edited_code'] = code
+                st.write("Debugged code:")
+                st.session_state['edited_code'] = st_ace.st_ace(
+                    value=st.session_state['edited_code'],
+                    language='python',
+                    theme='monokai',
+                    keybinding='vscode',
+                    min_lines=20,
+                    max_lines=50
+                )
+        if st.button('Run'):
+            output = io.StringIO()
+            with contextlib.redirect_stdout(output):
+                try:
+                    globals().update({'dataset': df})
+                    final_code = st.session_state["edited_code"]
+                    with st.expander("Final Code"):
+                        st.code(final_code, language='python')
+                    exec(final_code, globals())
+                    result = output.getvalue()
+                    success = True
+                except Exception as e:
+                    result = str(e)
+                    success = False
+            st.subheader('Output:')
+            st.text(result)
+            figs = [manager.canvas.figure for manager in plt._pylab_helpers.Gcf.get_all_fig_managers()]
+            if figs:
+                st.subheader('Generated Plots:')
+                for fig in figs:
+                    st.pyplot(fig)
+            if success:
+                st.success("Code executed successfully!")
+            else:
+                st.error("Code execution failed! Waiting for debugging input...")
+            # Move the generated files section to the sidebar
+            with st.sidebar:
+                st.header('Output_dir :')
+                files = glob.glob(os.path.join(OUTPUT_DIR,"/", '*'))
+                for file in files:
+                    if os.path.isfile(file):
+                        with open(file, 'rb') as f:
+                            st.download_button(label=f'Download {os.path.basename(file)}', data=f, file_name=os.path.basename(file))
+# Function to set custom CSS for futuristic UI
+def set_custom_css():
+    st.markdown("""
+        <style>
+            body {
+                background: #0e0e0e;
+                color: #e0e0e0;
+                font-family: 'Roboto', sans-serif;
+            }
+            .header {
+                background: linear-gradient(135deg, #6e3aff, #b839ff);
+                padding: 10px;
+                border-radius: 10px;
+            }
+            .header h1, .header p {
+                color: white;
+                text-align: center;
+            }
+            .stButton button {
+                background-color: #b839ff;
+                color: white;
+                border-radius: 10px;
+                font-size: 16px;
+                padding: 10px 20px;
+            }
+            .stButton button:hover {
+                background-color: #6e3aff;
+                color: #e0e0e0;
+            }
+            .spinner {
+                display: flex;
+                justify-content: center;
+                align-items: center;
+            }
+        </style>
+    """, unsafe_allow_html=True)
+# Function to initialize LLM
+def initialize_llm(model):
+    return ChatGroq(
+        temperature=0,
+        groq_api_key=groq_api_key,
+        model_name=model
+    )
+if __name__ == "__main__":
+    main()

autotabml_agents.py ADDED Viewed

	@@ -0,0 +1,90 @@

+from crewai import Agent
+from crewai_tools import FileReadTool
+# Function to initialize agents
+def initialize_agents(llm,file_name):
+    file_read_tool = FileReadTool()
+    return {
+        "Data_Reader_Agent": Agent(
+            role='Data_Reader_Agent',
+            goal="Read the uploaded dataset and provide it to other agents.",
+            backstory="Responsible for reading the uploaded dataset.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+            tools=[file_read_tool]
+        ),
+        "Problem_Definition_Agent": Agent(
+            role='Problem_Definition_Agent',
+            goal="Clarify the machine learning problem the user wants to solve.",
+            backstory="Expert in defining machine learning problems.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "EDA_Agent": Agent(
+            role='EDA_Agent',
+            goal="Perform all possible Exploratory Data Analysis (EDA) on the data provided by the user.",
+            backstory="Specializes in conducting comprehensive EDA to understand the data characteristics, distributions, and relationships.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "Feature_Engineering_Agent": Agent(
+            role='Feature_Engineering_Agent',
+            goal="Perform feature engineering on the data based on the EDA results provided by the EDA agent.",
+            backstory="Expert in deriving new features, transforming existing features, and preprocessing data to prepare it for modeling.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "Model_Recommendation_Agent": Agent(
+            role='Model_Recommendation_Agent',
+            goal="Suggest the most suitable machine learning models.",
+            backstory="Expert in recommending machine learning algorithms.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "Starter_Code_Generator_Agent": Agent(
+            role='Starter_Code_Generator_Agent',
+            goal=f"Generate starter Python code for the project. Always give dataset name as 'temp_files/{file_name}",
+            backstory="Code wizard for generating starter code templates.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "Code_Modification_Agent": Agent(
+            role='Code_Modification_Agent',
+            goal="Modify the generated Python code based on user suggestions.",
+            backstory="Expert in adapting code according to user feedback.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        # "Code_Runner_Agent": Agent(
+        #     role='Code_Runner_Agent',
+        #     goal="Run the generated Python code and catch any errors.",
+        #     backstory="Debugging expert.",
+        #     verbose=True,
+        #     allow_delegation=True,
+        #     llm=llm,
+        # ),
+        "Code_Debugger_Agent": Agent(
+            role='Code_Debugger_Agent',
+            goal="Debug the generated Python code.",
+            backstory="Seasoned code debugger.",
+            verbose=True,
+            allow_delegation=False,
+            llm=llm,
+        ),
+        "Compiler_Agent":Agent(
+            role = "Code_compiler",
+            goal = "Extract only the python code.",
+            backstory = "You are the compiler which extract only the python code.",
+            verbose = True,
+            allow_delegation = False,
+            llm = llm
+        )
+    }

autotabml_tasks.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from crewai import Task
+# Function to create tasks based on user inputs
+def create_tasks(func_call,user_question,file_name, data_upload, df, suggestion, edited_code, debugger, agents):
+    info = df.info()
+    tasks = []
+    if(func_call == "Process"):
+        tasks.append(Task(
+                description=f"Clarify the ML problem: {user_question}",
+                agent=agents["Problem_Definition_Agent"],
+                expected_output="A clear and concise definition of the ML problem."
+            )
+            )
+        if data_upload:
+            tasks.extend([
+                Task(
+                    description=f"Evaluate the data provided by the file name . This is the data: {df}",
+                    agent=agents["EDA_Agent"],
+                    expected_output="An assessment of the EDA and preprocessing like dataset info, missing value, duplication, outliers etc. on the data provided"
+                ),
+                Task(
+                    description=f"Feature Engineering on data {df} based on EDA output: {info}",
+                    agent=agents["Feature_Engineering_Agent"],
+                    expected_output="An assessment of the Featuring Engineering and preprocessing like handling missing values, handling duplication, handling outliers, feature encoding, feature scaling etc. on the data provided"
+                )
+            ])
+        tasks.extend([
+            Task(
+                description="Suggest suitable ML models.",
+                agent=agents["Model_Recommendation_Agent"],
+                expected_output="A list of suitable ML models."
+            ),
+            Task(
+                description=f"Generate starter Python code based on feature engineering, where column names are {df.columns.tolist()}. Generate only the code without any extra text",
+                agent=agents["Starter_Code_Generator_Agent"],
+                expected_output="Starter Python code."
+            ),
+        ])
+    if(func_call == "Modify"):
+        if suggestion:
+            tasks.append(
+                Task(
+                    description=f"Modify the already generated code {edited_code} according to the suggestion: {suggestion} \n\n Do not generate entire new code.",
+                    agent=agents["Code_Modification_Agent"],
+                    expected_output="Modified code."
+                )
+            )
+    if(func_call == "Debug"):
+        if debugger:
+            tasks.append(
+                Task(
+                    description=f"Debug and fix any errors for data with column names {df.columns.tolist()} with data as {df} in the generated code: {edited_code}  \n\n According to the debugging: {debugger}. \n\n Do not generate entire new code. Just remove the error in the code by modifying only necessary parts of the code.",
+                    agent=agents["Code_Debugger_Agent"],
+                    expected_output="Debugged and successfully executed code."
+                )
+            )
+    tasks.append(
+        Task(
+            description = "Your job is to only extract python code from string",
+            agent = agents["Compiler_Agent"],
+            expected_output = "Running python code."
+        )
+    )
+    return tasks