Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from langchain.prompts import PromptTemplate, PipelinePromptTemplate | |
| from langchain_community.callbacks import StreamlitCallbackHandler | |
# Instruction text for the "first look" pass over a dataframe. The only
# placeholder is {salutation}; the rest is a fixed checklist.
first_look_prompt = '''
{salutation}, You need to explore the dataframe in a few indicated steps below. Please indicate clearly what is the steps being done.
1. Data Overview:
1.1. Show first five rows of the data
1.2. Show the columns name
1.3. Show the missing values and duplicated for each column
1.4. Show Data summary: df.describe()
1.5. Calculate correlation in the data
1.6. Identify potential outliers
1.7. Identify potential new features to include
'''

# Prompt object built from the text above.
first_look_template = PromptTemplate.from_template(first_look_prompt)
def text_runner(_agent, df, text):
    """Show a request on the page, then show the agent's reply to it.

    Args:
        _agent: Object exposing ``run(text)``; its return value is rendered.
        df: Not read by this function; kept so the signature stays the same.
        text: The request, written to the page and passed to ``_agent.run``.
    """
    st.write(text)
    agent_reply = _agent.run(text)
    st.write(agent_reply)
def function_runner(_agent, text, function):
    """Write a label followed by an already-computed value to the page.

    Args:
        _agent: Not read by this function; kept so the signature stays the same.
        text: Label written first.
        function: Value written second (despite the name, it is rendered
            as-is and never called here).
    """
    for item in (text, function):
        st.write(item)
def first_look_function(df, _agent):
    """Render the "Data Overview" section for ``df`` on the page.

    Sends a fixed sequence of requests to the agent through ``text_runner``
    and shows ``df.describe()`` directly through ``function_runner``.

    Args:
        df: Object providing ``describe()``; also forwarded to ``text_runner``.
        _agent: Agent forwarded to the runner helpers.

    Returns:
        None.
    """
    st.write('**Data Overview**')

    # Requests issued before the summary table.
    for request in (
        "Show columns name",
        "Show the missing values and duplicated for each column",
    ):
        text_runner(_agent, df, request)

    # The summary is computed locally rather than asked of the agent.
    function_runner(_agent, "Show data summary", df.describe())

    # Requests issued after the summary table.
    for request in (
        "Identify potential outliers",
        "Identify potential new features to include",
    ):
        text_runner(_agent, df, request)
# Single-placeholder prompt: {question} is the only variable.
sb_template = PromptTemplate.from_template("Output simple one liner steps for: {question}")
# ---------------------------------------------------------------------------
# EDA step-suggestion prompt, assembled from three fragments.
# ---------------------------------------------------------------------------

# Fragment 1: the request itself plus the expected bullet format.
intro_eda_template = '''
Give me step by step idea for an EDA provided that this is the details of the dataframe.
The answer should be in bullet form, each step should be less than 5 words.
Example format of the list (start with '-', ends with '.'):
- Identify missing values.
'''
intro_eda_prompt = PromptTemplate.from_template(intro_eda_template)

# Fragment 2: restrictions on what the answer may contain.
do_not_eda_template = '''
- Do not show backend work such as import libraries, load dataframe.
- Do not provide the answer to the EDA, i.e. x columns, y rows.
- Do not provide any suggestion related to visualization.
- Provide not more than 8 concrete/ not repetitive steps.
- Do not show Feature Engineering steps
- Do not generate something that we couldn't answer based on the existing dataframe, i.e. corr values when there is no numerical columns in the dataframe
'''
do_not_eda_prompt = PromptTemplate.from_template(do_not_eda_template)

# Fragment 3: carries the {dataframe_details} placeholder.
dataframe_description_template = '''
Here is the details of the dataframe: {dataframe_details}
'''
dataframe_description_eda_prompt = PromptTemplate.from_template(dataframe_description_template)

# Skeleton that orders the fragments: intro, restrictions, description.
eda_template = '''
{intro}
{do_not_list}
{dataframe_description}
'''
eda_prompt = PromptTemplate.from_template(eda_template)

# (skeleton placeholder name, fragment prompt) pairs for the pipeline.
input_eda_prompts = [
    ("intro", intro_eda_prompt),
    ("do_not_list", do_not_eda_prompt),
    ("dataframe_description", dataframe_description_eda_prompt),
]

filled_eda_prompt = PipelinePromptTemplate(
    final_prompt=eda_prompt,
    pipeline_prompts=input_eda_prompts,
)
def eda_selection_generator(_eda_chain, _df_details):
    """Invoke the EDA chain and return the ``'text'`` entry of its result.

    Args:
        _eda_chain: Object exposing ``invoke(dict)`` whose result can be
            indexed with ``'text'``.
        _df_details: Value passed to the chain under ``'dataframe_details'``.

    Returns:
        Whatever the chain result holds under ``'text'``.
    """
    chain_inputs = {'dataframe_details': _df_details}
    chain_output = _eda_chain.invoke(chain_inputs)
    return chain_output['text']
def individual_eda(_pd_agent, _eda_selected, peda_click_count):
    """Run one EDA request through the agent and render its answer.

    A fresh Streamlit container is created and handed to a
    ``StreamlitCallbackHandler``, which is passed to the agent as a callback.

    Args:
        _pd_agent: Object exposing ``run(prompt, callbacks=[...])``.
        _eda_selected: The request passed to the agent.
        peda_click_count: Not read by the body.
            NOTE(review): presumably serves as a cache key for a caching
            decorator applied elsewhere — confirm against the caller.
    """
    trace_area = st.container()
    handler = StreamlitCallbackHandler(trace_area)
    result = _pd_agent.run(_eda_selected, callbacks=[handler])
    st.write(result)
# ---------------------------------------------------------------------------
# AAA sample-question prompt, assembled from three fragments.
# ---------------------------------------------------------------------------

# Skeleton that orders the fragments: intro, description, restrictions.
aaa_template = '''
{intro}
{dataframe_description}
{do_not_list}
'''
aaa_prompt = PromptTemplate.from_template(aaa_template)

# NOTE(review): the opening instruction below is commented out, so it is not
# part of the intro text — confirm that this is intentional.
# Give me a list of possible questions that Pandas agent can answer well about the dataframe.
intro_aaa_template = '''
Each sentence should be less than 6 words long and clear.
Provide not more than 8 concrete/ not repetitive questions.
'''

# Carries the {dataframe_details} placeholder.
dataframe_description_aaa_template = '''
Here is the details of the dataframe: {dataframe_details}
'''

# Restrictions; carries the {eda_selection} placeholder.
do_not_aaa_template = '''
- DO NOT provide any list that is already captured before in the double quotation "{eda_selection}".
- Do not provide list that cannot be answered by pandas agent.
- Do not provide questions about number of rows/ columns, missing values
'''

intro_aaa_prompt = PromptTemplate.from_template(intro_aaa_template)
dataframe_description_aaa_prompt = PromptTemplate.from_template(dataframe_description_aaa_template)
# Bound to its own name: this prompt was previously assigned to
# `do_not_eda_prompt`, which silently overwrote the EDA restrictions prompt
# at module level. The pipeline below receives the same prompt as before.
do_not_aaa_prompt = PromptTemplate.from_template(do_not_aaa_template)

# (skeleton placeholder name, fragment prompt) pairs for the pipeline.
input_aaa_prompts = [
    ("intro", intro_aaa_prompt),
    ("dataframe_description", dataframe_description_aaa_prompt),
    ("do_not_list", do_not_aaa_prompt),
]

filled_aaa_prompt = PipelinePromptTemplate(
    final_prompt=aaa_prompt,
    pipeline_prompts=input_aaa_prompts,
)
def aaa_sample_generator(_aaa_chain, _dataframe_details, _eda_selection):
    """Invoke the AAA chain and return the ``'text'`` entry of its result.

    Args:
        _aaa_chain: Object exposing ``invoke(dict)`` whose result can be
            indexed with ``'text'``.
        _dataframe_details: Value passed under ``'dataframe_details'``.
        _eda_selection: Value passed under ``'eda_selection'``.

    Returns:
        Whatever the chain result holds under ``'text'``.
    """
    chain_inputs = {
        'dataframe_details': _dataframe_details,
        'eda_selection': _eda_selection,
    }
    chain_output = _aaa_chain.invoke(chain_inputs)
    return chain_output['text']
def aaa_answer_generator(_pd_agent, _user_prompt, refreshed):
    """Run the user's prompt through the agent and render the answer.

    A fresh Streamlit container is created and handed to a
    ``StreamlitCallbackHandler``, which is passed to the agent as a callback.

    Args:
        _pd_agent: Object exposing ``run(prompt, callbacks=[...])``.
        _user_prompt: The prompt passed to the agent.
        refreshed: Not read by the body.
            NOTE(review): presumably serves as a cache key for a caching
            decorator applied elsewhere — confirm against the caller.
    """
    st.write(
        _pd_agent.run(
            _user_prompt,
            callbacks=[StreamlitCallbackHandler(st.container())],
        )
    )