Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import time | |
| from utils.eda import run_eda | |
| from utils.preprocessing import preprocess | |
| from utils.feature_selection import feature_selection | |
| from utils.visualization import visualize | |
| from utils.report import generate_full_report | |
| from utils.agent_decision import decide_pipeline | |
| from utils.modeling import train_models | |
| from utils.llm_agent import ask_llm | |
| from utils.context_builder import build_context | |
| from utils.query_engine import answer_data_question | |
| from utils.query_executer import execute_query | |
| from utils.dataset_snapshot import build_dataset_snapshot | |
| from utils.memory import add_memory | |
| from utils.memory import get_memory_text | |
| from utils.question_classifier import ( | |
| requires_python_analysis | |
| ) | |
def _normalize_column_name(name):
    """Return *name* stripped, lower-cased, and with spaces replaced by underscores.

    The same rule is applied to the DataFrame header and to the target name
    typed by the user, so the two can be compared reliably.
    """
    return str(name).strip().lower().replace(" ", "_")


def _print_target_suggestions(df):
    """Print the columns of *df* that look like plausible target columns.

    A column with fewer than 20 distinct values is listed as a categorical
    candidate; any other column with a numeric dtype is listed as a
    numerical candidate.
    """
    print("\nSuggested target columns:")
    for col in df.columns:
        series = df[col]
        if series.nunique() < 20:
            print(f"- {col} (categorical candidate)")
        elif pd.api.types.is_numeric_dtype(series):
            # Covers every numeric dtype, not only int64/float64.
            print(f"- {col} (numerical candidate)")


def _prompt_for_target(df):
    """Prompt until the user names a column of *df*; return that column name.

    The raw answer is tried first. If it does not match, it is normalized
    with the same rule used for the header, because ``main`` rewrites the
    column names and the header as spelled in the CSV would otherwise never
    match.
    """
    while True:
        raw = input("\nEnter target column: ")
        if raw in df.columns:
            return raw
        candidate = _normalize_column_name(raw)
        if candidate in df.columns:
            return candidate
        print("Invalid column! Try again.")


def _run_chat(df, context, snapshot):
    """Run the interactive question/answer loop until the user leaves.

    Each non-empty question is passed to ``execute_query``; its result, the
    dataset snapshot, the pipeline context and the text from
    ``get_memory_text`` are forwarded to ``ask_llm``. The question/answer
    pair is then stored with ``add_memory``. Typing ``exit`` (any case),
    sending EOF, or pressing Ctrl-C at the prompt ends the loop.
    """
    print("\n--- AI DATA ANALYST CHAT ---")
    while True:
        try:
            question = input("\nAsk about your dataset (type exit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt means "leave the chat",
            # not "crash": return so the caller can finish normally.
            print()
            break

        if not question:
            # Nothing to ask; do not spend a query/LLM call on a blank line.
            continue
        if question.lower() == "exit":
            break

        # some computation
        result = execute_query(df, question)

        # using memory
        memory_text = get_memory_text()

        # llm response
        answer = ask_llm(
            question=question,
            analysis_result=result,
            dataset_snapshot=snapshot,
            context=context,
            memory=memory_text,
        )
        print("\nAI Analyst:")
        print(answer)

        # save memory
        add_memory(question, answer)


def main():
    """Run the interactive CSV analysis pipeline from the command line.

    Steps, in order: read a CSV path from stdin and load it, normalize the
    column names, run EDA, ask for the target column, preprocess, run EDA
    again on the preprocessed data, call ``decide_pipeline``,
    ``feature_selection``, ``train_models``, ``build_context`` and
    ``generate_full_report``, then start the chat loop. The total elapsed
    time is printed at the end.

    Returns early (without the timing line) when the file cannot be loaded,
    when it contains no data, or when preprocessing removes the target
    column.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long interactive session.
    start = time.perf_counter()

    # load data
    # Trim whitespace and surrounding quotes (common with pasted or
    # drag-and-dropped paths).
    path = input("Enter CSV file path: ").strip().strip("\"'")
    try:
        df = pd.read_csv(path)
    except Exception as e:  # top-level boundary: report any load failure
        print(f"Error loading file: {e}")
        return
    if df.empty:
        print("Error loading file: the dataset is empty")
        return

    # clean columns
    df.columns = [_normalize_column_name(col) for col in df.columns]

    print("\n--- HEAD ---")
    print(df.head())

    # initial EDA (return value is not needed; it is recomputed after
    # preprocessing below)
    print("\n--- EDA ---")
    run_eda(df)

    # target selection
    print("\nColumns:")
    print(df.columns.tolist())
    _print_target_suggestions(df)
    target = _prompt_for_target(df)

    # preprocessing
    print("\n--- PREPROCESSING ---")
    df = preprocess(df, target)

    # make sure the target still exists
    if target not in df.columns:
        print(f"Target column '{target}' was dropped!")
        return

    # updated EDA
    print("\n--- UPDATED EDA ---")
    eda_report = run_eda(df)

    # decision engine
    # decisions = decide_pipeline(df, target, eda_report)
    decisions = decide_pipeline(df, target)

    # feature selection
    print("\n--- FEATURE SELECTION ---")
    selected = feature_selection(df, target, decisions)
    if not selected:
        print("No important features detected!")

    # visualization (currently disabled)
    # print("\n--- VISUALIZATION ---")
    # visualize(df, target, selected, eda_report, decisions)

    # train the models
    print("\n--- MODEL TRAINING ---")
    model_results = train_models(df, target, decisions)

    context = build_context(
        df,
        target,
        eda_report,
        decisions,
        model_results,
    )

    # report
    generate_full_report(
        df,
        target,
        selected,
        eda_report,
        decisions,
    )

    # dataset snapshot
    snapshot = build_dataset_snapshot(df)

    _run_chat(df, context, snapshot)

    end = time.perf_counter()
    print(f"\nTotal execution time: {end - start:.2f} seconds")
# Script entry point: start the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()