# Hugging Face file-view header (page residue, not part of the program):
# uploaded by Shrouk04 in commit b814c5a ("Upload 27 files"), verified; file size 3.96 kB.
import pandas as pd
import time
from utils.eda import run_eda
from utils.preprocessing import preprocess
from utils.feature_selection import feature_selection
from utils.visualization import visualize
from utils.report import generate_full_report
from utils.agent_decision import decide_pipeline
from utils.modeling import train_models
from utils.llm_agent import ask_llm
from utils.context_builder import build_context
from utils.query_engine import answer_data_question
from utils.query_executer import execute_query
from utils.dataset_snapshot import build_dataset_snapshot
from utils.memory import add_memory
from utils.memory import get_memory_text
from utils.question_classifier import (
requires_python_analysis
)
def _normalize_name(name):
    """Apply the column-header clean-up (strip, lower, spaces -> "_") to one name."""
    return str(name).strip().lower().replace(" ", "_")


def _clean_column_names(df):
    """Normalize the headers of ``df`` in place: stripped, lower-cased, spaces -> "_"."""
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=False)
    )


def _print_target_suggestions(df):
    """Print every column that looks like a plausible prediction target.

    A column with fewer than 20 distinct values is listed as a categorical
    candidate; otherwise any numeric column is listed as a numerical one.
    """
    print("\nSuggested target columns:")
    for col in df.columns:
        if df[col].nunique() < 20:
            print(f"- {col} (categorical candidate)")
        # is_numeric_dtype also accepts int32/float32/nullable numeric dtypes,
        # which a literal comparison against 'int64'/'float64' would miss.
        elif pd.api.types.is_numeric_dtype(df[col]):
            print(f"- {col} (numerical candidate)")


def _prompt_for_target(df):
    """Keep asking until the user names an existing column; return that name.

    The headers were normalized after loading, so the answer is accepted
    either verbatim or after the same normalization. This lets the user type
    the header exactly as it appears in the CSV file.
    """
    while True:
        typed = input("\nEnter target column: ")
        if typed in df.columns:
            return typed
        normalized = _normalize_name(typed)
        if normalized in df.columns:
            return normalized
        print("Invalid column! Try again.")


def _chat_loop(df, context):
    """Answer free-form questions about ``df`` until the user types "exit".

    Each question is first run through ``execute_query``; its result, the
    dataset snapshot, the pipeline ``context`` and the stored conversation
    memory are then handed to ``ask_llm``. Every question/answer pair is
    saved with ``add_memory``.
    """
    snapshot = build_dataset_snapshot(df)
    print("\n--- AI DATA ANALYST CHAT ---")
    while True:
        try:
            question = input("\nAsk about your dataset (type exit): ").strip()
        except EOFError:
            # End of input (closed or piped stdin) ends the chat like "exit".
            break
        if not question:
            continue
        if question.lower() == "exit":
            break
        try:
            result = execute_query(df, question)
            memory_text = get_memory_text()
            answer = ask_llm(
                question=question,
                analysis_result=result,
                dataset_snapshot=snapshot,
                context=context,
                memory=memory_text,
            )
        except Exception as e:
            # One failing question must not end the whole session; report it
            # the same way the file-loading error is reported and move on.
            print(f"Error answering question: {e}")
            continue
        print("\nAI Analyst:")
        print(answer)
        add_memory(question, answer)


def main():
    """Run the interactive analysis pipeline on a user-supplied CSV file.

    Steps, in order: load the CSV, normalize its column names, run EDA, ask
    for the target column, preprocess, re-run EDA, choose pipeline decisions,
    select features, train models, build the LLM context, generate the
    report, and finally start the question/answer chat loop.

    Returns:
        None. The function returns early (after printing a message) when the
        file cannot be loaded or when preprocessing drops the target column.
    """
    start = time.perf_counter()

    # load data (tolerate stray whitespace and quotes from a pasted path)
    path = input("Enter CSV file path: ").strip().strip("\"'")
    try:
        df = pd.read_csv(path)
    except Exception as e:
        print(f"Error loading file: {e}")
        return

    # clean columns
    _clean_column_names(df)
    print("\n--- HEAD ---")
    print(df.head())

    # initial EDA; its return value is not used because the report that feeds
    # the later steps is recomputed after preprocessing
    print("\n--- EDA ---")
    run_eda(df)

    # target selection
    print("\nColumns:")
    print(df.columns.tolist())
    _print_target_suggestions(df)
    target = _prompt_for_target(df)

    # preprocessing
    print("\n--- PREPROCESSING ---")
    df = preprocess(df, target)

    # make sure the target still exists
    if target not in df.columns:
        print(f"Target column '{target}' was dropped!")
        return

    # updated EDA
    print("\n--- UPDATED EDA ---")
    eda_report = run_eda(df)

    # decision engine
    decisions = decide_pipeline(df, target)

    # feature selection
    print("\n--- FEATURE SELECTION ---")
    selected = feature_selection(df, target, decisions)
    if not selected:
        print("No important features detected!")

    # NOTE: the visualization step is currently disabled:
    #   visualize(df, target, selected, eda_report, decisions)

    # train the models
    print("\n--- MODEL TRAINING ---")
    model_results = train_models(df, target, decisions)

    context = build_context(
        df,
        target,
        eda_report,
        decisions,
        model_results,
    )

    # report
    generate_full_report(
        df,
        target,
        selected,
        eda_report,
        decisions,
    )

    _chat_loop(df, context)

    # The elapsed time covers the whole session, including time spent waiting
    # on user input.
    elapsed = time.perf_counter() - start
    print(f"\nTotal execution time: {elapsed:.2f} seconds")
# Run the interactive pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C at any prompt ends the session quietly instead of dumping a
        # traceback; 130 is the conventional exit status for an interrupt.
        print("\nInterrupted by user.")
        raise SystemExit(130)