Spaces:
Sleeping
Sleeping
Commit
·
39e56b0
1
Parent(s):
fce9b42
Initial commit of AutoML project
Browse files
- .env +1 -0
- .gitignore +0 -0
- app.py +2 -0
- frontend/main.js +13 -8
- groq_config.json +1 -0
- models/xgboost_model.py +0 -37
- requirements.txt +0 -3
- utils/export.py +0 -12
.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# GROQ_API_KEY="YOUR_GROQ_API_KEY_HERE"
|
.gitignore
CHANGED
|
Binary files a/.gitignore and b/.gitignore differ
|
|
|
app.py
CHANGED
|
@@ -18,6 +18,7 @@ import re
|
|
| 18 |
import matplotlib.pyplot as plt
|
| 19 |
import io
|
| 20 |
import base64
|
|
|
|
| 21 |
|
| 22 |
# Configure logging
|
| 23 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -71,6 +72,7 @@ def get_learning_type():
|
|
| 71 |
logging.warning("No dataset uploaded when requesting learning type.")
|
| 72 |
return jsonify({"error": "No dataset uploaded yet."}), 400
|
| 73 |
|
|
|
|
| 74 |
prompt = (
|
| 75 |
"You are an expert data scientist. Your task is to analyze a dataset and determine its learning type (supervised or unsupervised). "
|
| 76 |
"If it's a supervised learning problem, you MUST identify the single target column that the other columns would predict. "
|
|
|
|
| 18 |
import matplotlib.pyplot as plt
|
| 19 |
import io
|
| 20 |
import base64
|
| 21 |
+
import numpy as np
|
| 22 |
|
| 23 |
# Configure logging
|
| 24 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 72 |
logging.warning("No dataset uploaded when requesting learning type.")
|
| 73 |
return jsonify({"error": "No dataset uploaded yet."}), 400
|
| 74 |
|
| 75 |
+
dtypes_str = df.dtypes.to_string()
|
| 76 |
prompt = (
|
| 77 |
"You are an expert data scientist. Your task is to analyze a dataset and determine its learning type (supervised or unsupervised). "
|
| 78 |
"If it's a supervised learning problem, you MUST identify the single target column that the other columns would predict. "
|
frontend/main.js
CHANGED
|
@@ -42,10 +42,10 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 42 |
});
|
| 43 |
|
| 44 |
function formatAIResponse(text) {
|
| 45 |
-
text = text.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
|
| 46 |
-
text = text.replace(/^\d+\.\s+(.*)/gm, '<li>$1</li>');
|
| 47 |
text = text.replace(/(<li>.*<\/li>)/s, '<ol>$1<\/ol>');
|
| 48 |
-
text = text.replace(/^\*\s+(.*)/gm, '<li>$1</li>');
|
| 49 |
text = text.replace(/(<li>.*<\/li>)/s, '<ul>$1<\/ul>');
|
| 50 |
return text;
|
| 51 |
}
|
|
@@ -167,7 +167,14 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 167 |
function formatMetrics(metrics) {
|
| 168 |
let formatted = '\n';
|
| 169 |
for (const [key, value] of Object.entries(metrics)) {
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
}
|
| 172 |
return formatted;
|
| 173 |
}
|
|
@@ -234,13 +241,11 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 234 |
output += formatMetrics(result.metrics);
|
| 235 |
}
|
| 236 |
if (result.result) {
|
| 237 |
-
output += `\n<strong>Result:</strong> ${JSON.stringify(result.result, null, 2)}`;
|
| 238 |
}
|
| 239 |
trainOutput.innerHTML = output;
|
| 240 |
} catch (error) {
|
| 241 |
trainOutput.textContent = `Error: ${error.message}`;
|
| 242 |
-
} finally {
|
| 243 |
-
loader.style.display = 'none';
|
| 244 |
}
|
| 245 |
});
|
| 246 |
|
|
@@ -295,4 +300,4 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 295 |
});
|
| 296 |
});
|
| 297 |
});
|
| 298 |
-
});
|
|
|
|
| 42 |
});
|
| 43 |
|
| 44 |
function formatAIResponse(text) {
|
| 45 |
+
text = text.replace(/\*\*(.*?)\*\*/g, '<strong>$1<\/strong>');
|
| 46 |
+
text = text.replace(/^\d+\.\s+(.*)/gm, '<li>$1<\/li>');
|
| 47 |
text = text.replace(/(<li>.*<\/li>)/s, '<ol>$1<\/ol>');
|
| 48 |
+
text = text.replace(/^\*\s+(.*)/gm, '<li>$1<\/li>');
|
| 49 |
text = text.replace(/(<li>.*<\/li>)/s, '<ul>$1<\/ul>');
|
| 50 |
return text;
|
| 51 |
}
|
|
|
|
| 167 |
function formatMetrics(metrics) {
|
| 168 |
let formatted = '\n';
|
| 169 |
for (const [key, value] of Object.entries(metrics)) {
|
| 170 |
+
if (typeof value === 'object' && value !== null) {
|
| 171 |
+
formatted += `<strong>${key}:<\/strong>\n`;
|
| 172 |
+
for (const [subKey, subValue] of Object.entries(value)) {
|
| 173 |
+
formatted += ` ${subKey}: ${subValue}\n`;
|
| 174 |
+
}
|
| 175 |
+
} else {
|
| 176 |
+
formatted += `<strong>${key}:<\/strong> ${value}\n`;
|
| 177 |
+
}
|
| 178 |
}
|
| 179 |
return formatted;
|
| 180 |
}
|
|
|
|
| 241 |
output += formatMetrics(result.metrics);
|
| 242 |
}
|
| 243 |
if (result.result) {
|
| 244 |
+
output += `\n<strong>Result:<\/strong> ${JSON.stringify(result.result, null, 2)}`;
|
| 245 |
}
|
| 246 |
trainOutput.innerHTML = output;
|
| 247 |
} catch (error) {
|
| 248 |
trainOutput.textContent = `Error: ${error.message}`;
|
|
|
|
|
|
|
| 249 |
}
|
| 250 |
});
|
| 251 |
|
|
|
|
| 300 |
});
|
| 301 |
});
|
| 302 |
});
|
| 303 |
+
});
|
groq_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"GROQ_API_KEY": "YOUR_GROQ_API_KEY_HERE"}
|
models/xgboost_model.py
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
from sklearn.model_selection import train_test_split
|
| 3 |
-
from sklearn.preprocessing import LabelEncoder
|
| 4 |
-
from xgboost import XGBClassifier, XGBRegressor
|
| 5 |
-
from utils.metrics import classification_metrics, regression_metrics
|
| 6 |
-
|
| 7 |
-
def encode_dataframe(df):
|
| 8 |
-
label_encoders = {}
|
| 9 |
-
for col in df.select_dtypes(include='object'):
|
| 10 |
-
le = LabelEncoder()
|
| 11 |
-
df[col] = le.fit_transform(df[col])
|
| 12 |
-
label_encoders[col] = le
|
| 13 |
-
return df, label_encoders
|
| 14 |
-
|
| 15 |
-
def train_xgboost(df, target_column, task='classification'):
|
| 16 |
-
df = df.dropna()
|
| 17 |
-
df, encoders = encode_dataframe(df)
|
| 18 |
-
|
| 19 |
-
X = df.drop(columns=[target_column])
|
| 20 |
-
y = df[target_column]
|
| 21 |
-
|
| 22 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
| 23 |
-
|
| 24 |
-
if task == 'classification':
|
| 25 |
-
model = XGBClassifier()
|
| 26 |
-
else:
|
| 27 |
-
model = XGBRegressor()
|
| 28 |
-
|
| 29 |
-
model.fit(X_train, y_train)
|
| 30 |
-
y_pred = model.predict(X_test)
|
| 31 |
-
|
| 32 |
-
if task == 'classification':
|
| 33 |
-
metrics = classification_metrics(y_test, y_pred)
|
| 34 |
-
else:
|
| 35 |
-
metrics = regression_metrics(y_test, y_pred)
|
| 36 |
-
|
| 37 |
-
return model, metrics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -6,10 +6,7 @@ seaborn
|
|
| 6 |
plotly
|
| 7 |
xgboost
|
| 8 |
catboost
|
| 9 |
-
langgraph
|
| 10 |
python-dotenv
|
| 11 |
-
faiss-cpu
|
| 12 |
-
tiktoken
|
| 13 |
groq
|
| 14 |
flask
|
| 15 |
scipy
|
|
|
|
| 6 |
plotly
|
| 7 |
xgboost
|
| 8 |
catboost
|
|
|
|
| 9 |
python-dotenv
|
|
|
|
|
|
|
| 10 |
groq
|
| 11 |
flask
|
| 12 |
scipy
|
utils/export.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
import pickle
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
def save_model(model, path='model.pkl'):
|
| 5 |
-
with open(path, 'wb') as f:
|
| 6 |
-
pickle.dump(model, f)
|
| 7 |
-
|
| 8 |
-
def load_model(path='model.pkl'):
|
| 9 |
-
if os.path.exists(path):
|
| 10 |
-
with open(path, 'rb') as f:
|
| 11 |
-
return pickle.load(f)
|
| 12 |
-
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|