Al1Abdullah committed on
Commit
39e56b0
·
1 Parent(s): fce9b42

Initial commit of AutoML project

Browse files
Files changed (8) hide show
  1. .env +1 -0
  2. .gitignore +0 -0
  3. app.py +2 -0
  4. frontend/main.js +13 -8
  5. groq_config.json +1 -0
  6. models/xgboost_model.py +0 -37
  7. requirements.txt +0 -3
  8. utils/export.py +0 -12
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ # GROQ_API_KEY="YOUR_GROQ_API_KEY_HERE"
.gitignore CHANGED
Binary files a/.gitignore and b/.gitignore differ
 
app.py CHANGED
@@ -18,6 +18,7 @@ import re
18
  import matplotlib.pyplot as plt
19
  import io
20
  import base64
 
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -71,6 +72,7 @@ def get_learning_type():
71
  logging.warning("No dataset uploaded when requesting learning type.")
72
  return jsonify({"error": "No dataset uploaded yet."}), 400
73
 
 
74
  prompt = (
75
  "You are an expert data scientist. Your task is to analyze a dataset and determine its learning type (supervised or unsupervised). "
76
  "If it's a supervised learning problem, you MUST identify the single target column that the other columns would predict. "
 
18
  import matplotlib.pyplot as plt
19
  import io
20
  import base64
21
+ import numpy as np
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
72
  logging.warning("No dataset uploaded when requesting learning type.")
73
  return jsonify({"error": "No dataset uploaded yet."}), 400
74
 
75
+ dtypes_str = df.dtypes.to_string()
76
  prompt = (
77
  "You are an expert data scientist. Your task is to analyze a dataset and determine its learning type (supervised or unsupervised). "
78
  "If it's a supervised learning problem, you MUST identify the single target column that the other columns would predict. "
frontend/main.js CHANGED
@@ -42,10 +42,10 @@ document.addEventListener('DOMContentLoaded', () => {
42
  });
43
 
44
  function formatAIResponse(text) {
45
- text = text.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>');
46
- text = text.replace(/^\d+\.\s+(.*)/gm, '<li>$1</li>');
47
  text = text.replace(/(<li>.*<\/li>)/s, '<ol>$1<\/ol>');
48
- text = text.replace(/^\*\s+(.*)/gm, '<li>$1</li>');
49
  text = text.replace(/(<li>.*<\/li>)/s, '<ul>$1<\/ul>');
50
  return text;
51
  }
@@ -167,7 +167,14 @@ document.addEventListener('DOMContentLoaded', () => {
167
  function formatMetrics(metrics) {
168
  let formatted = '\n';
169
  for (const [key, value] of Object.entries(metrics)) {
170
- formatted += `<strong>${key}:</strong> ${JSON.stringify(value, null, 2)}\n`;
 
 
 
 
 
 
 
171
  }
172
  return formatted;
173
  }
@@ -234,13 +241,11 @@ document.addEventListener('DOMContentLoaded', () => {
234
  output += formatMetrics(result.metrics);
235
  }
236
  if (result.result) {
237
- output += `\n<strong>Result:</strong> ${JSON.stringify(result.result, null, 2)}`;
238
  }
239
  trainOutput.innerHTML = output;
240
  } catch (error) {
241
  trainOutput.textContent = `Error: ${error.message}`;
242
- } finally {
243
- loader.style.display = 'none';
244
  }
245
  });
246
 
@@ -295,4 +300,4 @@ document.addEventListener('DOMContentLoaded', () => {
295
  });
296
  });
297
  });
298
- });
 
42
  });
43
 
44
  function formatAIResponse(text) {
45
+ text = text.replace(/\*\*(.*?)\*\*/g, '<strong>$1<\/strong>');
46
+ text = text.replace(/^\d+\.\s+(.*)/gm, '<li>$1<\/li>');
47
  text = text.replace(/(<li>.*<\/li>)/s, '<ol>$1<\/ol>');
48
+ text = text.replace(/^\*\s+(.*)/gm, '<li>$1<\/li>');
49
  text = text.replace(/(<li>.*<\/li>)/s, '<ul>$1<\/ul>');
50
  return text;
51
  }
 
167
  function formatMetrics(metrics) {
168
  let formatted = '\n';
169
  for (const [key, value] of Object.entries(metrics)) {
170
+ if (typeof value === 'object' && value !== null) {
171
+ formatted += `<strong>${key}:<\/strong>\n`;
172
+ for (const [subKey, subValue] of Object.entries(value)) {
173
+ formatted += ` ${subKey}: ${subValue}\n`;
174
+ }
175
+ } else {
176
+ formatted += `<strong>${key}:<\/strong> ${value}\n`;
177
+ }
178
  }
179
  return formatted;
180
  }
 
241
  output += formatMetrics(result.metrics);
242
  }
243
  if (result.result) {
244
+ output += `\n<strong>Result:<\/strong> ${JSON.stringify(result.result, null, 2)}`;
245
  }
246
  trainOutput.innerHTML = output;
247
  } catch (error) {
248
  trainOutput.textContent = `Error: ${error.message}`;
 
 
249
  }
250
  });
251
 
 
300
  });
301
  });
302
  });
303
+ });
groq_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"GROQ_API_KEY": "YOUR_GROQ_API_KEY_HERE"}
models/xgboost_model.py DELETED
@@ -1,37 +0,0 @@
1
- import pandas as pd
2
- from sklearn.model_selection import train_test_split
3
- from sklearn.preprocessing import LabelEncoder
4
- from xgboost import XGBClassifier, XGBRegressor
5
- from utils.metrics import classification_metrics, regression_metrics
6
-
7
- def encode_dataframe(df):
8
- label_encoders = {}
9
- for col in df.select_dtypes(include='object'):
10
- le = LabelEncoder()
11
- df[col] = le.fit_transform(df[col])
12
- label_encoders[col] = le
13
- return df, label_encoders
14
-
15
- def train_xgboost(df, target_column, task='classification'):
16
- df = df.dropna()
17
- df, encoders = encode_dataframe(df)
18
-
19
- X = df.drop(columns=[target_column])
20
- y = df[target_column]
21
-
22
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
23
-
24
- if task == 'classification':
25
- model = XGBClassifier()
26
- else:
27
- model = XGBRegressor()
28
-
29
- model.fit(X_train, y_train)
30
- y_pred = model.predict(X_test)
31
-
32
- if task == 'classification':
33
- metrics = classification_metrics(y_test, y_pred)
34
- else:
35
- metrics = regression_metrics(y_test, y_pred)
36
-
37
- return model, metrics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -6,10 +6,7 @@ seaborn
6
  plotly
7
  xgboost
8
  catboost
9
- langgraph
10
  python-dotenv
11
- faiss-cpu
12
- tiktoken
13
  groq
14
  flask
15
  scipy
 
6
  plotly
7
  xgboost
8
  catboost
 
9
  python-dotenv
 
 
10
  groq
11
  flask
12
  scipy
utils/export.py DELETED
@@ -1,12 +0,0 @@
1
- import pickle
2
- import os
3
-
4
- def save_model(model, path='model.pkl'):
5
- with open(path, 'wb') as f:
6
- pickle.dump(model, f)
7
-
8
- def load_model(path='model.pkl'):
9
- if os.path.exists(path):
10
- with open(path, 'rb') as f:
11
- return pickle.load(f)
12
- return None