Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import json | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| from sklearn.feature_selection import SelectKBest, f_classif | |
| from sklearn.impute import SimpleImputer | |
| from imblearn.over_sampling import SMOTE | |
| from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, mean_absolute_error, r2_score | |
| # Import ML Models | |
| from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor | |
| from sklearn.svm import SVC, SVR | |
| from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor | |
| from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor | |
| from sklearn.linear_model import Perceptron | |
# ---------------------------------------------------------------------------
# Sidebar UI
# The user picks a model family, then a task type, then one of the example
# problems registered for that (task, model) pair.
# ---------------------------------------------------------------------------
# The emoji in the title had been stored as mojibake (the UTF-8 bytes
# F0 9F A7 A0 decoded with a single-byte Greek codec); restored to U+1F9E0.
st.sidebar.title("AI Code Generator 🧠")
st.sidebar.markdown("Generate AI models instantly!")

# Model Selection
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)

# Task Selection
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Problem Selection based on Task and Model.
# Outer key: task name; inner key: model name; value: selectable problems.
problems = {
    "Classification": {
        "KNN": ["Spam Detection", "Disease Prediction"],
        "SVM": ["Image Recognition", "Text Classification"],
        "Random Forest": ["Fraud Detection", "Customer Segmentation"],
        "Decision Tree": ["Loan Approval", "Churn Prediction"],
        "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"],
    },
    "Regression": {
        "KNN": ["House Price Prediction", "Stock Prediction"],
        "SVM": ["Sales Forecasting", "Stock Market Trends"],
        "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
        "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
        "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"],
    },
}
problem = st.sidebar.selectbox(
    "Choose a Problem:",
    problems[task][model],
    key="problem_selection",
)
# Dataset Selection (Simulated dataset paths)
# Maps every selectable problem name to the CSV file that backs it.
dataset_mapping = {
    "Spam Detection": "datasets/spam_detection.csv",
    "Disease Prediction": "datasets/disease_prediction.csv",
    "Image Recognition": "datasets/image_recognition.csv",
    "Text Classification": "datasets/text_classification.csv",
    "Fraud Detection": "datasets/fraud_detection.csv",
    "Customer Segmentation": "datasets/customer_segmentation.csv",
    "Loan Approval": "datasets/loan_approval.csv",
    "Churn Prediction": "datasets/churn_prediction.csv",
    "Handwritten Digit Recognition": "datasets/handwritten_digit_recognition.csv",
    "Sentiment Analysis": "datasets/sentiment_analysis.csv",
    "House Price Prediction": "datasets/house_price_prediction.csv",
    "Stock Prediction": "datasets/stock_prediction.csv",
    "Sales Forecasting": "datasets/sales_forecasting.csv",
    "Stock Market Trends": "datasets/stock_market_trends.csv",
    "Energy Consumption": "datasets/energy_consumption.csv",
    "Patient Survival Prediction": "datasets/patient_survival_prediction.csv",
    "House Price Estimation": "datasets/house_price_estimation.csv",
    "Revenue Prediction": "datasets/revenue_prediction.csv",
    "Weather Forecasting": "datasets/weather_forecasting.csv",
    "Traffic Flow Prediction": "datasets/traffic_flow_prediction.csv",
}

# A problem without an entry falls back to the spam-detection dataset.
dataset_path = dataset_mapping.get(problem, "datasets/spam_detection.csv")

# Loading can fail (file missing, unreadable, empty, or malformed). Report the
# problem in the UI and halt this script run instead of surfacing a traceback.
try:
    df = pd.read_csv(dataset_path)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
    st.error(f"Could not load dataset '{dataset_path}': {exc}")
    st.stop()
# Model Initialization
# Maps each sidebar model name to an estimator instance matching the chosen
# task. Other sections look the selected estimator up via model_mapping[model].
#
# scikit-learn's Perceptron is a classifier only, so fitting it on a continuous
# regression target fails. For regression tasks the "Perceptron" entry uses
# SGDRegressor (a linear model trained with stochastic gradient descent) instead.
from sklearn.linear_model import SGDRegressor  # local import: only needed here

is_classification = task == "Classification"
model_mapping = {
    "KNN": KNeighborsClassifier(n_neighbors=5) if is_classification else KNeighborsRegressor(),
    "SVM": SVC() if is_classification else SVR(),
    "Random Forest": RandomForestClassifier() if is_classification else RandomForestRegressor(),
    "Decision Tree": DecisionTreeClassifier() if is_classification else DecisionTreeRegressor(),
    "Perceptron": Perceptron() if is_classification else SGDRegressor(),
}
# Generated AI Code
# Builds the text of a standalone training script for the selected estimator
# and dataset. The text is what the app displays and offers for download.
selected_cls = model_mapping[model].__class__

# An estimator's __module__ can name a private submodule (underscore-prefixed
# path component, e.g. "sklearn.svm._classes"). Dropping those components makes
# the generated script import from the public package instead.
public_module = ".".join(
    part for part in selected_cls.__module__.split(".") if not part.startswith("_")
)

generated_code = f"""
# AI Model Code
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from {public_module} import {selected_cls.__name__}
# Load Data
df = pd.read_csv('{dataset_path}')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train Model
model = {selected_cls.__name__}()
model.fit(X_train, y_train)
# Predict
y_pred = model.predict(X_test)
print(y_pred)
"""
# Display AI Code
# NOTE(review): the emoji prefixes in the labels of this section look
# mis-encoded in this copy of the file. They are left untouched because the
# intended glyphs cannot be recovered with certainty - confirm and restore.
st.subheader("π Generated AI Model Code")
st.code(generated_code, language="python")

# Download Buttons (Top of UI)
st.download_button(
    "π₯ Download Python Script (.py)",
    generated_code,
    file_name="ai_model.py",
    mime="text/x-python",
)

# Minimal nbformat-4 notebook holding the generated script in one code cell.
# A list-valued "source" is concatenated as-is by notebook readers, so each
# entry must keep its trailing newline (split("\n") would collapse the cell
# onto a single line). Code cells also need "outputs" and "execution_count".
notebook = {
    "cells": [
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": generated_code.splitlines(keepends=True),
        }
    ],
    "metadata": {},
    "nbformat": 4,
    "nbformat_minor": 2,
}
st.download_button(
    "π₯ Download Jupyter Notebook (.ipynb)",
    json.dumps(notebook),
    file_name="ai_model.ipynb",
    mime="application/json",
)
# Show the first rows of the loaded dataset.
st.subheader("π Sample Dataset")
dataset_preview = df.head()
st.write(dataset_preview)

# Render the preprocessing checklist as a markdown bullet list.
st.subheader("π Preprocessing Steps")
preprocessing_steps = (
    "Handle Missing Values",
    "Encoding Categorical Variables",
    "Feature Scaling",
    "Feature Selection",
    "Handling Imbalanced Data using **SMOTE**",
)
# One "- <mark> <step>" bullet per line, preceded by a leading newline.
checklist = "".join(f"- β {step}\n" for step in preprocessing_steps)
st.markdown("\n" + checklist)
# Preprocessing pipeline: impute -> encode -> split -> scale -> select -> rebalance.
from sklearn.feature_selection import f_regression  # local import: regression scoring

# Handle missing values: mean-impute float64 columns in one pass.
# Columns that are entirely NaN are skipped: SimpleImputer discards such
# columns on transform, which would break the column-wise assignment.
imputer = SimpleImputer(strategy='mean')
float_cols = [
    col
    for col in df.select_dtypes(include=['float64']).columns
    if df[col].notna().any()
]
if float_cols:
    df[float_cols] = imputer.fit_transform(df[float_cols])

# Encoding categorical variables: one LabelEncoder per object column, kept in
# label_encoders so the integer codes can be mapped back to the original labels.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Split Data: the last column is the target, every other column is a feature.
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]  # Target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Feature Selection: keep at most 5 features. The scoring function must match
# the task: f_classif treats the target as class labels, so continuous
# regression targets need f_regression.
score_func = f_classif if task == "Classification" else f_regression
selector = SelectKBest(score_func=score_func, k=min(5, X.shape[1]))  # k never exceeds available features
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)

# Handle imbalanced data (only for classification).
# SMOTE needs more samples in the smallest class than k_neighbors, so the
# neighbour count is capped by that class's size; resampling is skipped when
# there is a single class or the smallest class has only one sample.
if task == "Classification":
    min_class_count = int(y_train.value_counts().min())
    if y_train.nunique() > 1 and min_class_count > 1:
        # Seeded like train_test_split above so reruns give the same result.
        smote = SMOTE(k_neighbors=min(5, min_class_count - 1), random_state=42)
        X_train, y_train = smote.fit_resample(X_train, y_train)
# Estimator chosen in the sidebar.
model_instance = model_mapping[model]

# Train Model on the preprocessed training split, then predict the test split.
model_instance.fit(X_train, y_train)
y_pred = model_instance.predict(X_test)

# Model Evaluation
# NOTE(review): the emoji prefix in this heading looks mis-encoded in this copy
# of the file; left untouched because the intended glyph cannot be recovered.
st.subheader("π Model Evaluation")
if task == "Classification":
    accuracy = accuracy_score(y_test, y_pred)
    # zero_division=0 reports 0 for classes that are never predicted (the same
    # value as the default) without emitting an UndefinedMetricWarning.
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.json(report)
elif task == "Regression":
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    st.write(f"**Mean Squared Error (MSE):** {mse:.4f}")
    st.write(f"**Mean Absolute Error (MAE):** {mae:.4f}")
    # The superscript two had been stored as mojibake (UTF-8 bytes C2 B2 decoded
    # with a single-byte Greek codec); restored to "R²".
    st.write(f"**R² Score:** {r2:.4f}")
# Data Visualization
# NOTE(review): the emoji prefixes in the headings of this section look
# mis-encoded in this copy of the file; left untouched because the intended
# glyphs cannot be recovered with certainty.
st.subheader("π Data Visualization")

# Heatmap of pairwise correlations. Restricted to numeric/bool columns so a
# leftover non-numeric column (e.g. datetime) cannot make corr() fail.
st.write("### π₯ Feature Correlation")
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    df.select_dtypes(include=["number", "bool"]).corr(),
    annot=True,
    cmap="coolwarm",
    ax=ax,
)
st.pyplot(fig)
plt.close(fig)  # release the figure so repeated script runs do not accumulate them

# Feature Importance (for tree-based models)
if model in ["Random Forest", "Decision Tree"]:
    feature_importances = model_instance.feature_importances_
    # The estimator was fitted on the SelectKBest output, so it has one
    # importance per *selected* feature. Pairing the importances with all of
    # X.columns fails with a length mismatch whenever features were dropped.
    feature_names = X.columns[selector.get_support()]
    importance_df = pd.DataFrame(
        {"Feature": feature_names, "Importance": feature_importances}
    ).sort_values(by="Importance", ascending=False)
    st.write("### π Feature Importance")
    fig, ax = plt.subplots()
    sns.barplot(x=importance_df["Importance"], y=importance_df["Feature"], ax=ax)
    st.pyplot(fig)
    plt.close(fig)