Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.gitattributes +2 -0
rocketship-ml-model-train/app.py +11 -0
rocketship-ml-model-train/assets/all_genres_clean.csv +3 -0
rocketship-ml-model-train/assets/features_dataset.csv +3 -0
rocketship-ml-model-train/genre_pipeline.pkl +3 -0
rocketship-ml-model-train/genre_pipeline_v2.pkl +3 -0
rocketship-ml-model-train/get_db_info.py +73 -0
rocketship-ml-model-train/model_train.py +58 -0
rocketship-ml-model-train/prediction_.py +36 -0
rocketship-ml-model-train/train_ml_model.ipynb +1028 -0
rocketship-ml-model-train/train_ml_model_1.ipynb +0 -0

.gitattributes CHANGED Viewed

@@ -105,3 +105,5 @@ genre_finder/assets/model_ready_dataset_final1.csv filter=lfs diff=lfs merge=lfs
 genre_finder/assets/model_ready_dataset1.csv filter=lfs diff=lfs merge=lfs -text
 genre_finder/find_genre_different_apporoch/features_dataset.csv filter=lfs diff=lfs merge=lfs -text
 genre_finder/find_genre_different_apporoch/all_genre.csv filter=lfs diff=lfs merge=lfs -text

 genre_finder/assets/model_ready_dataset1.csv filter=lfs diff=lfs merge=lfs -text
 genre_finder/find_genre_different_apporoch/features_dataset.csv filter=lfs diff=lfs merge=lfs -text
 genre_finder/find_genre_different_apporoch/all_genre.csv filter=lfs diff=lfs merge=lfs -text
+rocketship-ml-model-train/assets/all_genres_clean.csv filter=lfs diff=lfs merge=lfs -text
+rocketship-ml-model-train/assets/features_dataset.csv filter=lfs diff=lfs merge=lfs -text

rocketship-ml-model-train/app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import os, re, json
+from get_db_info import extract_the_db_data2
+from prediction_ import predict_genre
+while True:
+    input_id = input('Enter input id: ')
+    if input_id.lower() == 'exit':
+        break
+    dict_data = extract_the_db_data2(input_id)
+    answer = predict_genre(dict_data)
+    print(answer)

rocketship-ml-model-train/assets/all_genres_clean.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d0daf801b311511ab134fe677cadbf75c30f55953a3ade5ee16c0dfbef9987b
+size 71663849

rocketship-ml-model-train/assets/features_dataset.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ac64a40014104b2c4f9f822fe956e876db8eeeb0c7c8e1caaf8e4a0df0cc515
+size 73116979

rocketship-ml-model-train/genre_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87e0209eacb5b5cf7cfe67eac22786ae34306fc53170a0ae8838f62842718870
+size 129741572

rocketship-ml-model-train/genre_pipeline_v2.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:615bebec486a76b3e34ba0986a4921c031b437ff8b53398f4705db94b7984472
+size 128260374

rocketship-ml-model-train/get_db_info.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+from dotenv import load_dotenv
+from pymongo import MongoClient
+from bson.objectid import ObjectId
+load_dotenv()  # module setup: load variables from a .env file into the environment before the os.getenv calls below
+mongo_db_client = MongoClient(os.getenv("MONGO_URI"))  # NOTE(review): os.getenv returns None when MONGO_URI is unset; confirm the resulting client target is intended
+db = mongo_db_client[os.getenv("DB_NAME")]  # database selected by the DB_NAME environment variable
+collections = db[os.getenv("COLLECTION_NAME")]  # one collection handle (despite the plural name), queried by the extract_* functions
+oid = "69cb6b4bd86a47b0a1581017"  # sample id; the extract_* functions take their own `oid` parameter, which shadows this global
+def extract_the_db_data1(oid):
+    collection = collections.find_one({"_id": ObjectId(oid)})
+    features_answer = collection.get("features","")
+    dict_data = {
+        'spectral_contrast_mean (mix)': features_answer.get('spectral_contrast_mean (mix)',0.0),
+        'chroma_mean (mix)': features_answer.get('chroma_mean (mix)',0.0),
+        'melody_variability (vocals)': features_answer.get('melody_variability (vocals)',0.0),
+        'rhythm_onset_rate (mix)': features_answer.get('rhythm_onset_rate (mix)',0.0),
+        'spectral_centroid_mean custom (mix)': features_answer.get('spectral_centroid_mean custom (mix)',0.0),
+        'repetition_score custom (mix)': features_answer.get('repetition_score custom (mix)',0.0),
+        'pitch_std (mix)': features_answer.get('pitch_std (mix)',0.0),
+        'tempo_bpm_corrected (mix)': features_answer.get('tempo_bpm_corrected (mix)',0.0),
+        'tempo_bpm_original (mix)': features_answer.get('tempo_bpm_original (mix)',0.0),
+        'mfcc_mean_1 (mix)': features_answer.get('mfcc_mean_1 (mix)',0.0),
+        'mfcc_mean_2 (mix)': features_answer.get('mfcc_mean_2 (mix)',0.0),
+        'loudness_range_lu custom (mix)': features_answer.get('loudness_range_lu custom (mix)',0.0),
+        'zero_crossing_rate (mix)': features_answer.get('zero_crossing_rate (mix)',0.0),
+        'loudness_integrated_lufs custom (mix)': features_answer.get('loudness_integrated_lufs custom (mix)',0.0),
+        'energy_essentia (mix)': features_answer.get('energy_essentia (mix)',0.0),
+        'energy_librosa (mix)': features_answer.get('energy_librosa (mix)',0.0),
+        'rms_energy_mean (mix)': features_answer.get('rms_energy_mean (mix)',0.0),
+        'melody_complexity (vocals)': features_answer.get('melody_complexity (vocals)',0.0),
+    }
+    #print(features_answer.keys())
+    return dict_data
+def extract_the_db_data2(oid):
+    collection = collections.find_one({"_id": ObjectId(oid)})
+    features_answer = collection.get("features","")
+    dict_data = {
+        'melody_complexity (vocals)': features_answer.get('melody_complexity (vocals)',0.0),
+        'melody_range (vocals)': features_answer.get('melody_range (vocals)',0.0),
+        'melody_variability (vocals)': features_answer.get('melody_variability (vocals)',0.0),
+        'tempo_bpm_original (mix)': features_answer.get('tempo_bpm_original (mix)',0.0),
+        'danceability custom (mix)': features_answer.get('danceability custom (mix)',0.0),
+        'loudness_integrated_lufs custom (mix)': features_answer.get('loudness_integrated_lufs custom (mix)',0.0),
+        'loudness_range_lu custom (mix)': features_answer.get('loudness_range_lu custom (mix)',0.0),
+        'energy_librosa (mix)': features_answer.get('energy_librosa (mix)',0.0),
+        'energy_librosa_std (mix)': features_answer.get('energy_librosa_std (mix)',0.0),
+        'energy_essentia (mix)': features_answer.get('energy_essentia (mix)',0.0),
+        'energy_essentia_std (mix)': features_answer.get('energy_essentia_std (mix)',0.0),
+        'energy_combined (mix)': features_answer.get('energy_combined (mix)',0.0),
+        'spectral_centroid_mean custom (mix)': features_answer.get('spectral_centroid_mean custom (mix)',0.0),
+        'mfcc_mean_1 (mix)': features_answer.get('mfcc_mean_1 (mix)',0.0),
+        'mfcc_mean_2 (mix)': features_answer.get('mfcc_mean_2 (mix)',0.0),
+        'chroma_mean (mix)': features_answer.get('chroma_mean (mix)',0.0),
+        'spectral_contrast_mean (mix)': features_answer.get('spectral_contrast_mean (mix)',0.0),
+        'repetition_score custom (mix)': features_answer.get('repetition_score custom (mix)',0.0),
+        'pitch_mean (mix)': features_answer.get('pitch_mean (mix)',0.0),
+        'pitch_std (mix)': features_answer.get('pitch_std (mix)',0.0),
+        'rms_energy_mean (mix)': features_answer.get('rms_energy_mean (mix)',0.0),
+        'rms_energy_std (mix)': features_answer.get('rms_energy_std (mix)',0.0),
+        'zero_crossing_rate (mix)': features_answer.get('zero_crossing_rate (mix)',0.0),
+    }
+    print(dict_data)
+    return dict_data
+# answer = extract_the_db_data2("69cb624e4801e0963cda8568")
+# print(answer)

rocketship-ml-model-train/model_train.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import pandas as pd
+import numpy as np
+import ast
+import joblib
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.multioutput import MultiOutputClassifier
+from sklearn.metrics import classification_report
+from collections import Counter
+from xgboost import XGBClassifier
+from sklearn.ensemble import RandomForestClassifier
+from imblearn.over_sampling import SMOTE
+pd.set_option("display.max_columns", None)
+df.to_csv("all_genres_clean.csv", index=False)
+df = pd.read_csv("all_genres_clean.csv",low_memory=False)
+drop_cols = ['track_url','name']
+df = df.drop(columns=drop_cols)
+X = df.drop(columns=["genre", "sub_genres"])
+y_genre = df["genre"]
+genre_encoder = LabelEncoder()
+y_genre_encoded = genre_encoder.fit_transform(y_genre)
+smote = SMOTE(random_state=42)
+X_resampled, y_resampled = smote.fit_resample(X, y_genre_encoded)
+counter = Counter(y_resampled)
+X_train, X_test, y_train, y_test = train_test_split(
+    X_resampled, y_resampled,
+    test_size=0.2,
+    random_state=42
+)
+genre_model = XGBClassifier(
+    n_estimators=1000,
+    max_depth=8,
+    learning_rate=0.05,
+    subsample=0.8,
+    colsample_bytree=0.8,
+    min_child_weight=5,
+    gamma=0.1,
+    reg_lambda=1,
+    tree_method="hist",
+    eval_metric="mlogloss"
+)
+genre_model.fit(X_train, y_train)
+y_pred = genre_model.predict(X_test)
+print(classification_report(y_test, y_pred))
+pipeline_data = {
+    "model": genre_model,
+    "label_encoder": genre_encoder,
+    "features": X.columns.tolist(),
+    "train_data": df
+}
+joblib.dump(pipeline_data, "genre_pipeline_v2.pkl")

rocketship-ml-model-train/prediction_.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import pandas as pd
+import os,joblib
+import numpy as np
+path = r"genre_pipeline_v2.pkl"  # module setup: relative path to the saved pipeline bundle loaded below
+pipeline = joblib.load(path)  # NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source
+model = pipeline["model"]  # classifier whose predict() is called in predict_genre
+le = pipeline["label_encoder"]  # used in predict_genre to map predicted class ids back to genre labels
+features = pipeline["features"]  # feature column names; predict_genre arranges its input in this order
+df_full = pipeline["train_data"]  # DataFrame searched by predict_genre for the closest row of the predicted genre
+X_full = df_full[features]  # feature-only selection; not referenced by predict_genre
+def predict_genre(sample):
+    # = X_full.iloc[-2].to_dict()
+    input_df = pd.DataFrame([sample])
+    for col in features:
+        if col not in input_df.columns:
+            input_df[col] = 0
+    input_df = input_df[features]
+    pred_encoded = model.predict(input_df)
+    prediction = le.inverse_transform(pred_encoded)[0]
+    filtered_df = df_full[df_full["genre"] == prediction].copy()
+    def find_best_match(input_row, df_subset):
+        X_subset = df_subset[features]
+        distances = np.linalg.norm(X_subset.values - input_row.values, axis=1)
+        best_idx = np.argmin(distances)
+        return df_subset.iloc[best_idx]
+    best_row = find_best_match(input_df.iloc[0], filtered_df)
+    final_output = pd.DataFrame([{
+        "predict_genre": prediction,
+        "genre_subgenre_list": best_row["sub_genres"]
+    }])
+    return final_output

rocketship-ml-model-train/train_ml_model.ipynb ADDED Viewed

	@@ -0,0 +1,1028 @@

+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T12:26:36.441430700Z",
+     "start_time": "2026-03-31T12:26:36.428176800Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "#!pip install imbalanced-learn",
+   "id": "6061b6e1a4964764",
+   "outputs": [],
+   "execution_count": 50
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 1. IMPORTS",
+   "id": "80a50f1f4cac28af"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:50:53.238168500Z",
+     "start_time": "2026-03-31T15:50:53.191532300Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import ast\n",
+    "import joblib\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.multioutput import MultiOutputClassifier\n",
+    "from sklearn.metrics import classification_report\n",
+    "\n",
+    "from xgboost import XGBClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from imblearn.over_sampling import SMOTE"
+   ],
+   "id": "a1a47a2f55d6d805",
+   "outputs": [],
+   "execution_count": 138
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 2. LOAD DATA",
+   "id": "eaf2ccbd5817b489"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:50:55.726659200Z",
+     "start_time": "2026-03-31T15:50:54.991904900Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "df = pd.read_csv(\"assets/all_genre.csv\")\n",
+    "df.shape"
+   ],
+   "id": "cd568b9bd5b06a09",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(125169, 28)"
+      ]
+     },
+     "execution_count": 139,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 139
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:51:05.654993800Z",
+     "start_time": "2026-03-31T15:51:05.582282300Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "df.tail(2)",
+   "id": "a1d0c8b7cde52c38",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "                             _id     genre  \\\n",
+       "125167  698ebc778a361f5ae9bc3b9b  RnB Soul   \n",
+       "125168  6996dc6afd7fa1d0c64b9096  RnB Soul   \n",
+       "\n",
+       "                                           genre_subgenre  \\\n",
+       "125167  RnB Soul --- ['Contemporary RnB', 'Hip Hop Rap...   \n",
+       "125168                                    RnB Soul --- []   \n",
+       "\n",
+       "        trimmed_audio_duration_sec  syllable_count  word_count  \\\n",
+       "125167                  190.132381           534.0       431.0   \n",
+       "125168                  100.680000           148.0       122.0   \n",
+       "\n",
+       "        spectral_contrast_mean (mix)  chroma_mean (mix)  \\\n",
+       "125167                     19.721407           0.529660   \n",
+       "125168                     20.478757           0.498923   \n",
+       "\n",
+       "        melody_variability (vocals)  rhythm_onset_rate (mix)  ...  \\\n",
+       "125167                     0.640377                 3.994444  ...   \n",
+       "125168                     0.570758                 3.436631  ...   \n",
+       "\n",
+       "        zero_crossing_rate (mix)  vocab_richness  \\\n",
+       "125167                  0.087692           0.411   \n",
+       "125168                  0.075311           0.541   \n",
+       "\n",
+       "        loudness_integrated_lufs custom (mix)  readability_score  \\\n",
+       "125167                             -15.104306                1.5   \n",
+       "125168                             -15.867487               21.2   \n",
+       "\n",
+       "        energy_essentia (mix)  energy_librosa (mix)  rms_energy_mean (mix)  \\\n",
+       "125167               0.185114              0.360564               0.403058   \n",
+       "125168               0.280897              0.475058               0.475100   \n",
+       "\n",
+       "        sentiment_score  melody_complexity (vocals)  avg_word_length  \n",
+       "125167            0.002                    3.416667             3.77  \n",
+       "125168            0.036                    2.666667             3.52  \n",
+       "\n",
+       "[2 rows x 28 columns]"
+      ],
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>_id</th>\n",
+       "      <th>genre</th>\n",
+       "      <th>genre_subgenre</th>\n",
+       "      <th>trimmed_audio_duration_sec</th>\n",
+       "      <th>syllable_count</th>\n",
+       "      <th>word_count</th>\n",
+       "      <th>spectral_contrast_mean (mix)</th>\n",
+       "      <th>chroma_mean (mix)</th>\n",
+       "      <th>melody_variability (vocals)</th>\n",
+       "      <th>rhythm_onset_rate (mix)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>zero_crossing_rate (mix)</th>\n",
+       "      <th>vocab_richness</th>\n",
+       "      <th>loudness_integrated_lufs custom (mix)</th>\n",
+       "      <th>readability_score</th>\n",
+       "      <th>energy_essentia (mix)</th>\n",
+       "      <th>energy_librosa (mix)</th>\n",
+       "      <th>rms_energy_mean (mix)</th>\n",
+       "      <th>sentiment_score</th>\n",
+       "      <th>melody_complexity (vocals)</th>\n",
+       "      <th>avg_word_length</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>125167</th>\n",
+       "      <td>698ebc778a361f5ae9bc3b9b</td>\n",
+       "      <td>RnB Soul</td>\n",
+       "      <td>RnB Soul --- ['Contemporary RnB', 'Hip Hop Rap...</td>\n",
+       "      <td>190.132381</td>\n",
+       "      <td>534.0</td>\n",
+       "      <td>431.0</td>\n",
+       "      <td>19.721407</td>\n",
+       "      <td>0.529660</td>\n",
+       "      <td>0.640377</td>\n",
+       "      <td>3.994444</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.087692</td>\n",
+       "      <td>0.411</td>\n",
+       "      <td>-15.104306</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>0.185114</td>\n",
+       "      <td>0.360564</td>\n",
+       "      <td>0.403058</td>\n",
+       "      <td>0.002</td>\n",
+       "      <td>3.416667</td>\n",
+       "      <td>3.77</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>125168</th>\n",
+       "      <td>6996dc6afd7fa1d0c64b9096</td>\n",
+       "      <td>RnB Soul</td>\n",
+       "      <td>RnB Soul --- []</td>\n",
+       "      <td>100.680000</td>\n",
+       "      <td>148.0</td>\n",
+       "      <td>122.0</td>\n",
+       "      <td>20.478757</td>\n",
+       "      <td>0.498923</td>\n",
+       "      <td>0.570758</td>\n",
+       "      <td>3.436631</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.075311</td>\n",
+       "      <td>0.541</td>\n",
+       "      <td>-15.867487</td>\n",
+       "      <td>21.2</td>\n",
+       "      <td>0.280897</td>\n",
+       "      <td>0.475058</td>\n",
+       "      <td>0.475100</td>\n",
+       "      <td>0.036</td>\n",
+       "      <td>2.666667</td>\n",
+       "      <td>3.52</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 28 columns</p>\n",
+       "</div>"
+      ]
+     },
+     "execution_count": 140,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 140
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 3. EXTRACT SUBGENRE LIST",
+   "id": "25e6116f88f4e7b5"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:32:35.481030700Z",
+     "start_time": "2026-03-31T15:32:33.709931Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "def extract_list(value):\n",
+    "    \"\"\"Parse the list literal after '---' in a 'Genre --- [...]' string.\n",
+    "\n",
+    "    Returns [] when the value is missing, has no '---' part, or the part\n",
+    "    after it cannot be parsed.\n",
+    "    \"\"\"\n",
+    "    if pd.isna(value):\n",
+    "        return []\n",
+    "    try:\n",
+    "        parts = value.split('---')\n",
+    "        if len(parts) < 2:\n",
+    "            return []\n",
+    "        return ast.literal_eval(parts[1].strip())\n",
+    "    except (AttributeError, ValueError, SyntaxError, TypeError):\n",
+    "        # Non-string cell or malformed literal: treat as no sub-genres.\n",
+    "        # Narrower than a bare except so Ctrl-C still interrupts the apply().\n",
+    "        return []\n",
+    "\n",
+    "df[\"genre_subgenre_list\"] = df[\"genre_subgenre\"].apply(extract_list)\n",
+    "df.drop(columns=[\"genre_subgenre\"], inplace=True)"
+   ],
+   "id": "ceb7a07f3a0ef9c",
+   "outputs": [],
+   "execution_count": 137
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:48:22.250988500Z",
+     "start_time": "2026-03-31T14:48:22.187035700Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "df.head(2)",
+   "id": "64c82db63a7d7cda",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "                        _id        genre  trimmed_audio_duration_sec  \\\n",
+       "0  69143037d64595f86b812d77  Hip Hop Rap                  183.843991   \n",
+       "1  691448a64bef1dcbb1d3da1b  Hip Hop Rap                  160.786576   \n",
+       "\n",
+       "   syllable_count  word_count  spectral_contrast_mean (mix)  \\\n",
+       "0           355.0       298.0                     20.440557   \n",
+       "1           285.0       236.0                     19.657228   \n",
+       "\n",
+       "   chroma_mean (mix)  melody_variability (vocals)  rhythm_onset_rate (mix)  \\\n",
+       "0           0.464204                     0.434021                 3.250000   \n",
+       "1           0.424237                     0.351001                 2.425576   \n",
+       "\n",
+       "   spectral_centroid_mean custom (mix)  ...  vocab_richness  \\\n",
+       "0                             0.132002  ...           0.305   \n",
+       "1                             0.139757  ...           0.352   \n",
+       "\n",
+       "   loudness_integrated_lufs custom (mix)  readability_score  \\\n",
+       "0                             -12.104649               20.9   \n",
+       "1                             -11.814197               89.4   \n",
+       "\n",
+       "   energy_essentia (mix)  energy_librosa (mix)  rms_energy_mean (mix)  \\\n",
+       "0               0.349783              0.576250               0.578454   \n",
+       "1               0.429792              0.619931               0.632507   \n",
+       "\n",
+       "   sentiment_score  melody_complexity (vocals)  avg_word_length  \\\n",
+       "0            0.116                    2.250000             3.76   \n",
+       "1            0.173                    1.916667             3.50   \n",
+       "\n",
+       "                                 genre_subgenre_list  \n",
+       "0  [Alternative Hip Hop, Boom Bap, Contemporary H...  \n",
+       "1     [Americana, Contemporary Country, Bro Country]  \n",
+       "\n",
+       "[2 rows x 28 columns]"
+      ],
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>_id</th>\n",
+       "      <th>genre</th>\n",
+       "      <th>trimmed_audio_duration_sec</th>\n",
+       "      <th>syllable_count</th>\n",
+       "      <th>word_count</th>\n",
+       "      <th>spectral_contrast_mean (mix)</th>\n",
+       "      <th>chroma_mean (mix)</th>\n",
+       "      <th>melody_variability (vocals)</th>\n",
+       "      <th>rhythm_onset_rate (mix)</th>\n",
+       "      <th>spectral_centroid_mean custom (mix)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>vocab_richness</th>\n",
+       "      <th>loudness_integrated_lufs custom (mix)</th>\n",
+       "      <th>readability_score</th>\n",
+       "      <th>energy_essentia (mix)</th>\n",
+       "      <th>energy_librosa (mix)</th>\n",
+       "      <th>rms_energy_mean (mix)</th>\n",
+       "      <th>sentiment_score</th>\n",
+       "      <th>melody_complexity (vocals)</th>\n",
+       "      <th>avg_word_length</th>\n",
+       "      <th>genre_subgenre_list</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>69143037d64595f86b812d77</td>\n",
+       "      <td>Hip Hop Rap</td>\n",
+       "      <td>183.843991</td>\n",
+       "      <td>355.0</td>\n",
+       "      <td>298.0</td>\n",
+       "      <td>20.440557</td>\n",
+       "      <td>0.464204</td>\n",
+       "      <td>0.434021</td>\n",
+       "      <td>3.250000</td>\n",
+       "      <td>0.132002</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.305</td>\n",
+       "      <td>-12.104649</td>\n",
+       "      <td>20.9</td>\n",
+       "      <td>0.349783</td>\n",
+       "      <td>0.576250</td>\n",
+       "      <td>0.578454</td>\n",
+       "      <td>0.116</td>\n",
+       "      <td>2.250000</td>\n",
+       "      <td>3.76</td>\n",
+       "      <td>[Alternative Hip Hop, Boom Bap, Contemporary H...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>691448a64bef1dcbb1d3da1b</td>\n",
+       "      <td>Hip Hop Rap</td>\n",
+       "      <td>160.786576</td>\n",
+       "      <td>285.0</td>\n",
+       "      <td>236.0</td>\n",
+       "      <td>19.657228</td>\n",
+       "      <td>0.424237</td>\n",
+       "      <td>0.351001</td>\n",
+       "      <td>2.425576</td>\n",
+       "      <td>0.139757</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.352</td>\n",
+       "      <td>-11.814197</td>\n",
+       "      <td>89.4</td>\n",
+       "      <td>0.429792</td>\n",
+       "      <td>0.619931</td>\n",
+       "      <td>0.632507</td>\n",
+       "      <td>0.173</td>\n",
+       "      <td>1.916667</td>\n",
+       "      <td>3.50</td>\n",
+       "      <td>[Americana, Contemporary Country, Bro Country]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 28 columns</p>\n",
+       "</div>"
+      ]
+     },
+     "execution_count": 100,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 100
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 4. CLEAN FEATURES",
+   "id": "7a0ce7b0f4ea696a"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:29:37.586057300Z",
+     "start_time": "2026-03-31T14:29:37.559184400Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "drop_cols = ['_id','trimmed_audio_duration_sec', 'syllable_count', 'word_count',\n",
+    "             'sentiment_score', 'avg_word_length', 'avg_word_length', 'readability_score','vocab_richness']\n",
+    "\n",
+    "\n",
+    "df = df.drop(columns=drop_cols)"
+   ],
+   "id": "10232ce5c7e02ebe",
+   "outputs": [],
+   "execution_count": 74
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 5. SPLIT FEATURES",
+   "id": "e501c73d9a21d15b"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:29:39.435021600Z",
+     "start_time": "2026-03-31T14:29:39.364438700Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "X = df.drop(columns=[\"genre\", \"genre_subgenre_list\"])\n",
+    "y_genre = df[\"genre\"]"
+   ],
+   "id": "46943cd9570c5ef1",
+   "outputs": [],
+   "execution_count": 75
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 6. ENCODE GENRE",
+   "id": "9fe48aab3a37b04a"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:29:40.867349200Z",
+     "start_time": "2026-03-31T14:29:40.820224600Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "genre_encoder = LabelEncoder()\n",
+    "y_genre_encoded = genre_encoder.fit_transform(y_genre)"
+   ],
+   "id": "9cfcd4b98c05576b",
+   "outputs": [],
+   "execution_count": 76
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 7. BALANCE DATA (SMOTE)",
+   "id": "c37f54cc88a20873"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:29:43.867990400Z",
+     "start_time": "2026-03-31T14:29:42.314962900Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "smote = SMOTE(random_state=42)\n",
+    "X_resampled, y_resampled = smote.fit_resample(X, y_genre_encoded)"
+   ],
+   "id": "4058a4ecea99d77",
+   "outputs": [],
+   "execution_count": 77
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 8. TRAIN GENRE MODEL",
+   "id": "383f3a8e280c7d78"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:33:26.436070600Z",
+     "start_time": "2026-03-31T14:29:48.404312300Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X_resampled, y_resampled,\n",
+    "    test_size=0.2,\n",
+    "    random_state=42\n",
+    ")\n",
+    "\n",
+    "genre_model = XGBClassifier(\n",
+    "    n_estimators=1000,\n",
+    "    max_depth=8,\n",
+    "    learning_rate=0.05,\n",
+    "    subsample=0.8,\n",
+    "    colsample_bytree=0.8,\n",
+    "    min_child_weight=5,\n",
+    "    gamma=0.1,\n",
+    "    reg_lambda=1,\n",
+    "    tree_method=\"hist\",\n",
+    "    eval_metric=\"mlogloss\"\n",
+    ")\n",
+    "\n",
+    "genre_model.fit(X_train, y_train)\n",
+    "y_pred = genre_model.predict(X_test)\n",
+    "print(classification_report(y_test, y_pred))"
+   ],
+   "id": "6669f4cb87c22b0d",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.72      0.66      0.69      5785\n",
+      "           1       0.85      0.84      0.85      5860\n",
+      "           2       0.71      0.71      0.71      5766\n",
+      "           3       0.69      0.73      0.71      5956\n",
+      "           4       0.93      0.98      0.95      5752\n",
+      "           5       0.68      0.75      0.71      5824\n",
+      "           6       1.00      1.00      1.00      5865\n",
+      "           7       0.51      0.41      0.45      5788\n",
+      "           8       0.98      1.00      0.99      5804\n",
+      "           9       0.56      0.56      0.56      5856\n",
+      "          10       0.56      0.60      0.58      5775\n",
+      "\n",
+      "    accuracy                           0.75     64031\n",
+      "   macro avg       0.74      0.75      0.75     64031\n",
+      "weighted avg       0.74      0.75      0.75     64031\n",
+      "\n"
+     ]
+    }
+   ],
+   "execution_count": 78
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:35:17.020787300Z",
+     "start_time": "2026-03-31T14:35:08.071408100Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "genre_model.score(X_train, y_train)",
+   "id": "118a434daae8c565",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9226663647295841"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 80
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T14:35:22.584004600Z",
+     "start_time": "2026-03-31T14:35:20.164379Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "genre_model.score(X_test, y_test)",
+   "id": "d0cc6a38f09dfaf3",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7481063859692961"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 81
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 9. SAVE MODEL",
+   "id": "44d62a34d7c97a4d"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:25:11.541174Z",
+     "start_time": "2026-03-31T15:25:10.667061500Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "pipeline_data = {\n",
+    "    \"model\": genre_model,\n",
+    "    \"label_encoder\": genre_encoder,\n",
+    "    \"features\": X.columns.tolist(),\n",
+    "    \"train_data\": df\n",
+    "}\n",
+    "joblib.dump(pipeline_data, \"genre_pipeline.pkl\")"
+   ],
+   "id": "99a419eddad21a44",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['genre_pipeline.pkl']"
+      ]
+     },
+     "execution_count": 126,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 126
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## 10. LOAD + PREDICT (Using same .pkl)",
+   "id": "41cb3fb66288af9e"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:28:15.462539200Z",
+     "start_time": "2026-03-31T15:28:14.847279900Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "pipeline = joblib.load(\"genre_pipeline.pkl\")\n",
+    "\n",
+    "model = pipeline[\"model\"]\n",
+    "le = pipeline[\"label_encoder\"]\n",
+    "features = pipeline[\"features\"]\n",
+    "df_full = pipeline[\"train_data\"]\n",
+    "\n",
+    "# Input sample\n",
+    "sample = X.iloc[92229].to_dict()\n",
+    "input_df = pd.DataFrame([sample])\n",
+    "\n",
+    "for col in features:\n",
+    "    if col not in input_df.columns:\n",
+    "        input_df[col] = 0\n",
+    "\n",
+    "input_df = input_df[features]\n",
+    "pred_encoded = model.predict(input_df)\n",
+    "prediction = le.inverse_transform(pred_encoded)[0]\n",
+    "filtered_df = df_full[df_full[\"genre\"] == prediction].copy()\n",
+    "def find_best_match(input_row, df_subset):\n",
+    "    X_subset = df_subset[features]\n",
+    "    distances = np.linalg.norm(X_subset.values - input_row.values, axis=1)\n",
+    "    best_idx = np.argmin(distances)\n",
+    "    return df_subset.iloc[best_idx]\n",
+    "best_row = find_best_match(input_df.iloc[0], filtered_df)\n",
+    "final_output = pd.DataFrame([{\n",
+    "    \"predict_genre\": prediction,\n",
+    "    \"genre_subgenre_list\": best_row[\"genre_subgenre_list\"]\n",
+    "}])\n",
+    "\n",
+    "print(final_output)"
+   ],
+   "id": "c4fc96bfd6a8636e",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  predict_genre genre_subgenre_list\n",
+      "0           EDM                  []\n"
+     ]
+    }
+   ],
+   "execution_count": 133
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:28:18.801970900Z",
+     "start_time": "2026-03-31T15:28:18.721374900Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "print(best_row[\"genre_subgenre_list\"])",
+   "id": "800aba339a69aa9c",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[]\n"
+     ]
+    }
+   ],
+   "execution_count": 134
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-03-31T15:27:37.723854800Z",
+     "start_time": "2026-03-31T15:27:37.661366500Z"
+    }
+   },
+   "cell_type": "code",
+   "source": "df.sample(5)",
+   "id": "1a304b17152705c9",
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "                             _id     genre  trimmed_audio_duration_sec  \\\n",
+       "86885   6996acb01af4c88ae2d95a39       EDM                  190.320000   \n",
+       "92229   699c338e0b2a57db2a93724f       EDM                  190.792290   \n",
+       "123297  697c327515cf3a48da484dff  RnB Soul                  138.646349   \n",
+       "44658   68fa1e2f4677fa714c26e8ae      Rock                  212.214240   \n",
+       "86793   6996a38b7c4d735613b230a3       EDM                  239.879977   \n",
+       "\n",
+       "        syllable_count  word_count  spectral_contrast_mean (mix)  \\\n",
+       "86885            372.0       334.0                     20.215855   \n",
+       "92229            194.0       161.0                     19.167682   \n",
+       "123297           264.0       219.0                     18.698643   \n",
+       "44658            109.0        85.0                     20.150875   \n",
+       "86793            300.0       234.0                     18.801814   \n",
+       "\n",
+       "        chroma_mean (mix)  melody_variability (vocals)  \\\n",
+       "86885            0.411808                     0.584400   \n",
+       "92229            0.527524                     0.401779   \n",
+       "123297           0.573993                     0.438452   \n",
+       "44658            0.455458                     0.420922   \n",
+       "86793            0.507734                     0.410575   \n",
+       "\n",
+       "        rhythm_onset_rate (mix)  spectral_centroid_mean custom (mix)  ...  \\\n",
+       "86885                  3.905556                             0.139566  ...   \n",
+       "92229                  4.311111                             0.129095  ...   \n",
+       "123297                 5.907116                             0.111474  ...   \n",
+       "44658                  4.944444                             0.160074  ...   \n",
+       "86793                  1.938889                             0.169642  ...   \n",
+       "\n",
+       "        vocab_richness  loudness_integrated_lufs custom (mix)  \\\n",
+       "86885            0.210                             -13.503495   \n",
+       "92229            0.410                             -15.650211   \n",
+       "123297           0.363                             -12.241549   \n",
+       "44658            0.612                             -12.745431   \n",
+       "86793            0.303                             -11.677942   \n",
+       "\n",
+       "        readability_score  energy_essentia (mix)  energy_librosa (mix)  \\\n",
+       "86885                 5.7               0.298788              0.496576   \n",
+       "92229                61.4               0.197921              0.394782   \n",
+       "123297               23.1               0.342908              0.552086   \n",
+       "44658                 6.9               0.433083              0.633059   \n",
+       "86793                29.0               0.440966              0.612347   \n",
+       "\n",
+       "        rms_energy_mean (mix)  sentiment_score  melody_complexity (vocals)  \\\n",
+       "86885                0.513011            0.039                    2.916667   \n",
+       "92229                0.395509           -0.052                    2.333333   \n",
+       "123297               0.559722            0.083                    0.416667   \n",
+       "44658                0.636330           -0.197                    2.666667   \n",
+       "86793                0.661424           -0.040                    2.333333   \n",
+       "\n",
+       "        avg_word_length                                genre_subgenre_list  \n",
+       "86885              3.08                      [Indie Electronic, Indie Pop]  \n",
+       "92229              3.83                                                 []  \n",
+       "123297             3.58                     [Funk, Soul, Neo Soul, Motown]  \n",
+       "44658              3.92  [Classic Alternative Rock, Modern Alternative ...  \n",
+       "86793              3.64                                                 []  \n",
+       "\n",
+       "[5 rows x 28 columns]"
+      ],
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>_id</th>\n",
+       "      <th>genre</th>\n",
+       "      <th>trimmed_audio_duration_sec</th>\n",
+       "      <th>syllable_count</th>\n",
+       "      <th>word_count</th>\n",
+       "      <th>spectral_contrast_mean (mix)</th>\n",
+       "      <th>chroma_mean (mix)</th>\n",
+       "      <th>melody_variability (vocals)</th>\n",
+       "      <th>rhythm_onset_rate (mix)</th>\n",
+       "      <th>spectral_centroid_mean custom (mix)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>vocab_richness</th>\n",
+       "      <th>loudness_integrated_lufs custom (mix)</th>\n",
+       "      <th>readability_score</th>\n",
+       "      <th>energy_essentia (mix)</th>\n",
+       "      <th>energy_librosa (mix)</th>\n",
+       "      <th>rms_energy_mean (mix)</th>\n",
+       "      <th>sentiment_score</th>\n",
+       "      <th>melody_complexity (vocals)</th>\n",
+       "      <th>avg_word_length</th>\n",
+       "      <th>genre_subgenre_list</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>86885</th>\n",
+       "      <td>6996acb01af4c88ae2d95a39</td>\n",
+       "      <td>EDM</td>\n",
+       "      <td>190.320000</td>\n",
+       "      <td>372.0</td>\n",
+       "      <td>334.0</td>\n",
+       "      <td>20.215855</td>\n",
+       "      <td>0.411808</td>\n",
+       "      <td>0.584400</td>\n",
+       "      <td>3.905556</td>\n",
+       "      <td>0.139566</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.210</td>\n",
+       "      <td>-13.503495</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>0.298788</td>\n",
+       "      <td>0.496576</td>\n",
+       "      <td>0.513011</td>\n",
+       "      <td>0.039</td>\n",
+       "      <td>2.916667</td>\n",
+       "      <td>3.08</td>\n",
+       "      <td>[Indie Electronic, Indie Pop]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>92229</th>\n",
+       "      <td>699c338e0b2a57db2a93724f</td>\n",
+       "      <td>EDM</td>\n",
+       "      <td>190.792290</td>\n",
+       "      <td>194.0</td>\n",
+       "      <td>161.0</td>\n",
+       "      <td>19.167682</td>\n",
+       "      <td>0.527524</td>\n",
+       "      <td>0.401779</td>\n",
+       "      <td>4.311111</td>\n",
+       "      <td>0.129095</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.410</td>\n",
+       "      <td>-15.650211</td>\n",
+       "      <td>61.4</td>\n",
+       "      <td>0.197921</td>\n",
+       "      <td>0.394782</td>\n",
+       "      <td>0.395509</td>\n",
+       "      <td>-0.052</td>\n",
+       "      <td>2.333333</td>\n",
+       "      <td>3.83</td>\n",
+       "      <td>[]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>123297</th>\n",
+       "      <td>697c327515cf3a48da484dff</td>\n",
+       "      <td>RnB Soul</td>\n",
+       "      <td>138.646349</td>\n",
+       "      <td>264.0</td>\n",
+       "      <td>219.0</td>\n",
+       "      <td>18.698643</td>\n",
+       "      <td>0.573993</td>\n",
+       "      <td>0.438452</td>\n",
+       "      <td>5.907116</td>\n",
+       "      <td>0.111474</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.363</td>\n",
+       "      <td>-12.241549</td>\n",
+       "      <td>23.1</td>\n",
+       "      <td>0.342908</td>\n",
+       "      <td>0.552086</td>\n",
+       "      <td>0.559722</td>\n",
+       "      <td>0.083</td>\n",
+       "      <td>0.416667</td>\n",
+       "      <td>3.58</td>\n",
+       "      <td>[Funk, Soul, Neo Soul, Motown]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44658</th>\n",
+       "      <td>68fa1e2f4677fa714c26e8ae</td>\n",
+       "      <td>Rock</td>\n",
+       "      <td>212.214240</td>\n",
+       "      <td>109.0</td>\n",
+       "      <td>85.0</td>\n",
+       "      <td>20.150875</td>\n",
+       "      <td>0.455458</td>\n",
+       "      <td>0.420922</td>\n",
+       "      <td>4.944444</td>\n",
+       "      <td>0.160074</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.612</td>\n",
+       "      <td>-12.745431</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>0.433083</td>\n",
+       "      <td>0.633059</td>\n",
+       "      <td>0.636330</td>\n",
+       "      <td>-0.197</td>\n",
+       "      <td>2.666667</td>\n",
+       "      <td>3.92</td>\n",
+       "      <td>[Classic Alternative Rock, Modern Alternative ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>86793</th>\n",
+       "      <td>6996a38b7c4d735613b230a3</td>\n",
+       "      <td>EDM</td>\n",
+       "      <td>239.879977</td>\n",
+       "      <td>300.0</td>\n",
+       "      <td>234.0</td>\n",
+       "      <td>18.801814</td>\n",
+       "      <td>0.507734</td>\n",
+       "      <td>0.410575</td>\n",
+       "      <td>1.938889</td>\n",
+       "      <td>0.169642</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.303</td>\n",
+       "      <td>-11.677942</td>\n",
+       "      <td>29.0</td>\n",
+       "      <td>0.440966</td>\n",
+       "      <td>0.612347</td>\n",
+       "      <td>0.661424</td>\n",
+       "      <td>-0.040</td>\n",
+       "      <td>2.333333</td>\n",
+       "      <td>3.64</td>\n",
+       "      <td>[]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 28 columns</p>\n",
+       "</div>"
+      ]
+     },
+     "execution_count": 132,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "execution_count": 132
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "d4e8ce5000dcff26"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

rocketship-ml-model-train/train_ml_model_1.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff