{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0327aa17",
   "metadata": {},
   "source": [
    "# Data Preprocessing for NLP Project\n",
    "\n",
    "This notebook contains the data preprocessing steps for our NLP project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4c43313",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries (standard library first, then third-party)\n",
    "import re\n",
    "import string\n",
    "from pathlib import Path\n",
    "\n",
    "import nltk\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d4c2a2b",
   "metadata": {},
   "source": [
    "## Load Raw Data\n",
    "\n",
    "Load the raw data from the `data/raw` directory and preview the first rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b2784d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the raw dataset. The file name is a placeholder:\n",
    "# point it at your own raw data file.\n",
    "RAW_DATA_PATH = Path('../data/raw/your_dataset.csv')\n",
    "\n",
    "# Read the raw CSV into a DataFrame.\n",
    "df = pd.read_csv(RAW_DATA_PATH)\n",
    "\n",
    "# A bare last expression is rendered with the notebook's rich table\n",
    "# display instead of the plain-text dump that print() produces.\n",
    "df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}