Upload 2 files

Browse files

Files changed (3) hide show

.gitattributes +1 -0
SentimentTensor.ipynb +412 -0
sentitensor1.keras +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+sentitensor1.keras filter=lfs diff=lfs merge=lfs -text

SentimentTensor.ipynb ADDED Viewed

	@@ -0,0 +1,412 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e59ad5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import random\n",
+    "\n",
+    "# Define the path to the full Yelp dataset file\n",
+    "full_data_path = \"yelp_academic_dataset_review.json\"\n",
+    "\n",
+    "# Define the path to save the sampled dataset file\n",
+    "sampled_data_path = \"yelp_academic_dataset_review_sampled.json\"\n",
+    "\n",
+    "# Define the number of reviews to sample (adjust as needed)\n",
+    "num_reviews_to_sample = 10000  # Example: Sample 10,000 reviews\n",
+    "\n",
+    "# Seed a dedicated RNG so that re-running this cell reproduces the same sample\n",
+    "random_seed = 42\n",
+    "rng = random.Random(random_seed)\n",
+    "\n",
+    "# Reservoir sampling (Algorithm R): stream the file once and hold at most\n",
+    "# num_reviews_to_sample raw lines in memory instead of every parsed review.\n",
+    "# If the file holds fewer reviews than requested, all of them are kept.\n",
+    "sampled_lines = []\n",
+    "lines_seen = 0\n",
+    "with open(full_data_path, \"r\", encoding=\"utf-8\") as f:\n",
+    "    for line in f:\n",
+    "        if not line.strip():\n",
+    "            continue  # ignore blank lines rather than failing in json.loads\n",
+    "        lines_seen += 1\n",
+    "        if len(sampled_lines) < num_reviews_to_sample:\n",
+    "            sampled_lines.append(line)\n",
+    "        else:\n",
+    "            # Keep the new line with probability num_reviews_to_sample / lines_seen\n",
+    "            slot = rng.randrange(lines_seen)\n",
+    "            if slot < num_reviews_to_sample:\n",
+    "                sampled_lines[slot] = line\n",
+    "\n",
+    "# The reservoir keeps early lines in file order, so shuffle before writing\n",
+    "rng.shuffle(sampled_lines)\n",
+    "\n",
+    "# Parse only the lines that were actually sampled\n",
+    "sampled_reviews = [json.loads(line) for line in sampled_lines]\n",
+    "\n",
+    "# Save the sampled reviews to a new JSON file (one JSON object per line)\n",
+    "with open(sampled_data_path, \"w\", encoding=\"utf-8\") as f:\n",
+    "    for review in sampled_reviews:\n",
+    "        json.dump(review, f)\n",
+    "        f.write(\"\\n\")\n",
+    "\n",
+    "print(f\"Sampled {len(sampled_reviews)} reviews and saved to {sampled_data_path}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f562ff04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gzip\n",
+    "\n",
+    "# Target path for the gzip-compressed copy of the sampled dataset\n",
+    "compressed_data_path = \"yelp_academic_dataset_review_sampled.json.gz\"\n",
+    "\n",
+    "# Stream the sampled file into the gzip archive one line at a time\n",
+    "with open(sampled_data_path, \"rb\") as f_in, gzip.open(compressed_data_path, \"wb\") as f_out:\n",
+    "    for raw_line in f_in:\n",
+    "        f_out.write(raw_line)\n",
+    "\n",
+    "print(f\"Compressed file saved to {compressed_data_path}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "337f6649",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import classification_report, accuracy_score\n",
+    "\n",
+    "# Read the gzip-compressed JSON-lines sample into a DataFrame (one review per line)\n",
+    "data_path = \"yelp_academic_dataset_review_sampled.json.gz\"  # Adjust the path\n",
+    "data = pd.read_json(\n",
+    "    data_path,\n",
+    "    lines=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e0936968",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>review_id</th>\n",
+       "      <th>user_id</th>\n",
+       "      <th>business_id</th>\n",
+       "      <th>stars</th>\n",
+       "      <th>useful</th>\n",
+       "      <th>funny</th>\n",
+       "      <th>cool</th>\n",
+       "      <th>text</th>\n",
+       "      <th>date</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>f9khuhJxadQhg6CaI1cRdA</td>\n",
+       "      <td>4Qijwb2RDiUGc4SBjA2lJg</td>\n",
+       "      <td>nTBStZYJfHGdSZJbpaBiPA</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>I had read about this place adding a second lo...</td>\n",
+       "      <td>2011-02-08 17:48:40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>WH0c1wEMu4XRTIysI7uMig</td>\n",
+       "      <td>7JeW4Mlvqdp7R-FAUBB_vA</td>\n",
+       "      <td>H3Tmgv94pbGvBIKZ4Rs9Cw</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>I had dinner at Tin Angel on Saturday and was ...</td>\n",
+       "      <td>2012-04-16 13:30:02</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>S1Lg07IGrupUDk7Uu9rnQQ</td>\n",
+       "      <td>umUy5DTpVrvQDXLR4gywHA</td>\n",
+       "      <td>H7BikysfQbS9bMULQsCU_Q</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>I was really excited to visit the store, havin...</td>\n",
+       "      <td>2019-10-05 00:17:15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>AH4_Pua0yzK4oU9FoU8hXQ</td>\n",
+       "      <td>uwYw0KKj16lC_nq_HsQGVQ</td>\n",
+       "      <td>Xb6QfBbleg2aJT2cG807jQ</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>I hired Two Men and a Truck for my recent move...</td>\n",
+       "      <td>2016-06-02 13:27:24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>9_CIDS98p6ZsTRiCvmuIKA</td>\n",
+       "      <td>l9bVKgzvjjcU8Iang3Tvtg</td>\n",
+       "      <td>lqSJkyNSE1yPeux4PoR-pg</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>i was very disappointed to this company. They ...</td>\n",
+       "      <td>2020-06-05 22:28:47</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9995</th>\n",
+       "      <td>5MknizHCBH3jpj5DJd-6Uw</td>\n",
+       "      <td>d2VrfngFJ1f1nvNAsojJzw</td>\n",
+       "      <td>hy-E7DdXbdgTbwphKUYW1w</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>This was such a trash experience. We signed up...</td>\n",
+       "      <td>2021-07-29 16:10:10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9996</th>\n",
+       "      <td>mXFlaWuiCnyCkZ_SIAGqew</td>\n",
+       "      <td>cHWDGVf4LofBk9wZ2mnXQQ</td>\n",
+       "      <td>AYWSFv6QxF5IjQSxITMUug</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>I have been going to Goshen Nail Salon for the...</td>\n",
+       "      <td>2018-03-16 00:30:50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9997</th>\n",
+       "      <td>W1Ij-zC3ufRU5MTEgHLjmg</td>\n",
+       "      <td>aN9nWudz5rfar7rHr9lHfA</td>\n",
+       "      <td>oyJ3gXNkV0DO0YxcaTgtTg</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Ok. This place surprised me. I always thought ...</td>\n",
+       "      <td>2018-06-01 23:56:44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9998</th>\n",
+       "      <td>HNejB5H9iD1qe3MMKxg6sg</td>\n",
+       "      <td>6JejVLZl5M-IB3UkNTkXtQ</td>\n",
+       "      <td>WJLKQTduGumxjlXelqiuKg</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Meets expectations, but quirky.  The trucks re...</td>\n",
+       "      <td>2016-06-29 15:57:34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9999</th>\n",
+       "      <td>LSJGzHJ7whqNn5uPxidMjQ</td>\n",
+       "      <td>_Av1LaAAY0Y8YcPp7Ck7fg</td>\n",
+       "      <td>M983OPfVRnwvG7zEOzykCA</td>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Jordan was our waiter. He was very attentive a...</td>\n",
+       "      <td>2017-03-15 23:54:07</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10000 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   review_id                 user_id             business_id  \\\n",
+       "0     f9khuhJxadQhg6CaI1cRdA  4Qijwb2RDiUGc4SBjA2lJg  nTBStZYJfHGdSZJbpaBiPA   \n",
+       "1     WH0c1wEMu4XRTIysI7uMig  7JeW4Mlvqdp7R-FAUBB_vA  H3Tmgv94pbGvBIKZ4Rs9Cw   \n",
+       "2     S1Lg07IGrupUDk7Uu9rnQQ  umUy5DTpVrvQDXLR4gywHA  H7BikysfQbS9bMULQsCU_Q   \n",
+       "3     AH4_Pua0yzK4oU9FoU8hXQ  uwYw0KKj16lC_nq_HsQGVQ  Xb6QfBbleg2aJT2cG807jQ   \n",
+       "4     9_CIDS98p6ZsTRiCvmuIKA  l9bVKgzvjjcU8Iang3Tvtg  lqSJkyNSE1yPeux4PoR-pg   \n",
+       "...                      ...                     ...                     ...   \n",
+       "9995  5MknizHCBH3jpj5DJd-6Uw  d2VrfngFJ1f1nvNAsojJzw  hy-E7DdXbdgTbwphKUYW1w   \n",
+       "9996  mXFlaWuiCnyCkZ_SIAGqew  cHWDGVf4LofBk9wZ2mnXQQ  AYWSFv6QxF5IjQSxITMUug   \n",
+       "9997  W1Ij-zC3ufRU5MTEgHLjmg  aN9nWudz5rfar7rHr9lHfA  oyJ3gXNkV0DO0YxcaTgtTg   \n",
+       "9998  HNejB5H9iD1qe3MMKxg6sg  6JejVLZl5M-IB3UkNTkXtQ  WJLKQTduGumxjlXelqiuKg   \n",
+       "9999  LSJGzHJ7whqNn5uPxidMjQ  _Av1LaAAY0Y8YcPp7Ck7fg  M983OPfVRnwvG7zEOzykCA   \n",
+       "\n",
+       "      stars  useful  funny  cool  \\\n",
+       "0         4       1      0     1   \n",
+       "1         5       1      0     1   \n",
+       "2         2       4      1     0   \n",
+       "3         1       1      0     0   \n",
+       "4         1       0      0     0   \n",
+       "...     ...     ...    ...   ...   \n",
+       "9995      1       1      0     0   \n",
+       "9996      5       0      0     0   \n",
+       "9997      5       0      0     0   \n",
+       "9998      3       0      0     0   \n",
+       "9999      5       0      0     0   \n",
+       "\n",
+       "                                                   text                date  \n",
+       "0     I had read about this place adding a second lo... 2011-02-08 17:48:40  \n",
+       "1     I had dinner at Tin Angel on Saturday and was ... 2012-04-16 13:30:02  \n",
+       "2     I was really excited to visit the store, havin... 2019-10-05 00:17:15  \n",
+       "3     I hired Two Men and a Truck for my recent move... 2016-06-02 13:27:24  \n",
+       "4     i was very disappointed to this company. They ... 2020-06-05 22:28:47  \n",
+       "...                                                 ...                 ...  \n",
+       "9995  This was such a trash experience. We signed up... 2021-07-29 16:10:10  \n",
+       "9996  I have been going to Goshen Nail Salon for the... 2018-03-16 00:30:50  \n",
+       "9997  Ok. This place surprised me. I always thought ... 2018-06-01 23:56:44  \n",
+       "9998  Meets expectations, but quirky.  The trucks re... 2016-06-29 15:57:34  \n",
+       "9999  Jordan was our waiter. He was very attentive a... 2017-03-15 23:54:07  \n",
+       "\n",
+       "[10000 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "466ef010",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def map_sentiment(stars):\n",
+    "    \"\"\"Translate a star rating into a coarse sentiment label.\n",
+    "\n",
+    "    Returns \"positive\" for ratings of 4 or more, \"negative\" for ratings\n",
+    "    of 2 or less, and \"neutral\" for anything in between.\n",
+    "    \"\"\"\n",
+    "    if stars >= 4:\n",
+    "        return \"positive\"\n",
+    "    if stars <= 2:\n",
+    "        return \"negative\"\n",
+    "    return \"neutral\"\n",
+    "\n",
+    "\n",
+    "# Label every review by running its star rating through map_sentiment\n",
+    "star_ratings = data['stars']\n",
+    "data['sentiment'] = star_ratings.apply(map_sentiment)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "756b3285",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The 'sentiment' column is assigned in the preceding cell, so it is not recomputed here.\n",
+    "# Split into training and testing sets, stratifying on the sentiment label so that\n",
+    "# both splits keep the same positive/neutral/negative proportions.\n",
+    "train_data, test_data = train_test_split(\n",
+    "    data, test_size=0.2, random_state=42, stratify=data['sentiment']\n",
+    ")\n",
+    "\n",
+    "# Save the preprocessed data\n",
+    "train_data.to_csv(\"preprocessed_train_data.csv\", index=False)\n",
+    "test_data.to_csv(\"preprocessed_test_data.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7257dd9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the %pip magic explicitly so the package is installed into this kernel's environment\n",
+    "%pip install torch\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f03a2ad5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the %pip magic explicitly so the package is installed into this kernel's environment\n",
+    "%pip install transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ecdcf9c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import torch\n",
+    "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments\n",
+    "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
+    "\n",
+    "# Reload the train/test splits from their CSV files (adjust the paths if they live elsewhere)\n",
+    "train_data, test_data = (\n",
+    "    pd.read_csv(csv_path)\n",
+    "    for csv_path in (\"preprocessed_train_data.csv\", \"preprocessed_test_data.csv\")\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c83718d7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

sentitensor1.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad5474595c05edd8d3ff4aa18749c0c6accb27f2402ab53c4fc35ff3d0286995
+size 29927801