{ "cells": [ { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2026-03-21T05:23:49.442453Z", "start_time": "2026-03-21T05:23:49.438223Z" } }, "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import nltk\n", "import re\n", "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", "from transformers import pipeline\n", "from tqdm import tqdm\n", "\n", "# Enable tqdm's pandas integration; `desc` is the label attached to the progress bar.\n", "tqdm.pandas(desc=\"正在进行情感分析推理\")\n", "\n", "# Download the NLTK data packages (VADER lexicon and punkt tokenizer data).\n", "for _nltk_resource in ('vader_lexicon', 'punkt', 'punkt_tab'):\n", "    nltk.download(_nltk_resource)\n", "\n", "# Use seaborn's \"whitegrid\" theme for plots.\n", "sns.set_theme(style=\"whitegrid\")" ], "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package vader_lexicon to\n", "[nltk_data] C:\\Users\\17164/nltk_data...\n", "[nltk_data] Package vader_lexicon is already up-to-date!\n", "[nltk_data] Downloading package punkt to C:\\Users\\17164/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n", "[nltk_data] Downloading package punkt_tab to\n", "[nltk_data] C:\\Users\\17164/nltk_data...\n", "[nltk_data] Package punkt_tab is already up-to-date!\n" ] } ], "execution_count": 9 }, { "metadata": { "ExecuteTime": { "end_time": "2026-03-21T05:23:49.489464Z", "start_time": "2026-03-21T05:23:49.449918Z" } }, "cell_type": "code", "source": [ "# Path to the processed Hong Kong Disneyland dataset (CSV).\n", "file_path = 'data/Disneyland_HongKong_Processed.csv'\n", "\n", "# Load the CSV file into a pandas DataFrame.\n", "df_hk = pd.read_csv(file_path)\n", "\n", "# Preview the first five rows.\n", "display(df_hk.head(n=5))" ], "id": "5120c0f04bd1d27d", "outputs": [ { "data": { "text/plain": [ " Review_ID Rating Year_Month Reviewer_Location \\\n", "0 670772142 4 2019-04 Australia \n", "1 670682799 4 2019-05 Philippines \n", "2 670623270 4 2019-04 United Arab Emirates \n", "3 670607911 4 2019-04 Australia \n", "4 670607296 4 2019-04 United Kingdom \n", "\n", " Review_Text Branch 
\\\n", "0 If you've ever been to Disneyland anywhere you... Disneyland_HongKong \n", "1 Its been a while since d last time we visit HK... Disneyland_HongKong \n", "2 Thanks God it wasn t too hot or too humid wh... Disneyland_HongKong \n", "3 HK Disneyland is a great compact park. Unfortu... Disneyland_HongKong \n", "4 the location is not in the city, took around 1... Disneyland_HongKong \n", "\n", " Monthly_Other_Visitor_Arrivals Expected_Staying_Days_Other_Visitors \n", "0 1042157.0 7295099.0 \n", "1 950880.0 6656160.0 \n", "2 1042157.0 7295099.0 \n", "3 1042157.0 7295099.0 \n", "4 1042157.0 7295099.0 " ], "text/html": [ "
| \n", " | Review_ID | \n", "Rating | \n", "Year_Month | \n", "Reviewer_Location | \n", "Review_Text | \n", "Branch | \n", "Monthly_Other_Visitor_Arrivals | \n", "Expected_Staying_Days_Other_Visitors | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "670772142 | \n", "4 | \n", "2019-04 | \n", "Australia | \n", "If you've ever been to Disneyland anywhere you... | \n", "Disneyland_HongKong | \n", "1042157.0 | \n", "7295099.0 | \n", "
| 1 | \n", "670682799 | \n", "4 | \n", "2019-05 | \n", "Philippines | \n", "Its been a while since d last time we visit HK... | \n", "Disneyland_HongKong | \n", "950880.0 | \n", "6656160.0 | \n", "
| 2 | \n", "670623270 | \n", "4 | \n", "2019-04 | \n", "United Arab Emirates | \n", "Thanks God it wasn t too hot or too humid wh... | \n", "Disneyland_HongKong | \n", "1042157.0 | \n", "7295099.0 | \n", "
| 3 | \n", "670607911 | \n", "4 | \n", "2019-04 | \n", "Australia | \n", "HK Disneyland is a great compact park. Unfortu... | \n", "Disneyland_HongKong | \n", "1042157.0 | \n", "7295099.0 | \n", "
| 4 | \n", "670607296 | \n", "4 | \n", "2019-04 | \n", "United Kingdom | \n", "the location is not in the city, took around 1... | \n", "Disneyland_HongKong | \n", "1042157.0 | \n", "7295099.0 | \n", "