Spaces:
Paused
Paused
| # Generated by Copilot | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import json | |
| from typing import Dict, List | |
# Font setup for matplotlib: list fonts that carry Chinese glyphs as the
# sans-serif candidates, and turn off the Unicode minus sign so negative tick
# labels are drawn with a plain hyphen.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS'],
    'axes.unicode_minus': False,
})
class RentalDataVisualizer:
    """Draw charts for rental-listing data with matplotlib and plotly.

    The instance holds two optional inputs:

    * ``df`` - a DataFrame of listings. The methods read the columns
      ``price``, ``area`` and ``price_per_ping`` when they are present.
    * ``analysis_results`` - a dict of precomputed results. The methods read
      the keys ``price_distribution``, ``description_analysis``,
      ``basic_stats`` and ``insights``.

    Every ``plot_*`` / ``create_*`` method prints a message and returns
    ``None`` without drawing when the input it needs is missing.

    NOTE(review): the user-facing strings in this class contain replacement
    characters and look mis-decoded. They are kept as found; only two stray
    backslashes were escaped so that the literals stay well-formed.
    """

    def __init__(self, df: pd.DataFrame = None, analysis_results: Dict = None):
        """Store the inputs and pick the color palette.

        Args:
            df: Listing data, or ``None`` to load it later with ``load_data``.
            analysis_results: Precomputed analysis dict, or ``None`` to load
                it later with ``load_analysis_results``.
        """
        self.df = df
        self.analysis_results = analysis_results
        # Plotly palette; its entries are converted on demand before being
        # handed to matplotlib (see _mpl_palette).
        self.colors = px.colors.qualitative.Set3

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_mpl_color(color):
        """Translate one plotly color into a form matplotlib accepts.

        Strings of the form ``rgb(r, g, b)`` / ``rgba(r, g, b, a)`` with
        0-255 channels become tuples of 0-1 floats. Anything else (hex
        strings, named colors, tuples) is returned unchanged.
        """
        if not isinstance(color, str):
            return color
        text = color.strip()
        lowered = text.lower()
        if not (lowered.startswith(('rgb(', 'rgba(')) and lowered.endswith(')')):
            return color
        try:
            parts = [float(piece)
                     for piece in text[text.index('(') + 1:-1].split(',')]
        except ValueError:
            # Not a numeric rgb()/rgba() body; leave it for matplotlib to judge.
            return color
        if len(parts) not in (3, 4):
            return color
        rgb = tuple(min(max(channel / 255.0, 0.0), 1.0) for channel in parts[:3])
        return rgb if len(parts) == 3 else rgb + (parts[3],)

    def _mpl_palette(self, count):
        """Return ``count`` matplotlib-ready colors, cycling ``self.colors``."""
        palette = [self._to_mpl_color(color) for color in self.colors]
        if not palette:
            return []
        return [palette[index % len(palette)] for index in range(count)]

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        import os  # local import: keeps this block independent of file-level imports

        parent = os.path.dirname(save_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def _save_figure(self, fig, save_path):
        """Lay out, save and always close a matplotlib figure."""
        try:
            self._ensure_parent_dir(save_path)
            fig.tight_layout()
            fig.savefig(save_path, dpi=300, bbox_inches='tight')
        finally:
            # Close even when saving fails so the figure is not left open.
            plt.close(fig)

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_data(self, data_path: str):
        """Read a CSV file into ``self.df``.

        Errors are reported with ``print`` and are not re-raised; on failure
        ``self.df`` keeps its previous value.

        Args:
            data_path: Path to a ``.csv`` file (extension check ignores case).
        """
        try:
            if data_path.lower().endswith('.csv'):
                self.df = pd.read_csv(data_path, encoding='utf-8-sig')
            else:
                raise ValueError("�д���CSV�榡������ɮ�")
            print(f"���\\���J {len(self.df)} ����ƥΩ��ı��")
        except Exception as e:
            print(f"���J��Ʈɵo�Ϳ��~: {e}")

    def load_analysis_results(self, results_path: str):
        """Read a JSON file into ``self.analysis_results``.

        Errors are reported with ``print`` and are not re-raised.

        Args:
            results_path: Path to a UTF-8 JSON file.
        """
        try:
            with open(results_path, 'r', encoding='utf-8') as f:
                self.analysis_results = json.load(f)
            print("���R���G���J���\\")
        except Exception as e:
            print(f"���J���R���G�ɵo�Ϳ��~: {e}")

    # ------------------------------------------------------------------
    # Static charts
    # ------------------------------------------------------------------
    def plot_price_distribution(self, save_path: str = "output/price_distribution.png"):
        """Save a histogram and a box plot of the ``price`` column.

        Args:
            save_path: Destination image path.
        """
        if self.df is None or 'price' not in self.df.columns:
            print("�L�kø�s���������ϡG�ʤָ��")
            return
        # Drop missing prices, as the area / price-per-ping charts do.
        price_data = self.df['price'].dropna()
        if len(price_data) == 0:
            print("�L�kø�s���������ϡG�ʤָ��")
            return
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        # Histogram
        ax1.hist(price_data, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.set_xlabel('���� (��)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�������������')
        ax1.grid(True, alpha=0.3)
        # Box plot
        ax2.boxplot(price_data, vert=True, patch_artist=True,
                    boxprops=dict(facecolor='lightgreen', alpha=0.7))
        ax2.set_ylabel('���� (��)')
        ax2.set_title('���������c�ι�')
        ax2.grid(True, alpha=0.3)
        self._save_figure(fig, save_path)
        print(f"���������Ϥw�x�s: {save_path}")

    def plot_price_ranges(self, save_path: str = "output/price_ranges.png"):
        """Save a bar chart and a pie chart of the precomputed price ranges.

        Reads ``ranges``, ``counts`` and ``percentages`` from
        ``analysis_results['price_distribution']``.

        Args:
            save_path: Destination image path.
        """
        if not self.analysis_results or 'price_distribution' not in self.analysis_results:
            print("�L�kø�s�����϶��ϡG�ʤ֤��R���G")
            return
        dist_data = self.analysis_results['price_distribution']
        ranges = dist_data['ranges']
        counts = dist_data['counts']
        palette = self._mpl_palette(len(ranges))
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        # Bar chart
        bars = ax1.bar(ranges, counts, color=palette, alpha=0.8)
        ax1.set_xlabel('�����϶�')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�U�����϶�����ƶq')
        ax1.tick_params(axis='x', rotation=45)
        # Write each count just above its bar.
        for bar, count in zip(bars, counts):
            ax1.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.5,
                     f'{count}', ha='center', va='bottom')
        # Pie chart
        ax2.pie(dist_data['percentages'], labels=ranges, autopct='%1.1f%%',
                colors=palette, startangle=90)
        ax2.set_title('�����϶���Ҥ���')
        self._save_figure(fig, save_path)
        print(f"�����϶��Ϥw�x�s: {save_path}")

    def plot_area_analysis(self, save_path: str = "output/area_analysis.png"):
        """Save an area-vs-price scatter plot and a histogram of ``area``.

        The scatter plot (with a fitted straight line) is drawn only when a
        ``price`` column exists and at least one row has both values.

        Args:
            save_path: Destination image path.
        """
        if self.df is None or 'area' not in self.df.columns:
            print("�L�kø�s�W�Ƥ��R�ϡG�ʤָ��")
            return
        # Discard missing values.
        area_data = self.df['area'].dropna()
        if len(area_data) == 0:
            print("�L�kø�s�W�Ƥ��R�ϡG�S�����Ī��W�Ƹ��")
            return
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        # Scatter plot: area against price.
        if 'price' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                ax1.scatter(valid_data['area'], valid_data['price'],
                            alpha=0.6, color='coral', s=50)
                ax1.set_xlabel('�W��')
                ax1.set_ylabel('���� (��)')
                ax1.set_title('�W�ƻP�������Y')
                ax1.grid(True, alpha=0.3)
                # Trend line: a degree-1 fit needs two distinct x values to
                # be well determined, so skip it otherwise.
                if valid_data['area'].nunique() >= 2:
                    trend = np.poly1d(
                        np.polyfit(valid_data['area'], valid_data['price'], 1))
                    ax1.plot(valid_data['area'], trend(valid_data['area']),
                             "r--", alpha=0.8)
        # Histogram of area.
        ax2.hist(area_data, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
        ax2.set_xlabel('�W��')
        ax2.set_ylabel('����ƶq')
        ax2.set_title('�W�Ƥ���')
        ax2.grid(True, alpha=0.3)
        self._save_figure(fig, save_path)
        print(f"�W�Ƥ��R�Ϥw�x�s: {save_path}")

    def plot_price_per_ping(self, save_path: str = "output/price_per_ping.png"):
        """Save a histogram and a box plot of the ``price_per_ping`` column.

        Args:
            save_path: Destination image path.
        """
        if self.df is None or 'price_per_ping' not in self.df.columns:
            print("�L�kø�s�C�W�����ϡG�ʤָ��")
            return
        price_per_ping_data = self.df['price_per_ping'].dropna()
        if len(price_per_ping_data) == 0:
            print("�L�kø�s�C�W�����ϡG�S�����Ī��C�W�������")
            return
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        # Histogram
        ax1.hist(price_per_ping_data, bins=20, alpha=0.7, color='gold', edgecolor='black')
        ax1.set_xlabel('�C�W���� (��/�W)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�C�W��������')
        ax1.grid(True, alpha=0.3)
        # Box plot
        ax2.boxplot(price_per_ping_data, vert=True, patch_artist=True,
                    boxprops=dict(facecolor='orange', alpha=0.7))
        ax2.set_ylabel('�C�W���� (��/�W)')
        ax2.set_title('�C�W�����c�ι�')
        ax2.grid(True, alpha=0.3)
        self._save_figure(fig, save_path)
        print(f"�C�W�����Ϥw�x�s: {save_path}")

    def plot_keywords_analysis(self, save_path: str = "output/keywords_analysis.png"):
        """Save a horizontal bar chart of keyword frequencies.

        Reads ``analysis_results['description_analysis']['keywords_frequency']``
        (a mapping of keyword to count) and plots the entries whose count is
        greater than zero.

        Args:
            save_path: Destination image path.
        """
        if not self.analysis_results or 'description_analysis' not in self.analysis_results:
            print("�L�kø�s����r���R�ϡG�ʤ֤��R���G")
            return
        desc_analysis = self.analysis_results['description_analysis']
        if 'keywords_frequency' not in desc_analysis:
            print("�L�kø�s����r���R�ϡG�ʤ�����r���")
            return
        keywords_data = desc_analysis['keywords_frequency']
        # Keep only keywords that actually occur.
        filtered_keywords = {k: v for k, v in keywords_data.items() if v > 0}
        if not filtered_keywords:
            print("�S������������r���")
            return
        keywords = list(filtered_keywords.keys())
        frequencies = list(filtered_keywords.values())
        fig = plt.figure(figsize=(12, 8))
        bars = plt.barh(keywords, frequencies, color=self._mpl_palette(len(keywords)))
        plt.xlabel('�X�{����')
        plt.ylabel('����r')
        plt.title('����y�z����r�W�v���R')
        plt.grid(True, alpha=0.3, axis='x')
        # Write each frequency just past the end of its bar.
        for bar, freq in zip(bars, frequencies):
            plt.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height() / 2.,
                     f'{freq}', ha='left', va='center')
        self._save_figure(fig, save_path)
        print(f"����r���R�Ϥw�x�s: {save_path}")

    # ------------------------------------------------------------------
    # Interactive dashboard
    # ------------------------------------------------------------------
    def create_interactive_dashboard(self, save_path: str = "output/dashboard.html"):
        """Write a 2x2 plotly dashboard to an HTML file.

        Panels (each added only when its input is available): price
        histogram, area-vs-price scatter, price-range bar chart and
        price-per-ping histogram.

        Args:
            save_path: Destination HTML path.
        """
        if self.df is None:
            print("�L�k�Ыػ����O�G�ʤָ��")
            return
        has_price = 'price' in self.df.columns
        # Build the subplot grid.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('��������', '�W��vs����', '�����϶�����', '�C�W��������'),
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "bar"}, {"secondary_y": False}]]
        )
        # 1. Price histogram (skipped when there is no price column).
        if has_price:
            fig.add_trace(
                go.Histogram(x=self.df['price'], name='��������', nbinsx=20,
                             marker_color='skyblue', opacity=0.7),
                row=1, col=1
            )
        # 2. Area vs price scatter (needs both columns).
        if has_price and 'area' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                fig.add_trace(
                    go.Scatter(x=valid_data['area'], y=valid_data['price'],
                               mode='markers', name='�W��vs����',
                               marker=dict(color='coral', size=8, opacity=0.6)),
                    row=1, col=2
                )
        # 3. Price-range bar chart from the precomputed analysis.
        if self.analysis_results and 'price_distribution' in self.analysis_results:
            dist_data = self.analysis_results['price_distribution']
            fig.add_trace(
                go.Bar(x=dist_data['ranges'], y=dist_data['counts'],
                       name='�����϶�', marker_color='lightgreen'),
                row=2, col=1
            )
        # 4. Price-per-ping histogram.
        if 'price_per_ping' in self.df.columns:
            price_per_ping_data = self.df['price_per_ping'].dropna()
            if len(price_per_ping_data) > 0:
                fig.add_trace(
                    go.Histogram(x=price_per_ping_data, name='�C�W����', nbinsx=15,
                                 marker_color='gold', opacity=0.7),
                    row=2, col=2
                )
        # Overall layout.
        fig.update_layout(
            title_text="���������s�ϯ��Υ������R�����O",
            title_x=0.5,
            height=800,
            showlegend=False
        )
        # Axis titles per panel.
        fig.update_xaxes(title_text="���� (��)", row=1, col=1)
        fig.update_yaxes(title_text="����ƶq", row=1, col=1)
        fig.update_xaxes(title_text="�W��", row=1, col=2)
        fig.update_yaxes(title_text="���� (��)", row=1, col=2)
        fig.update_xaxes(title_text="�����϶�", row=2, col=1)
        fig.update_yaxes(title_text="����ƶq", row=2, col=1)
        fig.update_xaxes(title_text="�C�W���� (��/�W)", row=2, col=2)
        fig.update_yaxes(title_text="����ƶq", row=2, col=2)
        # Write the interactive chart.
        self._ensure_parent_dir(save_path)
        fig.write_html(save_path)
        print(f"���ʦ������O�w�x�s: {save_path}")

    def generate_all_visualizations(self):
        """Run every static chart method, then the dashboard, with default paths."""
        print("�}�l�ͦ���ı�ƹϪ�...")
        # Static charts
        self.plot_price_distribution()
        self.plot_price_ranges()
        self.plot_area_analysis()
        self.plot_price_per_ping()
        self.plot_keywords_analysis()
        # Interactive dashboard
        self.create_interactive_dashboard()
        print("�Ҧ���ı�ƹϪ��ͦ������I")

    # ------------------------------------------------------------------
    # Summary image
    # ------------------------------------------------------------------
    def create_summary_report(self, save_path: str = "output/summary_report.png"):
        """Save a text-only summary image built from ``basic_stats``.

        Reads ``total_properties`` and ``price_stats`` (``mean``, ``median``,
        ``min``, ``max``, ``std``, ``q25``, ``q75``) from
        ``analysis_results['basic_stats']``. The ``area_stats`` and
        ``price_per_ping_stats`` sections and the top-level ``insights`` list
        are appended when present.

        Args:
            save_path: Destination image path.
        """
        if not self.analysis_results or 'basic_stats' not in self.analysis_results:
            print("�L�k�ЫغK�n���i�G�ʤ֤��R���G")
            return
        fig, ax = plt.subplots(figsize=(12, 8))
        ax.axis('off')
        # Title
        fig.suptitle('���������s�ϯ��Υ������R�K�n���i', fontsize=20, fontweight='bold', y=0.95)
        # Statistics
        stats = self.analysis_results['basic_stats']
        price_stats = stats['price_stats']
        # Body text
        report_text = f"""
? �������p
? �`�����: {stats['total_properties']} ��
? ��ƽd��: 2�СB��h�B�q��j��
? �����έp
? ��������: {price_stats['mean']:,} ��
? ����Ư���: {price_stats['median']:,} ��
? �̧C����: {price_stats['min']:,} ��
? �̰�����: {price_stats['max']:,} ��
? �зǮt: {price_stats['std']:,} ��
? �����S�x
? �Ĥ@�|�����: {price_stats['q25']:,} ��
? �ĤT�|�����: {price_stats['q75']:,} ��
"""
        # Area statistics, when available.
        if 'area_stats' in stats and stats['area_stats']:
            area_stats = stats['area_stats']
            report_text += f"""
? �W�Ʋέp
? �����W��: {area_stats['mean']} �W
? ����ƩW��: {area_stats['median']} �W
? �̤p�W��: {area_stats['min']} �W
? �̤j�W��: {area_stats['max']} �W
"""
        # Price-per-ping statistics, when available.
        if 'price_per_ping_stats' in stats and stats['price_per_ping_stats']:
            pp_stats = stats['price_per_ping_stats']
            report_text += f"""
? �C�W�����έp
? �����C�W����: {pp_stats['mean']:,} ��/�W
? ����ƨC�W����: {pp_stats['median']:,} ��/�W
? �̧C�C�W����: {pp_stats['min']:,} ��/�W
? �̰��C�W����: {pp_stats['max']:,} ��/�W
"""
        # Insights, when available: one line per entry.
        if 'insights' in self.analysis_results:
            report_text += "\n\n? ���n�}��\n"
            report_text += "".join(
                f"? {insight}\n" for insight in self.analysis_results['insights'])
        # Render the text block.
        ax.text(0.05, 0.95, report_text, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue", alpha=0.8))
        self._save_figure(fig, save_path)
        print(f"�K�n���i�w�x�s: {save_path}")
if __name__ == "__main__":
    # Manual run: start from an empty visualizer and fill it from the files
    # under output/.
    visualizer = RentalDataVisualizer()

    # Inputs: the listing table (CSV) and the analysis summary (JSON).
    visualizer.load_data("output/rental_data.csv")
    visualizer.load_analysis_results("output/analysis_results.json")

    # Outputs: all charts plus the dashboard first, then the summary image.
    for render in (visualizer.generate_all_visualizations,
                   visualizer.create_summary_report):
        render()