# 591Analyzer / visualizer.py
# 54justin's picture
# Upload 7 files
# f205f47 verified
# Generated by Copilot
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
from typing import Dict, List
# Configure the sans-serif font fallback list (the original comment indicates
# these are meant for Chinese text) and turn off matplotlib's Unicode minus
# sign so negative tick labels use a plain ASCII hyphen.
# NOTE(review): presumably the hyphen is used because these fonts may lack the
# Unicode minus glyph — confirm on the target machines.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS'],
    'axes.unicode_minus': False,
})
class RentalDataVisualizer:
    """Render static (matplotlib) and interactive (plotly) charts for rental data.

    The visualizer works from two optional inputs:

    * ``df`` - a DataFrame of listings; the plots read the ``price``,
      ``area`` and ``price_per_ping`` columns when they are present.
    * ``analysis_results`` - a dict of pre-computed statistics; the plots read
      the ``price_distribution``, ``description_analysis``, ``basic_stats``
      and ``insights`` keys when they are present.

    Every plotting method is best-effort: when its input is missing it prints
    a message and returns ``None`` instead of raising.

    NOTE(review): the user-facing string literals in this class are mojibake
    (they look like Big5 text decoded with the wrong codec, leaving U+FFFD
    replacement characters). They are reproduced as found because the original
    text cannot be recovered with certainty from this copy; restore them from
    a correctly encoded version of the file.
    """

    def __init__(self, df: pd.DataFrame = None, analysis_results: Dict = None):
        """Store the inputs and pick the colour palette.

        Args:
            df: Listing data, or ``None`` to load it later via ``load_data``.
            analysis_results: Analysis dict, or ``None`` to load it later via
                ``load_analysis_results``.
        """
        self.df = df
        self.analysis_results = analysis_results
        # plotly palette; converted by _mpl_palette() before matplotlib use.
        self.colors = px.colors.qualitative.Set3

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_mpl_color(color):
        """Convert a plotly ``'rgb(r, g, b)'`` string to a matplotlib RGB tuple.

        plotly's qualitative ``Set3`` palette is expressed as ``'rgb(...)'``
        strings with 0-255 channels, a notation matplotlib does not parse.
        Anything that is not in that notation is returned unchanged.
        """
        if isinstance(color, str):
            text = color.strip()
            if text.lower().startswith('rgb(') and text.endswith(')'):
                channels = text[4:-1].split(',')
                if len(channels) == 3:
                    try:
                        return tuple(float(c) / 255.0 for c in channels)
                    except ValueError:
                        # Not numeric after all; hand it back untouched.
                        return color
        return color

    def _mpl_palette(self, count: int) -> list:
        """Return ``count`` matplotlib-compatible colours, cycling ``self.colors``."""
        palette = self.colors
        if not palette:
            return []
        size = len(palette)
        return [self._to_mpl_color(palette[i % size]) for i in range(count)]

    @staticmethod
    def _ensure_parent_dir(save_path: str) -> None:
        """Create the directory that will contain ``save_path`` if it is missing."""
        import os  # local import so this class does not depend on the file header

        parent = os.path.dirname(save_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def _save_and_close(self, fig, save_path: str) -> None:
        """Write ``fig`` to ``save_path`` and always release the figure."""
        try:
            self._ensure_parent_dir(save_path)
            fig.tight_layout()
            fig.savefig(save_path, dpi=300, bbox_inches='tight')
        finally:
            # Close even when saving fails so figures do not accumulate.
            plt.close(fig)

    def _build_report_text(self) -> str:
        """Assemble the multi-line text shown by ``create_summary_report``.

        Reads ``analysis_results['basic_stats']`` (required) and the optional
        ``area_stats``, ``price_per_ping_stats`` and ``insights`` entries.
        """
        stats = self.analysis_results['basic_stats']
        price_stats = stats['price_stats']

        # Each section starts with an empty entry, which becomes a blank line
        # once everything is joined with newlines.
        lines = [
            "",
            "? �������p",
            f"? �`�����: {stats['total_properties']} ��",
            "? ��ƽd��: 2�СB��h�B�q��j��",
            "? �����έp",
            f"? ��������: {price_stats['mean']:,} ��",
            f"? ����Ư���: {price_stats['median']:,} ��",
            f"? �̧C����: {price_stats['min']:,} ��",
            f"? �̰�����: {price_stats['max']:,} ��",
            f"? �зǮt: {price_stats['std']:,} ��",
            "? �����S�x",
            f"? �Ĥ@�|�����: {price_stats['q25']:,} ��",
            f"? �ĤT�|�����: {price_stats['q75']:,} ��",
        ]

        # Area statistics (only when present and non-empty).
        if 'area_stats' in stats and stats['area_stats']:
            area_stats = stats['area_stats']
            lines += [
                "",
                "? �W�Ʋέp",
                f"? �����W��: {area_stats['mean']} �W",
                f"? ����ƩW��: {area_stats['median']} �W",
                f"? �̤p�W��: {area_stats['min']} �W",
                f"? �̤j�W��: {area_stats['max']} �W",
            ]

        # Price-per-ping statistics (only when present and non-empty).
        if 'price_per_ping_stats' in stats and stats['price_per_ping_stats']:
            pp_stats = stats['price_per_ping_stats']
            lines += [
                "",
                "? �C�W�����έp",
                f"? �����C�W����: {pp_stats['mean']:,} ��/�W",
                f"? ����ƨC�W����: {pp_stats['median']:,} ��/�W",
                f"? �̧C�C�W����: {pp_stats['min']:,} ��/�W",
                f"? �̰��C�W����: {pp_stats['max']:,} ��/�W",
            ]

        # Insights (only when the key exists): two blank lines, a heading,
        # then one bullet per insight.
        if 'insights' in self.analysis_results:
            lines += ["", "", "? ���n�}��"]
            lines += [f"? {insight}" for insight in self.analysis_results['insights']]

        return "\n".join(lines) + "\n"

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_data(self, data_path: str):
        """Load listing data from a CSV file into ``self.df``.

        Only paths ending in ``.csv`` (case-insensitive) are accepted. Any
        failure is reported with ``print`` and leaves ``self.df`` unchanged.
        """
        try:
            if data_path.lower().endswith('.csv'):
                self.df = pd.read_csv(data_path, encoding='utf-8-sig')
            else:
                raise ValueError("�д���CSV�榡������ɮ�")
            # The backslash is part of the mis-decoded text; it is escaped so
            # the literal stays valid and prints exactly as it appears here.
            print(f"���\\���J {len(self.df)} ����ƥΩ��ı��")
        except Exception as e:
            print(f"���J��Ʈɵo�Ϳ��~: {e}")

    def load_analysis_results(self, results_path: str):
        """Load the analysis dict from a UTF-8 JSON file into ``self.analysis_results``.

        Any failure is reported with ``print`` and leaves the attribute unchanged.
        """
        try:
            with open(results_path, 'r', encoding='utf-8') as f:
                self.analysis_results = json.load(f)
            # Trailing backslash is part of the mis-decoded text; unescaped it
            # swallowed the closing quote and made the file unparseable.
            print("���R���G���J���\\")
        except Exception as e:
            print(f"���J���R���G�ɵo�Ϳ��~: {e}")

    # ------------------------------------------------------------------
    # Static charts
    # ------------------------------------------------------------------
    def plot_price_distribution(self, save_path: str = "output/price_distribution.png"):
        """Save a histogram and a box plot of the ``price`` column.

        Prints a message and returns without plotting when there is no
        DataFrame, no ``price`` column, or no non-missing price.
        """
        if self.df is None or 'price' not in self.df.columns:
            print("�L�kø�s���������ϡG�ʤָ��")
            return

        # Drop missing prices first, consistent with the area and
        # price-per-ping plots; NaN carries no information for either chart.
        price_data = self.df['price'].dropna()
        if len(price_data) == 0:
            print("�L�kø�s���������ϡG�ʤָ��")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Histogram
        ax1.hist(price_data, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
        ax1.set_xlabel('���� (��)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�������������')
        ax1.grid(True, alpha=0.3)

        # Box plot
        ax2.boxplot(price_data, vert=True, patch_artist=True,
                    boxprops=dict(facecolor='lightgreen', alpha=0.7))
        ax2.set_ylabel('���� (��)')
        ax2.set_title('���������c�ι�')
        ax2.grid(True, alpha=0.3)

        self._save_and_close(fig, save_path)
        print(f"���������Ϥw�x�s: {save_path}")

    def plot_price_ranges(self, save_path: str = "output/price_ranges.png"):
        """Save a bar chart and a pie chart of the pre-computed price ranges.

        Reads ``ranges``, ``counts`` and ``percentages`` from
        ``analysis_results['price_distribution']``.
        """
        if not self.analysis_results or 'price_distribution' not in self.analysis_results:
            print("�L�kø�s�����϶��ϡG�ʤ֤��R���G")
            return

        dist_data = self.analysis_results['price_distribution']
        # self.colors holds plotly 'rgb(...)' strings; matplotlib needs tuples.
        palette = self._mpl_palette(len(dist_data['ranges']))

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Bar chart
        bars = ax1.bar(dist_data['ranges'], dist_data['counts'],
                       color=palette, alpha=0.8)
        ax1.set_xlabel('�����϶�')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�U�����϶�����ƶq')
        ax1.tick_params(axis='x', rotation=45)

        # Print each count just above its bar.
        for bar, count in zip(bars, dist_data['counts']):
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
                     f'{count}', ha='center', va='bottom')

        # Pie chart
        ax2.pie(dist_data['percentages'], labels=dist_data['ranges'], autopct='%1.1f%%',
                colors=palette, startangle=90)
        ax2.set_title('�����϶���Ҥ���')

        self._save_and_close(fig, save_path)
        print(f"�����϶��Ϥw�x�s: {save_path}")

    def plot_area_analysis(self, save_path: str = "output/area_analysis.png"):
        """Save an area-vs-price scatter (with linear trend) and an area histogram.

        The scatter panel is only drawn when a ``price`` column exists and at
        least one row has both values; otherwise that panel is left empty.
        """
        if self.df is None or 'area' not in self.df.columns:
            print("�L�kø�s�W�Ƥ��R�ϡG�ʤָ��")
            return

        # Ignore rows without an area value.
        area_data = self.df['area'].dropna()
        if len(area_data) == 0:
            print("�L�kø�s�W�Ƥ��R�ϡG�S�����Ī��W�Ƹ��")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Scatter: area against price
        if 'price' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                ax1.scatter(valid_data['area'], valid_data['price'],
                            alpha=0.6, color='coral', s=50)
                ax1.set_xlabel('�W��')
                ax1.set_ylabel('���� (��)')
                ax1.set_title('�W�ƻP�������Y')
                ax1.grid(True, alpha=0.3)

                # A degree-1 fit needs at least two distinct x values;
                # otherwise the fit is ill-conditioned, so skip the line.
                if len(valid_data) >= 2 and valid_data['area'].nunique() > 1:
                    z = np.polyfit(valid_data['area'], valid_data['price'], 1)
                    p = np.poly1d(z)
                    ax1.plot(valid_data['area'], p(valid_data['area']), "r--", alpha=0.8)

        # Histogram of areas
        ax2.hist(area_data, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
        ax2.set_xlabel('�W��')
        ax2.set_ylabel('����ƶq')
        ax2.set_title('�W�Ƥ���')
        ax2.grid(True, alpha=0.3)

        self._save_and_close(fig, save_path)
        print(f"�W�Ƥ��R�Ϥw�x�s: {save_path}")

    def plot_price_per_ping(self, save_path: str = "output/price_per_ping.png"):
        """Save a histogram and a box plot of the ``price_per_ping`` column."""
        if self.df is None or 'price_per_ping' not in self.df.columns:
            print("�L�kø�s�C�W�����ϡG�ʤָ��")
            return

        price_per_ping_data = self.df['price_per_ping'].dropna()
        if len(price_per_ping_data) == 0:
            print("�L�kø�s�C�W�����ϡG�S�����Ī��C�W�������")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Histogram
        ax1.hist(price_per_ping_data, bins=20, alpha=0.7, color='gold', edgecolor='black')
        ax1.set_xlabel('�C�W���� (��/�W)')
        ax1.set_ylabel('����ƶq')
        ax1.set_title('�C�W��������')
        ax1.grid(True, alpha=0.3)

        # Box plot
        ax2.boxplot(price_per_ping_data, vert=True, patch_artist=True,
                    boxprops=dict(facecolor='orange', alpha=0.7))
        ax2.set_ylabel('�C�W���� (��/�W)')
        ax2.set_title('�C�W�����c�ι�')
        ax2.grid(True, alpha=0.3)

        self._save_and_close(fig, save_path)
        print(f"�C�W�����Ϥw�x�s: {save_path}")

    def plot_keywords_analysis(self, save_path: str = "output/keywords_analysis.png"):
        """Save a horizontal bar chart of keyword frequencies.

        Reads ``analysis_results['description_analysis']['keywords_frequency']``
        and plots only the keywords whose frequency is greater than zero.
        """
        if not self.analysis_results or 'description_analysis' not in self.analysis_results:
            print("�L�kø�s����r���R�ϡG�ʤ֤��R���G")
            return

        desc_analysis = self.analysis_results['description_analysis']
        if 'keywords_frequency' not in desc_analysis:
            print("�L�kø�s����r���R�ϡG�ʤ�����r���")
            return

        keywords_data = desc_analysis['keywords_frequency']

        # Keep only keywords that actually occur.
        filtered_keywords = {k: v for k, v in keywords_data.items() if v > 0}
        if not filtered_keywords:
            print("�S������������r���")
            return

        keywords = list(filtered_keywords.keys())
        frequencies = list(filtered_keywords.values())

        fig, ax = plt.subplots(figsize=(12, 8))
        # self.colors holds plotly 'rgb(...)' strings; matplotlib needs tuples.
        bars = ax.barh(keywords, frequencies, color=self._mpl_palette(len(keywords)))
        ax.set_xlabel('�X�{����')
        ax.set_ylabel('����r')
        ax.set_title('����y�z����r�W�v���R')
        ax.grid(True, alpha=0.3, axis='x')

        # Print each frequency just right of its bar.
        for bar, freq in zip(bars, frequencies):
            width = bar.get_width()
            ax.text(width + 0.1, bar.get_y() + bar.get_height() / 2.,
                    f'{freq}', ha='left', va='center')

        self._save_and_close(fig, save_path)
        print(f"����r���R�Ϥw�x�s: {save_path}")

    # ------------------------------------------------------------------
    # Interactive dashboard
    # ------------------------------------------------------------------
    def create_interactive_dashboard(self, save_path: str = "output/dashboard.html"):
        """Write a 2x2 plotly dashboard to an HTML file.

        Panels: price histogram, area-vs-price scatter, price-range bars and
        price-per-ping histogram. A panel whose input column or analysis entry
        is missing is left empty instead of raising.
        """
        if self.df is None:
            print("�L�k�Ыػ����O�G�ʤָ��")
            return

        has_price = 'price' in self.df.columns

        # Build the subplot grid.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('��������', '�W��vs����', '�����϶�����', '�C�W��������'),
            specs=[[{"secondary_y": False}, {"secondary_y": False}],
                   [{"type": "bar"}, {"secondary_y": False}]]
        )

        # 1. Price histogram (needs the price column).
        if has_price:
            fig.add_trace(
                go.Histogram(x=self.df['price'], name='��������', nbinsx=20,
                             marker_color='skyblue', opacity=0.7),
                row=1, col=1
            )

        # 2. Area-vs-price scatter (needs both columns).
        if has_price and 'area' in self.df.columns:
            valid_data = self.df.dropna(subset=['area', 'price'])
            if len(valid_data) > 0:
                fig.add_trace(
                    go.Scatter(x=valid_data['area'], y=valid_data['price'],
                               mode='markers', name='�W��vs����',
                               marker=dict(color='coral', size=8, opacity=0.6)),
                    row=1, col=2
                )

        # 3. Price-range bars (from the analysis results).
        if self.analysis_results and 'price_distribution' in self.analysis_results:
            dist_data = self.analysis_results['price_distribution']
            fig.add_trace(
                go.Bar(x=dist_data['ranges'], y=dist_data['counts'],
                       name='�����϶�', marker_color='lightgreen'),
                row=2, col=1
            )

        # 4. Price-per-ping histogram.
        if 'price_per_ping' in self.df.columns:
            price_per_ping_data = self.df['price_per_ping'].dropna()
            if len(price_per_ping_data) > 0:
                fig.add_trace(
                    go.Histogram(x=price_per_ping_data, name='�C�W����', nbinsx=15,
                                 marker_color='gold', opacity=0.7),
                    row=2, col=2
                )

        # Overall layout
        fig.update_layout(
            title_text="���������s�ϯ��Υ������R�����O",
            title_x=0.5,
            height=800,
            showlegend=False
        )

        # Axis titles
        fig.update_xaxes(title_text="���� (��)", row=1, col=1)
        fig.update_yaxes(title_text="����ƶq", row=1, col=1)
        fig.update_xaxes(title_text="�W��", row=1, col=2)
        fig.update_yaxes(title_text="���� (��)", row=1, col=2)
        fig.update_xaxes(title_text="�����϶�", row=2, col=1)
        fig.update_yaxes(title_text="����ƶq", row=2, col=1)
        fig.update_xaxes(title_text="�C�W���� (��/�W)", row=2, col=2)
        fig.update_yaxes(title_text="����ƶq", row=2, col=2)

        # Write the interactive chart.
        self._ensure_parent_dir(save_path)
        fig.write_html(save_path)
        print(f"���ʦ������O�w�x�s: {save_path}")

    def generate_all_visualizations(self):
        """Run every static plot and then the interactive dashboard, using default paths."""
        print("�}�l�ͦ���ı�ƹϪ�...")

        # Static charts
        self.plot_price_distribution()
        self.plot_price_ranges()
        self.plot_area_analysis()
        self.plot_price_per_ping()
        self.plot_keywords_analysis()

        # Interactive dashboard
        self.create_interactive_dashboard()

        print("�Ҧ���ı�ƹϪ��ͦ������I")

    # ------------------------------------------------------------------
    # Summary report
    # ------------------------------------------------------------------
    def create_summary_report(self, save_path: str = "output/summary_report.png"):
        """Save an image containing a text summary of the analysis results.

        Requires ``analysis_results['basic_stats']``; prints a message and
        returns when it is missing.
        """
        if not self.analysis_results or 'basic_stats' not in self.analysis_results:
            print("�L�k�ЫغK�n���i�G�ʤ֤��R���G")
            return

        # Build the text before creating the figure so a malformed stats dict
        # fails without leaving an open figure behind.
        report_text = self._build_report_text()

        fig, ax = plt.subplots(figsize=(12, 8))
        ax.axis('off')

        # Title
        fig.suptitle('���������s�ϯ��Υ������R�K�n���i', fontsize=20, fontweight='bold', y=0.95)

        # Body text inside a rounded, light-blue box.
        ax.text(0.05, 0.95, report_text, transform=ax.transAxes, fontsize=12,
                verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue", alpha=0.8))

        self._save_and_close(fig, save_path)
        print(f"�K�n���i�w�x�s: {save_path}")
if __name__ == "__main__":
    # Manual smoke run: start with an empty visualizer, pull both inputs from
    # the output/ folder, then produce every chart followed by the summary
    # report image.
    viz = RentalDataVisualizer()

    # Inputs
    viz.load_data("output/rental_data.csv")
    viz.load_analysis_results("output/analysis_results.json")

    # All charts, then the summary report
    viz.generate_all_visualizations()
    viz.create_summary_report()