Spaces:

54justin
/

591Analyzer

Paused

App Files Files Community

591Analyzer / visualizer.py

54justin

Upload 7 files

f205f47 verified 4 months ago

raw

history blame contribute delete

15.2 kB

	# Generated by Copilot
	import matplotlib.pyplot as plt
	import seaborn as sns
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import json
	from typing import Dict, List

	# Configure matplotlib text rendering in one call: set the sans-serif
	# fallback list (fonts presumably chosen for CJK glyph coverage - confirm
	# they exist on the target machine) and turn off the Unicode minus sign.
	plt.rcParams.update({
	    'font.sans-serif': ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS'],
	    'axes.unicode_minus': False,
	})

	class RentalDataVisualizer:
	    """Render static (matplotlib) and interactive (plotly) charts for rental data.

	    Two optional inputs drive the charts:

	    * ``df`` - a DataFrame; the columns read here are ``price``, ``area``
	      and ``price_per_ping``.
	    * ``analysis_results`` - a dict; the keys read here are
	      ``price_distribution`` (``ranges``, ``counts``, ``percentages``),
	      ``description_analysis`` (``keywords_frequency``), ``basic_stats``
	      (``total_properties``, ``price_stats``, optional ``area_stats`` and
	      ``price_per_ping_stats``) and the optional ``insights`` list.

	    Every chart method prints a message and returns ``None`` when the data
	    it needs is missing, instead of raising.
	    """

	    def __init__(self, df: pd.DataFrame = None, analysis_results: Dict = None):
	        """Store the inputs and the default colour palette.

	        Args:
	            df: Listing data; may also be supplied later via ``load_data``.
	            analysis_results: Analysis output; may also be supplied later via
	                ``load_analysis_results``.
	        """
	        self.df = df
	        self.analysis_results = analysis_results
	        # Plotly palette. Its entries are 'rgb(r,g,b)' strings, so they are
	        # converted by _mpl_palette() before being handed to matplotlib.
	        self.colors = px.colors.qualitative.Set3

	    # ------------------------------------------------------------------
	    # Internal helpers
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _to_mpl_color(color):
	        """Convert a plotly ``'rgb(r,g,b)'`` string to a matplotlib RGB tuple.

	        matplotlib does not accept CSS-style ``rgb(...)`` strings, so each
	        channel is rescaled from 0-255 to a float in [0, 1]. Any other value
	        (named colour, hex string, tuple, ...) is returned unchanged.
	        """
	        if isinstance(color, str):
	            spec = color.strip().lower()
	            if spec.startswith('rgb(') and spec.endswith(')'):
	                return tuple(float(part) / 255.0 for part in spec[4:-1].split(','))
	        return color

	    def _mpl_palette(self, count: int) -> list:
	        """Return the first ``count`` palette entries in matplotlib format."""
	        return [self._to_mpl_color(entry) for entry in self.colors[:count]]

	    @staticmethod
	    def _ensure_parent_dir(save_path: str) -> None:
	        """Create the directory that will contain ``save_path`` if needed."""
	        import os  # local import: the module header does not import os

	        parent = os.path.dirname(save_path)
	        if parent:
	            os.makedirs(parent, exist_ok=True)

	    def _save_figure(self, fig, save_path: str) -> None:
	        """Write ``fig`` to ``save_path`` at 300 dpi and always close it.

	        Closing in ``finally`` keeps a failed save from leaking the figure.
	        """
	        try:
	            self._ensure_parent_dir(save_path)
	            fig.tight_layout()
	            fig.savefig(save_path, dpi=300, bbox_inches='tight')
	        finally:
	            plt.close(fig)

	    # ------------------------------------------------------------------
	    # Loading
	    # ------------------------------------------------------------------
	    def load_data(self, data_path: str):
	        """Load a CSV file (read as ``utf-8-sig``) into ``self.df``.

	        Only paths ending in ``.csv`` are accepted. Any failure is printed
	        and swallowed (best effort); ``self.df`` is only replaced when the
	        file was read successfully.
	        """
	        try:
	            if not data_path.endswith('.csv'):
	                raise ValueError("�д��CSV�榡��ɮ�")
	            self.df = pd.read_csv(data_path, encoding='utf-8-sig')
	            print(f"��\\��J {len(self.df)} ��ƥΩ��ı��")
	        except Exception as e:
	            print(f"��J��Ʈɵo�Ϳ��~: {e}")

	    def load_analysis_results(self, results_path: str):
	        """Load a UTF-8 JSON file into ``self.analysis_results``.

	        Any failure is printed and swallowed (best effort).
	        """
	        try:
	            with open(results_path, 'r', encoding='utf-8') as f:
	                self.analysis_results = json.load(f)
	            print("��R��G��J��\\")
	        except Exception as e:
	            print(f"��J��R��G�ɵo�Ϳ��~: {e}")

	    # ------------------------------------------------------------------
	    # Static charts
	    # ------------------------------------------------------------------
	    def plot_price_distribution(self, save_path: str = "output/price_distribution.png"):
	        """Save a histogram and a box plot of the ``price`` column.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if self.df is None or 'price' not in self.df.columns:
	            print("�L�kø�s��ϡG�ʤָ��")
	            return

	        # Drop missing prices first, as the area and price-per-ping charts
	        # do with their columns; NaN values would leave the box plot empty.
	        price_data = self.df['price'].dropna()
	        if price_data.empty:
	            print("�L�kø�s��ϡG�ʤָ��")
	            return

	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

	        # Left panel: histogram.
	        ax1.hist(price_data, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
	        ax1.set_xlabel('�� (��)')
	        ax1.set_ylabel('��ƶq')
	        ax1.set_title('��')
	        ax1.grid(True, alpha=0.3)

	        # Right panel: box plot.
	        ax2.boxplot(price_data, vert=True, patch_artist=True,
	                    boxprops=dict(facecolor='lightgreen', alpha=0.7))
	        ax2.set_ylabel('�� (��)')
	        ax2.set_title('��c�ι�')
	        ax2.grid(True, alpha=0.3)

	        self._save_figure(fig, save_path)
	        print(f"��Ϥw�x�s: {save_path}")

	    def plot_price_ranges(self, save_path: str = "output/price_ranges.png"):
	        """Save a bar chart and a pie chart of the precomputed price ranges.

	        Reads ``analysis_results['price_distribution']`` and its ``ranges``,
	        ``counts`` and ``percentages`` entries.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if not self.analysis_results or 'price_distribution' not in self.analysis_results:
	            print("�L�kø�s��϶��ϡG�ʤ֤��R��G")
	            return

	        dist_data = self.analysis_results['price_distribution']
	        ranges = dist_data['ranges']
	        counts = dist_data['counts']
	        # None lets matplotlib fall back to its own colour cycle when there
	        # are no ranges to colour.
	        palette = self._mpl_palette(len(ranges)) or None

	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

	        # Left panel: bar chart of counts per range.
	        bars = ax1.bar(ranges, counts, color=palette, alpha=0.8)
	        ax1.set_xlabel('��϶�')
	        ax1.set_ylabel('��ƶq')
	        ax1.set_title('�U��϶��ƶq')
	        ax1.tick_params(axis='x', rotation=45)

	        # Write each count just above its bar.
	        for bar, count in zip(bars, counts):
	            ax1.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.5,
	                     f'{count}', ha='center', va='bottom')

	        # Right panel: pie chart of the percentages.
	        ax2.pie(dist_data['percentages'], labels=ranges, autopct='%1.1f%%',
	                colors=palette, startangle=90)
	        ax2.set_title('��϶��Ҥ��')

	        self._save_figure(fig, save_path)
	        print(f"��϶��Ϥw�x�s: {save_path}")

	    def plot_area_analysis(self, save_path: str = "output/area_analysis.png"):
	        """Save an area-vs-price scatter plot and a histogram of ``area``.

	        The scatter panel stays empty when there is no ``price`` column or no
	        row with both values present.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if self.df is None or 'area' not in self.df.columns:
	            print("�L�kø�s�W�Ƥ��R�ϡG�ʤָ��")
	            return

	        area_data = self.df['area'].dropna()
	        if area_data.empty:
	            print("�L�kø�s�W�Ƥ��R�ϡG�S��Ī��W�Ƹ��")
	            return

	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

	        # Left panel: scatter of area against price.
	        if 'price' in self.df.columns:
	            valid_data = self.df.dropna(subset=['area', 'price'])
	            if len(valid_data) > 0:
	                ax1.scatter(valid_data['area'], valid_data['price'],
	                            alpha=0.6, color='coral', s=50)
	                ax1.set_xlabel('�W��')
	                ax1.set_ylabel('�� (��)')
	                ax1.set_title('�W�ƻP��Y')
	                ax1.grid(True, alpha=0.3)

	                # A straight-line fit needs at least two distinct x values;
	                # otherwise the trend line is skipped.
	                if valid_data['area'].nunique() > 1:
	                    trend = np.poly1d(np.polyfit(valid_data['area'], valid_data['price'], 1))
	                    ax1.plot(valid_data['area'], trend(valid_data['area']), "r--", alpha=0.8)

	        # Right panel: histogram of area.
	        ax2.hist(area_data, bins=15, alpha=0.7, color='lightgreen', edgecolor='black')
	        ax2.set_xlabel('�W��')
	        ax2.set_ylabel('��ƶq')
	        ax2.set_title('�W�Ƥ��')
	        ax2.grid(True, alpha=0.3)

	        self._save_figure(fig, save_path)
	        print(f"�W�Ƥ��R�Ϥw�x�s: {save_path}")

	    def plot_price_per_ping(self, save_path: str = "output/price_per_ping.png"):
	        """Save a histogram and a box plot of the ``price_per_ping`` column.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if self.df is None or 'price_per_ping' not in self.df.columns:
	            print("�L�kø�s�C�W��ϡG�ʤָ��")
	            return

	        price_per_ping_data = self.df['price_per_ping'].dropna()
	        if price_per_ping_data.empty:
	            print("�L�kø�s�C�W��ϡG�S��Ī��C�W��")
	            return

	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

	        # Left panel: histogram.
	        ax1.hist(price_per_ping_data, bins=20, alpha=0.7, color='gold', edgecolor='black')
	        ax1.set_xlabel('�C�W�� (��/�W)')
	        ax1.set_ylabel('��ƶq')
	        ax1.set_title('�C�W��')
	        ax1.grid(True, alpha=0.3)

	        # Right panel: box plot.
	        ax2.boxplot(price_per_ping_data, vert=True, patch_artist=True,
	                    boxprops=dict(facecolor='orange', alpha=0.7))
	        ax2.set_ylabel('�C�W�� (��/�W)')
	        ax2.set_title('�C�W��c�ι�')
	        ax2.grid(True, alpha=0.3)

	        self._save_figure(fig, save_path)
	        print(f"�C�W��Ϥw�x�s: {save_path}")

	    def plot_keywords_analysis(self, save_path: str = "output/keywords_analysis.png"):
	        """Save a horizontal bar chart of keyword frequencies.

	        Reads ``analysis_results['description_analysis']['keywords_frequency']``
	        and plots only the keywords whose frequency is greater than zero.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if not self.analysis_results or 'description_analysis' not in self.analysis_results:
	            print("�L�kø�s��r��R�ϡG�ʤ֤��R��G")
	            return

	        desc_analysis = self.analysis_results['description_analysis']
	        if 'keywords_frequency' not in desc_analysis:
	            print("�L�kø�s��r��R�ϡG�ʤ��r��")
	            return

	        # Keep only keywords that actually occur.
	        filtered_keywords = {k: v for k, v in desc_analysis['keywords_frequency'].items() if v > 0}
	        if not filtered_keywords:
	            print("�S��r��")
	            return

	        keywords = list(filtered_keywords)
	        frequencies = list(filtered_keywords.values())

	        fig, ax = plt.subplots(figsize=(12, 8))
	        bars = ax.barh(keywords, frequencies, color=self._mpl_palette(len(keywords)))
	        ax.set_xlabel('�X�{��')
	        ax.set_ylabel('��r')
	        ax.set_title('��y�z��r�W�v��R')
	        ax.grid(True, alpha=0.3, axis='x')

	        # Write each frequency just right of its bar.
	        for bar, freq in zip(bars, frequencies):
	            ax.text(bar.get_width() + 0.1, bar.get_y() + bar.get_height() / 2.,
	                    f'{freq}', ha='left', va='center')

	        self._save_figure(fig, save_path)
	        print(f"��r��R�Ϥw�x�s: {save_path}")

	    # ------------------------------------------------------------------
	    # Interactive dashboard
	    # ------------------------------------------------------------------
	    def create_interactive_dashboard(self, save_path: str = "output/dashboard.html"):
	        """Write a 2x2 plotly dashboard to an HTML file.

	        Panels: price histogram, area-vs-price scatter, price-range bar chart
	        and price-per-ping histogram. The last three are added only when
	        their input is available.

	        Args:
	            save_path: HTML file to write; its directory is created if needed.
	        """
	        # The first panel (and the scatter) read the price column, so it is
	        # required here just like in plot_price_distribution.
	        if self.df is None or 'price' not in self.df.columns:
	            print("�L�k�Ыػ��O�G�ʤָ��")
	            return

	        fig = make_subplots(
	            rows=2, cols=2,
	            subplot_titles=('��', '�W��vs��', '��϶��', '�C�W��'),
	            specs=[[{"secondary_y": False}, {"secondary_y": False}],
	                   [{"type": "bar"}, {"secondary_y": False}]]
	        )

	        # 1. Price histogram.
	        fig.add_trace(
	            go.Histogram(x=self.df['price'], name='��', nbinsx=20,
	                         marker_color='skyblue', opacity=0.7),
	            row=1, col=1
	        )

	        # 2. Area-vs-price scatter.
	        if 'area' in self.df.columns:
	            valid_data = self.df.dropna(subset=['area', 'price'])
	            if len(valid_data) > 0:
	                fig.add_trace(
	                    go.Scatter(x=valid_data['area'], y=valid_data['price'],
	                               mode='markers', name='�W��vs��',
	                               marker=dict(color='coral', size=8, opacity=0.6)),
	                    row=1, col=2
	                )

	        # 3. Price-range bar chart from the precomputed analysis.
	        if self.analysis_results and 'price_distribution' in self.analysis_results:
	            dist_data = self.analysis_results['price_distribution']
	            fig.add_trace(
	                go.Bar(x=dist_data['ranges'], y=dist_data['counts'],
	                       name='��϶�', marker_color='lightgreen'),
	                row=2, col=1
	            )

	        # 4. Price-per-ping histogram.
	        if 'price_per_ping' in self.df.columns:
	            price_per_ping_data = self.df['price_per_ping'].dropna()
	            if len(price_per_ping_data) > 0:
	                fig.add_trace(
	                    go.Histogram(x=price_per_ping_data, name='�C�W��', nbinsx=15,
	                                 marker_color='gold', opacity=0.7),
	                    row=2, col=2
	                )

	        fig.update_layout(
	            title_text="��s�ϯ��Υ��R��O",
	            title_x=0.5,
	            height=800,
	            showlegend=False
	        )

	        # Axis titles, panel by panel.
	        fig.update_xaxes(title_text="�� (��)", row=1, col=1)
	        fig.update_yaxes(title_text="��ƶq", row=1, col=1)
	        fig.update_xaxes(title_text="�W��", row=1, col=2)
	        fig.update_yaxes(title_text="�� (��)", row=1, col=2)
	        fig.update_xaxes(title_text="��϶�", row=2, col=1)
	        fig.update_yaxes(title_text="��ƶq", row=2, col=1)
	        fig.update_xaxes(title_text="�C�W�� (��/�W)", row=2, col=2)
	        fig.update_yaxes(title_text="��ƶq", row=2, col=2)

	        self._ensure_parent_dir(save_path)
	        fig.write_html(save_path)
	        print(f"��ʦ��O�w�x�s: {save_path}")

	    def generate_all_visualizations(self):
	        """Run every static chart and the dashboard with their default paths.

	        The summary report is not included; call ``create_summary_report``
	        separately.
	        """
	        print("�}�l�ͦ��ı�ƹϪ�...")

	        # Static charts.
	        self.plot_price_distribution()
	        self.plot_price_ranges()
	        self.plot_area_analysis()
	        self.plot_price_per_ping()
	        self.plot_keywords_analysis()

	        # Interactive dashboard.
	        self.create_interactive_dashboard()

	        print("�Ҧ��ı�ƹϪ��ͦ��I")

	    # ------------------------------------------------------------------
	    # Summary report
	    # ------------------------------------------------------------------
	    def create_summary_report(self, save_path: str = "output/summary_report.png"):
	        """Save a text-only summary image built from ``basic_stats``.

	        Requires ``analysis_results['basic_stats']`` with ``total_properties``
	        and ``price_stats`` (``mean``, ``median``, ``min``, ``max``, ``std``,
	        ``q25``, ``q75``). ``area_stats``, ``price_per_ping_stats`` and the
	        top-level ``insights`` list are appended when present.

	        Args:
	            save_path: Image file to write; its directory is created if needed.
	        """
	        if not self.analysis_results or 'basic_stats' not in self.analysis_results:
	            print("�L�k�ЫغK�n��i�G�ʤ֤��R��G")
	            return

	        fig, ax = plt.subplots(figsize=(12, 8))
	        ax.axis('off')

	        fig.suptitle('��s�ϯ��Υ��R�K�n��i', fontsize=20, fontweight='bold', y=0.95)

	        stats = self.analysis_results['basic_stats']

	        # The text lines below are reproduced exactly as found in the file;
	        # only the stray backslashes are escaped so they stay literal.
	        report_text = f"""

	? ��p
	? �`��: {stats['total_properties']} ��
	? ��ƽd��: 2�СB��h�B�q��j��

	? ��έp
	? ��: {stats['price_stats']['mean']:,} ��
	? ��Ư��: {stats['price_stats']['median']:,} ��
	? �̧C��: {stats['price_stats']['min']:,} ��
	? �̰��: {stats['price_stats']['max']:,} ��
	? �зǮt: {stats['price_stats']['std']:,} ��

	? ��S�x
	? �Ĥ@�\\|��: {stats['price_stats']['q25']:,} ��
	? �ĤT�\\|��: {stats['price_stats']['q75']:,} ��
	"""

	        # Optional area statistics.
	        if 'area_stats' in stats and stats['area_stats']:
	            area_stats = stats['area_stats']
	            report_text += f"""
	? �W�Ʋέp
	? ��W��: {area_stats['mean']} �W
	? ��ƩW��: {area_stats['median']} �W
	? �̤p�W��: {area_stats['min']} �W
	? �̤j�W��: {area_stats['max']} �W
	"""

	        # Optional price-per-ping statistics.
	        if 'price_per_ping_stats' in stats and stats['price_per_ping_stats']:
	            pp_stats = stats['price_per_ping_stats']
	            report_text += f"""
	? �C�W��έp
	? ��C�W��: {pp_stats['mean']:,} ��/�W
	? ��ƨC�W��: {pp_stats['median']:,} ��/�W
	? �̧C�C�W��: {pp_stats['min']:,} ��/�W
	? �̰��C�W��: {pp_stats['max']:,} ��/�W
	"""

	        # Optional insights, one per line.
	        if 'insights' in self.analysis_results:
	            report_text += "\n\n? ��n�}��\n"
	            report_text += "".join(
	                f"? {insight}\n" for insight in self.analysis_results['insights']
	            )

	        # NOTE(review): fontfamily='monospace' bypasses the sans-serif fallback
	        # list set at module level; confirm non-ASCII text renders correctly.
	        ax.text(0.05, 0.95, report_text, transform=ax.transAxes, fontsize=12,
	                verticalalignment='top', fontfamily='monospace',
	                bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue", alpha=0.8))

	        self._save_figure(fig, save_path)
	        print(f"�K�n��i�w�x�s: {save_path}")

	if __name__ == "__main__":
	    # Manual smoke test: build an empty visualizer, feed it the two files
	    # from the output/ directory, then render every chart and the report.
	    demo = RentalDataVisualizer()

	    # Load the CSV first, then the JSON analysis results.
	    for loader, source_path in (
	        (demo.load_data, "output/rental_data.csv"),
	        (demo.load_analysis_results, "output/analysis_results.json"),
	    ):
	        loader(source_path)

	    # Charts and dashboard first, summary report last.
	    for render in (demo.generate_all_visualizations, demo.create_summary_report):
	        render()