| import gradio as gr |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from scipy import stats |
| import random |
| import warnings |
| warnings.filterwarnings('ignore') |
|
|
| |
# Matplotlib font setup: list sans-serif fallbacks (presumably chosen so CJK
# text can be rendered) and disable the Unicode minus glyph on axes.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei'],
    'axes.unicode_minus': False,
})
|
|
def _standardize(values):
    """Return ``values`` rescaled to zero mean and unit (population) std.

    Arrays with fewer than two elements, or with no spread at all, are
    returned unchanged because they cannot be rescaled meaningfully.
    """
    if values.size < 2:
        return values
    spread = np.std(values)
    if not spread > 0:
        return values
    return (values - np.mean(values)) / spread


def generate_random_data(n, min_val=1, max_val=100, target_mean=None, target_std=None, target_skew=None, target_kurt=None):
    """Generate ``n`` random values, optionally steering their shape.

    When none of the four targets is given, uniformly distributed integers
    between ``min_val`` and ``max_val`` (inclusive) are returned.  Otherwise a
    continuous sample is drawn, shaped, standardized and finally rescaled to
    the requested mean / standard deviation.  Skewness and kurtosis are only
    approximated; mean and standard deviation are matched up to rounding
    (unless the sample is clipped, see below).

    Args:
        n: Number of values; coerced with ``int()`` so a float such as
            ``20.0`` coming from a UI widget is accepted.  ``n <= 0`` yields
            an empty list.
        min_val: Lower bound.  Used for the integer draw, for deriving the
            default mean/std, and for clipping.  ``None`` falls back to 1.
        max_val: Upper bound, same roles as ``min_val``.  ``None`` falls back
            to 100.  Reversed bounds are swapped.
        target_mean: Desired mean, or ``None`` for the midpoint of the bounds.
        target_std: Desired standard deviation, or ``None`` for one sixth of
            the bound range.
        target_skew: Desired skewness.  Ignored when ``None`` or when its
            magnitude is at most 0.1.
        target_kurt: Desired excess kurtosis.  Ignored when ``None`` or when
            its magnitude is at most 0.1.

    Returns:
        A list of ``int`` (pure uniform case) or of ``float`` rounded to two
        decimals (shaped case).

    Raises:
        ValueError: In the uniform case, if no integer lies between the
            bounds (for example ``min_val=1.2, max_val=1.8``).
    """
    n = int(n)
    if n <= 0:
        return []

    # Clipping is only applied when the caller supplied both bounds.
    bounded = min_val is not None and max_val is not None
    low = 1 if min_val is None else min_val
    high = 100 if max_val is None else max_val
    if low > high:
        low, high = high, low

    if all(param is None for param in (target_mean, target_std, target_skew, target_kurt)):
        # random.randint rejects floats on recent Python versions, so snap
        # the bounds inward to the nearest integers first.
        int_low = int(np.ceil(low))
        int_high = int(np.floor(high))
        if int_low > int_high:
            raise ValueError(f"no integer lies between {low} and {high}")
        return [random.randint(int_low, int_high) for _ in range(n)]

    # A private generator seeded from ``random`` keeps results reproducible
    # under random.seed() without clobbering NumPy's global RNG state.
    rng = np.random.default_rng(random.getrandbits(32))

    mean_target = target_mean if target_mean is not None else (low + high) / 2
    std_target = target_std if target_std is not None else (high - low) / 6

    if target_skew is not None and abs(target_skew) > 0.1:
        # A gamma distribution has skewness 2 / sqrt(shape); the 0.5 floor
        # on the shape caps the reachable skewness at about 2.83.
        shape = max(0.5, 4 / (target_skew ** 2))
        data = rng.gamma(shape, 1.0, n)
        if target_skew < 0:
            data = -data  # mirror to obtain a left-skewed sample
    else:
        data = rng.normal(0.0, 1.0, n)
    data = _standardize(data)

    if target_kurt is not None and abs(target_kurt) > 0.1:
        if target_kurt > 0:
            # Scale mixture: shrink a random subset of points toward the
            # centre.  (A weighted *sum* of two normals would still be
            # normal and leave the kurtosis unchanged.)
            weight = min(0.8, target_kurt / 5)
            shrink = rng.random(n) < weight
            data = np.where(shrink, 0.3 * data, data)
        else:
            # Blend in bounded uniform noise to flatten the distribution.
            weight = min(0.8, abs(target_kurt) / 2)
            data = (1 - weight) * data + weight * rng.uniform(-2, 2, n)
        # Shaping is applied on top of the (possibly skewed) base sample, so
        # a requested skew direction survives; re-standardize afterwards.
        data = _standardize(data)

    data = data * std_target + mean_target

    # Only force the values into the bounds when the caller did not pin the
    # mean or standard deviation, since clipping would distort both.
    if bounded and target_mean is None and target_std is None:
        data = np.clip(data, low, high)

    return np.round(data, 2).tolist()
|
|
def generate_random_data_for_input(n, min_val, max_val, target_mean, target_std, target_skew, target_kurt):
    """Generate random data and return it as one space-separated string.

    A value of 0 for any of the four targets is treated as "not set" and is
    forwarded to ``generate_random_data`` as ``None``; consequently a target
    of exactly 0 cannot be requested through this wrapper.
    """
    def _zero_as_unset(value):
        # 0 is the sentinel for "leave this target unset".
        return None if value == 0 else value

    targets = [_zero_as_unset(t) for t in (target_mean, target_std, target_skew, target_kurt)]
    samples = generate_random_data(n, min_val, max_val, *targets)
    return ' '.join(str(sample) for sample in samples)
|
|
def _parse_numbers(data_str):
    """Parse whitespace-separated numbers.

    Returns ``(values, None)`` on success or ``(None, message)`` naming the
    first token that is not a valid number.
    """
    values = []
    for token in data_str.split():
        try:
            values.append(float(token))
        except ValueError:
            return None, f"無效數字: {token}"
    return values, None


def _format_report(data):
    """Compute the descriptive statistics of ``data`` and render them as Markdown text."""
    arithmetic_mean = np.mean(data)
    median = np.median(data)
    # The geometric mean is only defined here for strictly positive data.
    gmean_text = f"{stats.gmean(data):.4f}" if np.all(data > 0) else "N/A (需要正數)"

    mode_result = stats.mode(data, keepdims=True)
    if len(mode_result.mode) > 0:
        mode_text = f"{mode_result.mode[0]:.2f} (出現 {mode_result.count[0]} 次)"
    else:
        mode_text = "無眾數"

    q1, q3 = np.percentile(data, [25, 75])
    deciles = np.percentile(data, np.arange(10, 100, 10))
    p5, p10, p90, p95 = np.percentile(data, [5, 10, 90, 95])

    data_range = np.max(data) - np.min(data)
    iqr = q3 - q1
    # Sample (n - 1) standard deviation and variance.
    std_dev = np.std(data, ddof=1)
    variance = np.var(data, ddof=1)
    # The coefficient of variation is undefined for a zero mean; report that
    # explicitly instead of printing "inf%" or "nan%".
    if arithmetic_mean != 0:
        cv_text = f"{(std_dev / arithmetic_mean) * 100:.2f}%"
    else:
        cv_text = "N/A (平均數為 0)"

    skewness = stats.skew(data)
    kurtosis = stats.kurtosis(data)
    skew_label = '(右偏)' if skewness > 0 else '(左偏)' if skewness < 0 else '(對稱)'
    kurt_label = '(尖峭)' if kurtosis > 0 else '(平坦)' if kurtosis < 0 else '(正常)'

    return f"""
📊 **資料摘要**
• 資料點數: {len(data)} 最大值: {np.max(data):.4f} 最小值: {np.min(data):.4f}

📈 **集中趨勢**
• 算術平均數: {arithmetic_mean:.4f}
• 幾何平均數: {gmean_text}
• 中位數: {median:.4f}
• 眾數: {mode_text}

📏 **分位數**
• 第一四分位數 (Q1): {q1:.4f}
• 第二四分位數 (Q2/中位數): {median:.4f}
• 第三四分位數 (Q3): {q3:.4f}

🔢 **十分位數**
• D1: {deciles[0]:.2f}, D2: {deciles[1]:.2f}, D3: {deciles[2]:.2f}
• D4: {deciles[3]:.2f}, D5: {deciles[4]:.2f}, D6: {deciles[5]:.2f}
• D7: {deciles[6]:.2f}, D8: {deciles[7]:.2f}, D9: {deciles[8]:.2f}

📊 **關鍵百分位數**
• P5: {p5:.2f}, P10: {p10:.2f}
• P25: {q1:.2f}, P50: {median:.2f}
• P75: {q3:.2f}, P90: {p90:.2f}, P95: {p95:.2f}

📐 **變異性指標**
• 全距 (Range): {data_range:.4f}
• 四分位距 (IQR): {iqr:.4f}
• 變異數: {variance:.4f}
• 標準差: {std_dev:.4f}
• 變異係數: {cv_text}

📉 **分佈形狀**
• 偏態 (Skewness): {skewness:.4f} {skew_label}
• 峰態 (Kurtosis): {kurtosis:.4f} {kurt_label}
"""


def _draw_figure(data):
    """Build a two-panel figure: histogram with mean/median lines, and a horizontal box plot."""
    arithmetic_mean = np.mean(data)
    median = np.median(data)
    q1, q3 = np.percentile(data, [25, 75])
    lowest, highest = np.min(data), np.max(data)

    fig, (hist_ax, box_ax) = plt.subplots(2, 1, figsize=(8, 10))
    try:
        fig.suptitle('Statistical analysis visualization', fontsize=16, fontweight='bold')

        # One bin per distinct value, capped at 100 bins.
        hist_ax.hist(data, bins=min(100, len(set(data))), alpha=0.7, color='skyblue', edgecolor='black')
        hist_ax.axvline(arithmetic_mean, color='red', linestyle='--', linewidth=2, label=f'Mean: {arithmetic_mean:.2f}')
        hist_ax.axvline(median, color='green', linestyle='--', linewidth=2, label=f'Median: {median:.2f}')
        hist_ax.set_title('Histograms and Central Tendency', fontsize=14)
        hist_ax.set_xlabel('Value')
        hist_ax.set_ylabel('Frequency')
        hist_ax.legend()
        hist_ax.grid(True, alpha=0.3)

        # whis=[0, 100] stretches the whiskers to the minimum and maximum.
        box_data = box_ax.boxplot(data, patch_artist=True, widths=0.6, vert=False, whis=[0, 100])
        box_data['boxes'][0].set_facecolor('lightblue')
        box_data['boxes'][0].set_alpha(0.7)
        box_ax.set_title('Box Plot (Quartiles)', fontsize=14)
        box_ax.set_xlabel('Value')
        box_ax.grid(True, alpha=0.3)

        # Stagger the labels vertically so neighbouring values do not overlap.
        box_ax.text(q1, 1.05, f'Q1: {q1:.2f}', ha='center', fontsize=10)
        box_ax.text(median, 1.10, f'Q2: {median:.2f}', ha='center', fontsize=10, weight='bold')
        box_ax.text(q3, 1.15, f'Q3: {q3:.2f}', ha='center', fontsize=10)
        box_ax.text(lowest, 1.20, f'Min: {lowest:.2f}', ha='center', fontsize=9, alpha=0.7)
        box_ax.text(highest, 1.25, f'Max: {highest:.2f}', ha='center', fontsize=9, alpha=0.7)

        fig.tight_layout()
    finally:
        # Unregister the figure from pyplot so repeated calls in a
        # long-running server do not accumulate open figures; the Figure
        # object itself is still returned to the caller.
        plt.close(fig)
    return fig


def calculate_statistics(data_str):
    """Compute descriptive statistics for whitespace-separated numbers.

    Args:
        data_str: Text containing numbers separated by whitespace.

    Returns:
        A 2-tuple ``(report, figure)``.  ``report`` is the Markdown summary,
        or an error message when the input is empty, contains a token that is
        not a number, or the computation fails.  ``figure`` is the Matplotlib
        figure, or ``None`` whenever ``report`` is an error message.  Every
        path returns exactly two items, matching the two outputs this
        function is wired to.
    """
    try:
        if not data_str or not data_str.strip():
            return "請輸入數據", None

        values, error = _parse_numbers(data_str)
        if error is not None:
            return error, None
        if not values:
            return "沒有有效數據", None

        data = np.array(values)
        return _format_report(data), _draw_figure(data)

    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"計算錯誤: {str(e)}", None
|
|
def create_interface():
    """Build and return the Gradio Blocks app for the statistics calculator.

    Layout: the left column holds the data textbox, the random-data
    parameters and the two action buttons; the right column shows the
    Markdown report and the plot.  A collapsed accordion at the bottom
    explains the reported statistics.

    Returns:
        The ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="敘述性統計計算機", theme=gr.themes.Soft()) as interface:

        gr.Markdown("""
        # 📊 敘述性統計計算機

        此介面為-國立高雄科技大學 114學年-財管系-財管系二甲 統計學(一) 上課教材

        這個工具可以計算各種統計指標,包括:
        - **集中趨勢**: 算術平均、幾何平均、中位數、眾數
        - **分位數**: 四分位數、十分位數、百分位數
        - **變異性**: 全距、四分位距、標準差
        - **分佈形狀**: 偏態、峰態
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## 📝 資料輸入")

                # Pre-fill the textbox with 30 random integers in [1, 100].
                default_data = generate_random_data(30, 1, 100)
                default_data_str = ' '.join(map(str, default_data))

                data_input = gr.Textbox(
                    label="資料 (以空格分隔)",
                    value=default_data_str,
                    placeholder="例如: 1 2 3 4 5 6 7 8 9 10",
                    lines=4,
                    info="您可以直接編輯這些數據,或使用下方參數生成新的隨機資料"
                )

                gr.Markdown("### 🎲 隨機資料生成參數")

                with gr.Row():
                    n_generate = gr.Slider(
                        minimum=10,
                        maximum=10000,
                        value=100,
                        step=10,
                        label="資料點數 (n)"
                    )

                gr.Markdown("#### 📊 基本統計參數")
                with gr.Row():
                    target_mean = gr.Number(
                        value=0,
                        label="目標平均數 (μ)",
                        info="設定為 0 則自動計算"
                    )

                    target_std = gr.Number(
                        value=0,
                        minimum=0,
                        label="目標標準差 (σ)",
                        info="設定為 0 則自動計算"
                    )

                gr.Markdown("#### 📏 數值範圍 (當未設定平均數/標準差時使用)")
                with gr.Row():
                    min_val = gr.Number(
                        value=1,
                        label="最小值"
                    )

                    max_val = gr.Number(
                        value=100,
                        label="最大值"
                    )

                gr.Markdown("#### 📈 分佈形狀參數")
                with gr.Row():
                    # Slider limits match the ranges advertised in `info`.
                    target_skew = gr.Slider(
                        minimum=-5.0,
                        maximum=5.0,
                        value=0,
                        step=0.1,
                        label="目標偏態 (Skewness)",
                        info="範圍: -5 到 +5 (負值=左偏,0=對稱,正值=右偏)"
                    )

                    target_kurt = gr.Slider(
                        minimum=-2.0,
                        maximum=10.0,
                        value=0,
                        step=0.1,
                        label="目標峰態 (Kurtosis)",
                        info="範圍: -2 到 +10 (負值=平坦,0=正常,正值=尖峭)"
                    )

                generate_btn = gr.Button("🎲 生成新的隨機資料", variant="secondary", size="sm")
                calculate_btn = gr.Button("🔍 計算統計指標", variant="primary", size="lg")

            with gr.Column(scale=2):
                gr.Markdown("## 📈 計算結果")

                results_output = gr.Markdown(label="統計結果")

                plot_output = gr.Plot(label="統計圖表 (直方圖 & 盒形圖)")

        # "Generate" only refills the textbox; nothing is computed until the
        # user presses "Calculate".
        generate_btn.click(
            generate_random_data_for_input,
            inputs=[n_generate, min_val, max_val, target_mean, target_std, target_skew, target_kurt],
            outputs=[data_input]
        )

        # "Calculate" reads whatever is currently in the textbox.
        calculate_btn.click(
            calculate_statistics,
            inputs=[data_input],
            outputs=[results_output, plot_output]
        )

        with gr.Accordion("📚 統計指標說明", open=False):
            gr.Markdown("""
            ### 集中趨勢
            - **算術平均數**: 所有數值的總和除以個數
            - **幾何平均數**: n個正數的n次方根(適用於比率、成長率)
            - **中位數**: 將數據排序後位於中間的數值
            - **眾數**: 出現頻率最高的數值

            ### 數值範圍
            - **最小值/最大值**: 當未指定平均數和標準差時,用於限制數據範圍

            ### 分位數
            - **四分位數**: 將數據分為四等份的分割點(Q1, Q2, Q3)
            - **十分位數**: 將數據分為十等份的分割點(D1-D9)
            - **百分位數**: 將數據分為百等份的分割點(P1-P99)

            ### 變異性指標
            - **全距**: 最大值與最小值的差
            - **四分位距**: Q3與Q1的差,反映中間50%數據的散佈
            - **標準差**: 衡量數據相對於平均數的離散程度

            ### 分佈形狀
            - **偏態**: 分佈的對稱性(正值=右偏,負值=左偏)
            - **峰態**: 分佈的尖銳程度(正值=較尖銳,負值=較平坦)
            """)

    return interface
|
|
if __name__ == "__main__":
    # Script entry point: build the app and start serving it.  share=True is
    # passed through to Gradio's launch() (requests a shareable link).
    create_interface().launch(share=True)