File size: 1,671 Bytes
4fcdb80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO

class ReportGenerator:
    """Build a plain-text analysis report for a pandas DataFrame."""

    def generate(self, data: pd.DataFrame) -> str:
        """Return a multi-section text report describing ``data``.

        The report contains, in order:

        1. Summary statistics from ``DataFrame.describe()``.
        2. Per-column counts of missing values.
        3. The correlation matrix of the numeric columns (needs at least
           two numeric columns, otherwise a short notice is written).
        4. Value counts for every ``object``-dtype column. This section is
           omitted entirely when there are no such columns; the following
           section keeps the number 5 regardless.
        5. A fixed note that points the reader to the plots.

        Args:
            data: The DataFrame to analyse.

        Returns:
            The full report as a single string.
        """
        report = StringIO()
        report.write("Data Analysis Report\n\n")

        # Data summary
        report.write("1. Data Summary\n")
        if data.shape[1] == 0:
            # describe() raises ValueError on a frame without columns, so
            # report that fact instead of crashing the whole report.
            report.write("No columns to summarize.")
        else:
            report.write(data.describe().to_string())
        report.write("\n\n")

        # Missing values
        report.write("2. Missing Values\n")
        missing_values = data.isnull().sum()
        report.write(missing_values.to_string())
        report.write("\n\n")

        # Correlation analysis
        report.write("3. Correlation Analysis\n")
        # "number" selects every numeric dtype; the former ``pd.np.number``
        # spelling relied on the ``pandas.np`` alias removed in pandas 2.0.
        numeric_columns = data.select_dtypes(include="number").columns
        if len(numeric_columns) > 1:
            correlation_matrix = data[numeric_columns].corr()
            report.write(correlation_matrix.to_string())
        else:
            report.write("Not enough numeric columns for correlation analysis.")
        report.write("\n\n")

        # Categorical data analysis
        categorical_columns = data.select_dtypes(include=["object"]).columns
        if len(categorical_columns) > 0:
            report.write("4. Categorical Data Analysis\n")
            for column in categorical_columns:
                report.write(f"{column} value counts:\n")
                report.write(data[column].value_counts().to_string())
                report.write("\n\n")

        # Data visualizations
        report.write("5. Data Visualizations\n")
        report.write("Please refer to the generated plots in the Streamlit app for visual representations of the data.\n\n")

        return report.getvalue()