Ashar086 committed on
Commit
4fcdb80
·
verified ·
1 Parent(s): 568dff8

Create report_generator.py

Browse files
Files changed (1) hide show
  1. report_generator.py +45 -0
report_generator.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
+ from io import StringIO
5
+
6
class ReportGenerator:
    """Build a plain-text analysis report for a pandas DataFrame."""

    def generate(self, data: pd.DataFrame) -> str:
        """Return a multi-section text report describing ``data``.

        Sections, in order:

        1. Data Summary -- the output of ``data.describe()``.
        2. Missing Values -- per-column null counts.
        3. Correlation Analysis -- correlation matrix of the numeric
           columns, or a notice when fewer than two numeric columns exist.
        4. Categorical Data Analysis -- value counts for every
           ``object``-dtype column; the whole section (heading included)
           is omitted when there are no such columns.
        5. Data Visualizations -- a fixed pointer to the plots shown
           elsewhere.

        Args:
            data: The DataFrame to summarise.

        Returns:
            The full report as a single string.
        """
        report = StringIO()
        report.write("Data Analysis Report\n\n")

        # Data summary
        report.write("1. Data Summary\n")
        report.write(data.describe().to_string())
        report.write("\n\n")

        # Missing values
        report.write("2. Missing Values\n")
        missing_values = data.isnull().sum()
        report.write(missing_values.to_string())
        report.write("\n\n")

        # Correlation analysis
        report.write("3. Correlation Analysis\n")
        # The "number" alias selects every numeric dtype. The previous
        # ``pd.np.number`` spelling relied on the ``pd.np`` shim, which was
        # removed in pandas 2.0 and raises AttributeError there.
        numeric_columns = data.select_dtypes(include="number").columns
        if len(numeric_columns) > 1:
            correlation_matrix = data[numeric_columns].corr()
            report.write(correlation_matrix.to_string())
        else:
            report.write("Not enough numeric columns for correlation analysis.")
        report.write("\n\n")

        # Categorical data analysis (section is skipped entirely when the
        # frame has no object-dtype columns)
        categorical_columns = data.select_dtypes(include=["object"]).columns
        if len(categorical_columns) > 0:
            report.write("4. Categorical Data Analysis\n")
            for column in categorical_columns:
                report.write(f"{column} value counts:\n")
                report.write(data[column].value_counts().to_string())
                report.write("\n\n")

        # Data visualizations
        report.write("5. Data Visualizations\n")
        report.write("Please refer to the generated plots in the Streamlit app for visual representations of the data.\n\n")

        return report.getvalue()