Deevyankar committed on
Commit
847e426
·
verified ·
1 Parent(s): fd38fe9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import pandas as pd
3
+ import gradio as gr
4
+
5
+ def _load_excel(file_obj):
6
+ """
7
+ file_obj is a tempfile-like object from Gradio.
8
+ Returns: (file_bytes, excel_file, sheet_names)
9
+ """
10
+ if file_obj is None:
11
+ raise ValueError("No file uploaded.")
12
+
13
+ # Gradio passes an object with a .name (path)
14
+ file_path = getattr(file_obj, "name", None)
15
+ if not file_path:
16
+ raise ValueError("Invalid uploaded file object.")
17
+
18
+ with open(file_path, "rb") as f:
19
+ file_bytes = f.read()
20
+
21
+ if not file_bytes:
22
+ raise ValueError("Uploaded file is empty.")
23
+
24
+ # Read workbook
25
+ excel = pd.ExcelFile(io.BytesIO(file_bytes), engine="openpyxl")
26
+ sheets = excel.sheet_names
27
+ if not sheets:
28
+ raise ValueError("No sheets found in this workbook.")
29
+
30
+ return file_bytes, excel, sheets
31
+
32
+
33
def on_upload(file_obj):
    """Handle a new (or cleared) upload and show the workbook's first sheet.

    Args:
        file_obj: Value of the upload component; ``None`` when there is no
            file.

    Returns:
        tuple: Seven values, in the order the handler's outputs are wired:
        a ``gr.Dropdown`` update listing the sheets, the preview DataFrame,
        row count, column count, number of missing cells, the raw workbook
        bytes, and the selected sheet name. On failure (or when there is no
        file) the dropdown is emptied and disabled, the preview is an empty
        DataFrame, the counts are 0 and both stored values are ``None``.
    """

    def _reset():
        # Blank UI used both for "no file" and for "file could not be read".
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            pd.DataFrame(),
            0,
            0,
            0,
            None,
            None,
        )

    # A cleared upload is not an error; reset without nagging the user.
    if file_obj is None:
        return _reset()

    try:
        file_bytes, excel, sheets = _load_excel(file_obj)
        sheet = sheets[0]  # auto-load the first sheet
        # Reuse the workbook that was already opened instead of parsing the
        # bytes a second time, and close it once the sheet is read.
        with excel:
            df = excel.parse(sheet_name=sheet)
    except Exception as exc:
        # UI boundary: report the problem instead of failing silently.
        gr.Warning(f"Could not read the uploaded file: {exc}")
        return _reset()

    # `shape` is also correct for header-only sheets (0 rows, N columns).
    rows, cols = df.shape
    missing = int(df.isna().sum().sum())

    return (
        gr.Dropdown(choices=sheets, value=sheet, interactive=True),
        df,
        rows,
        cols,
        missing,
        file_bytes,  # kept in State so other handlers can re-read the workbook
        sheet,  # kept in State as the currently selected sheet
    )
71
+
72
+
73
def on_sheet_change(sheet_name, file_bytes):
    """Reload the preview and statistics for the selected sheet.

    Args:
        sheet_name: Name of the sheet chosen in the dropdown.
        file_bytes: Raw bytes of the uploaded workbook.

    Returns:
        tuple: ``(df, rows, cols, missing, sheet_name)``. When there is no
        workbook or no sheet selected, or the sheet cannot be read, returns
        an empty DataFrame, zero counts and ``None`` as the sheet name.
    """
    cleared = (pd.DataFrame(), 0, 0, 0, None)

    # This handler may run with nothing to show (for example when the
    # dropdown is reset to no selection), so that case stays quiet.
    if not file_bytes or not sheet_name:
        return cleared

    try:
        df = pd.read_excel(
            io.BytesIO(file_bytes),
            sheet_name=sheet_name,
            engine="openpyxl",
        )
    except Exception as exc:
        # UI boundary: tell the user why the preview is empty.
        gr.Warning(f"Could not load sheet '{sheet_name}': {exc}")
        return cleared

    # `shape` is also correct for header-only sheets (0 rows, N columns).
    rows, cols = df.shape
    missing = int(df.isna().sum().sum())

    return df, rows, cols, missing, sheet_name
95
+
96
+
97
def make_clean_csv(file_bytes, sheet_name, drop_empty_cols=True, drop_empty_rows=True):
    """Write a cleaned CSV export of one sheet and return its file path.

    The result feeds a file-download component, which needs a path on disk
    rather than in-memory bytes, so the CSV is written to a fresh temporary
    directory.

    Args:
        file_bytes: Raw bytes of the uploaded workbook.
        sheet_name: Name of the sheet to export.
        drop_empty_cols: Drop columns whose cells are all missing.
        drop_empty_rows: Drop rows whose cells are all missing.

    Returns:
        str: Path of the UTF-8 encoded CSV file, named
        ``cleaned__<sheet>.csv``.

    Raises:
        ValueError: If no workbook bytes or no sheet name are provided.
    """
    # Local imports: the module's import block does not provide these.
    import os
    import re
    import tempfile

    if not file_bytes:
        raise ValueError("Please upload an Excel file first.")
    if not sheet_name:
        raise ValueError("Please select a sheet first.")

    df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")

    if drop_empty_cols:
        df = df.dropna(axis=1, how="all")
    if drop_empty_rows:
        df = df.dropna(axis=0, how="all")

    # Sheet names are user data; strip characters that are unsafe in file
    # names so the export cannot escape the temp directory or fail to open.
    safe_sheet = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", str(sheet_name)).strip() or "sheet"

    # A dedicated directory keeps the friendly file name without clashing
    # with exports from other requests.
    out_dir = tempfile.mkdtemp(prefix="cleaned_csv_")
    out_path = os.path.join(out_dir, f"cleaned__{safe_sheet}.csv")
    df.to_csv(out_path, index=False, encoding="utf-8")
    return out_path
116
+
117
+
118
with gr.Blocks(title="CGA Excel Analyzer") as demo:
    # ---- Header ---------------------------------------------------------
    gr.Markdown("## 📊 CGA Excel Analyzer (Gradio)")
    gr.Markdown("Upload an Excel file (.xlsx). Select a sheet, preview it, see basic stats, and download cleaned CSV.")

    # ---- Values kept between events ---------------------------------------
    file_state = gr.State(None)  # raw bytes of the uploaded workbook
    sheet_state = gr.State(None)  # name of the currently selected sheet

    # ---- Inputs: file picker and sheet selector ---------------------------
    with gr.Row():
        upload = gr.File(
            label="Upload Excel (.xlsx)",
            file_types=[".xlsx"],
        )
        # Starts empty and disabled; the upload handler fills it in.
        sheet_dd = gr.Dropdown(
            label="Sheet",
            choices=[],
            interactive=False,
        )

    # ---- Basic statistics of the selected sheet ---------------------------
    with gr.Row():
        rows_out = gr.Number(label="Rows", value=0, precision=0)
        cols_out = gr.Number(label="Columns", value=0, precision=0)
        missing_out = gr.Number(label="Missing cells", value=0, precision=0)

    # ---- Read-only preview of the selected sheet --------------------------
    df_view = gr.Dataframe(label="Preview", interactive=False, wrap=True)

    # ---- Options applied only when exporting the CSV ----------------------
    with gr.Accordion("🧹 Cleaning options (for CSV export)", open=False):
        drop_cols = gr.Checkbox(label="Drop fully empty columns", value=True)
        drop_rows = gr.Checkbox(label="Drop fully empty rows", value=True)

    download_btn = gr.Button("⬇️ Download cleaned CSV")
    download_file = gr.File(label="Your cleaned CSV")

    # ---- Event wiring -----------------------------------------------------
    # New upload: refresh the sheet list, preview, stats and stored state.
    upload.change(
        on_upload,
        inputs=[upload],
        outputs=[
            sheet_dd,
            df_view,
            rows_out,
            cols_out,
            missing_out,
            file_state,
            sheet_state,
        ],
    )

    # Different sheet picked: re-read it from the stored workbook bytes.
    sheet_dd.change(
        on_sheet_change,
        inputs=[sheet_dd, file_state],
        outputs=[df_view, rows_out, cols_out, missing_out, sheet_state],
    )

    # Export the stored sheet using the chosen cleaning options.
    download_btn.click(
        make_clean_csv,
        inputs=[file_state, sheet_state, drop_cols, drop_rows],
        outputs=[download_file],
    )

demo.launch()