venkatl committed on
Commit
bc7d030
·
verified ·
1 Parent(s): f557181

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +7 -8
  2. a.py +31 -0
  3. app.py +321 -0
  4. pmusha.xlsx +0 -0
  5. requirements.txt +64 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Padmavathi Ws
3
- emoji: 🏢
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 6.1.0
8
  app_file: app.py
9
- pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: padmavathi_ws
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 6.0.1
6
+ python_version: 3.12.10
7
  ---
8
 
9
+
10
+ Telugu Regex
11
+ [\u0C00-\u0C7F]+
a.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ citation ="""
2
+ # **Womens Studies Data Analysis Tool**
3
+ Developed for
4
+ **Dr. Padmavathi**
5
+ **Department Of Women Studies**
6
+ **Sri Padmavathi Mahila Visvavidyalayam (SPMVV), Tirupati**
7
+
8
+ ---
9
+
10
+ ### **📌 Data Ownership**
11
+ All data presented, analyzed, or processed in this application
12
+ **belongs to the Department of Women Studies, SPMVV**
13
+ and has been collected exclusively under their research activities.
14
+
15
+ Any reuse, redistribution, or publication of the data
16
+ **requires explicit written permission** from the institution.
17
+
18
+ ---
19
+
20
+ ### **📚 Suggested Research Citation (Copy & Use)**
21
+
22
+ > **Dr Padmavathi, [Department Of Women Studies], Sri Padmavathi Mahila Visvavidyalayam (SPMVV).**
23
+ > *Data Analysis Dataset, 2025.*
24
+ > Department of Women Studies, SPMVV, Tirupati.
25
+
26
+ ---
27
+
28
+ If you use this tool or the data in academic work,
29
+ **please cite using the above citation format.**
30
+ """
31
+
app.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import gradio as gr
6
+ from io import BytesIO
7
+ import base64
8
+ import random
9
+ import scipy.stats as ss
10
+
11
+ from PIL import Image
12
+
13
def fig_to_pil(fig):
    """Render a figure to an in-memory PNG and return it as a PIL image.

    Args:
        fig: Object exposing ``savefig`` (the Matplotlib figures built by the
            plotting functions in this file).

    Returns:
        The image object produced by ``Image.open`` on the PNG bytes.
    """
    png_stream = BytesIO()
    fig.savefig(png_stream, format="png", bbox_inches="tight")
    # Rewind so the image reader starts at the first PNG byte.
    png_stream.seek(0)
    return Image.open(png_stream)
18
+
19
+ # -----------------------------
20
+ # Load Data
21
+ # -----------------------------
22
# Load the bundled workbook once at import time; every handler in this module
# reads from this shared frame.
df = pd.read_excel("pmusha.xlsx")

# Column labels partitioned by dtype. They feed the dropdown choices and the
# numeric-vs-categorical branching of the analysis functions.
numeric_cols = df.select_dtypes(include="number").columns
categorical_cols = df.select_dtypes(include=["object", "category"]).columns
26
+
27
+ # ----------------------------------------------------
28
+ # 1. Descriptive Statistics
29
+ # ----------------------------------------------------
30
def get_descriptive_stats():
    """Summarise every numeric column of the loaded workbook.

    Returns:
        pandas.DataFrame: one row per numeric column holding the statistics
        produced by ``DataFrame.describe``, preceded by a ``Feature`` column
        with the column name. When the workbook has no numeric columns an
        empty table with the same header is returned.
    """
    if numeric_cols.empty:
        # describe() raises ValueError on a frame without columns; give the UI
        # an empty table instead of a traceback.
        return pd.DataFrame(
            columns=["Feature", "count", "mean", "std", "min", "25%", "50%", "75%", "max"]
        )

    summary = df[numeric_cols].describe().T
    # Name the index explicitly so the label column is always "Feature",
    # whether or not the source columns index carried a name of its own.
    summary.index.name = "Feature"
    return summary.reset_index()
34
+
35
def download_keyword_counts(df):
    """Write a keyword-count table to a uniquely named CSV and return its path.

    Args:
        df: Table to export. Note that this parameter shadows the module-level
            ``df``; only the argument is used here.

    Returns:
        str: Path of the CSV file that was written (inside the system
        temporary directory, named ``keyword_counts_*.csv``).
    """
    import tempfile

    # Reserve a guaranteed-unique file name. A random 1..1000 suffix in the
    # working directory can collide and overwrite a previous export.
    with tempfile.NamedTemporaryFile(
        prefix="keyword_counts_", suffix=".csv", delete=False
    ) as handle:
        path = handle.name

    # Write by path after the handle is closed so pandas controls encoding and
    # line endings exactly as it does for any other file target.
    df.to_csv(path, index=False)
    return path
40
+
41
+ # ----------------------------------------------------
42
+ # 2. Keyword Frequency Table + Plots
43
+ # ----------------------------------------------------
44
+
45
def keyword_frequency(column):
    """Count comma-separated keywords in ``column`` and chart their distribution.

    Every cell of ``df[column]`` is converted to text and split on commas;
    each trimmed, non-empty piece counts as one keyword occurrence. Empty
    pieces (from ``"a,,b"`` or a trailing comma) are ignored.

    Args:
        column: Label of a column in the module-level ``df``. ``None`` (no
            dropdown selection) is accepted and yields an empty result.

    Returns:
        tuple: ``(counts, bar, pie, hbar, pareto, scatter, cumulative)``.
        ``counts`` is a DataFrame with columns ``Keyword``, ``Count`` and
        ``Rank``, ordered by descending count. The other six items are images
        produced by ``fig_to_pil``, or ``None`` when there is nothing to plot.
    """
    no_plots = (None,) * 6
    if column is None:
        return (pd.DataFrame(columns=["Keyword", "Count", "Rank"]), *no_plots)

    keywords = df[column].dropna().astype(str).str.split(",").explode().str.strip()
    keywords = keywords[keywords != ""]

    # value_counts() already orders by descending frequency, so the same frame
    # drives the table, the Pareto chart and the cumulative curve; rank and
    # cumulative share therefore always agree with the displayed order.
    counts = keywords.value_counts().reset_index()
    counts.columns = ["Keyword", "Count"]
    counts["Rank"] = range(1, len(counts) + 1)
    if counts.empty:
        return (counts, *no_plots)

    top15 = counts.head(15)
    top10 = counts.head(10)
    cum_percentage = counts["Count"].cumsum() / counts["Count"].sum() * 100

    def _render(fig):
        # Release the figure even if PNG encoding fails.
        try:
            return fig_to_pil(fig)
        finally:
            plt.close(fig)

    def _slant_xlabels(ax):
        # Restyle the existing tick labels instead of calling
        # set_xticklabels(), which expects fixed tick positions.
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_horizontalalignment("right")

    # --- BAR CHART ---
    fig_bar, ax_bar = plt.subplots(figsize=(8, 4))
    ax_bar.bar(top15["Keyword"], top15["Count"])
    ax_bar.set_title(f"Top Keywords in {column}")
    _slant_xlabels(ax_bar)
    bar_img = _render(fig_bar)

    # --- PIE CHART ---
    fig_pie, ax_pie = plt.subplots(figsize=(6, 6))
    ax_pie.pie(top10["Count"], labels=top10["Keyword"], autopct="%1.1f%%")
    ax_pie.set_title(f"Pie Chart {column} (Distribution)")
    pie_img = _render(fig_pie)

    # --- HORIZONTAL BAR CHART ---
    fig_hbar, ax_hbar = plt.subplots(figsize=(8, 6))
    ax_hbar.barh(top15["Keyword"], top15["Count"])
    ax_hbar.set_title(f"Top Keywords in {column} (Horizontal Bar)")
    fig_hbar.tight_layout()  # act on this figure, not the implicit current one
    hbar_img = _render(fig_hbar)

    # --- PARETO CHART (counts as bars, cumulative share as a line) ---
    fig_pareto, ax_counts = plt.subplots(figsize=(8, 4))
    ax_counts.bar(top15["Keyword"], top15["Count"], color="skyblue")
    ax_share = ax_counts.twinx()
    ax_share.plot(top15["Keyword"], cum_percentage.head(15), color="red", marker="o")
    _slant_xlabels(ax_counts)
    ax_counts.set_title(f"Pareto Analysis of {column}")
    pareto_img = _render(fig_pareto)

    # --- SCATTER PLOT (Rank vs Frequency) ---
    fig_scatter, ax_scatter = plt.subplots(figsize=(6, 4))
    ax_scatter.scatter(counts["Rank"], counts["Count"])
    ax_scatter.set_title(f"Rank vs Frequency for {column}")
    ax_scatter.set_xlabel("Rank (1 = most common)")
    ax_scatter.set_ylabel("Frequency")
    scatter_img = _render(fig_scatter)

    # --- CUMULATIVE DISTRIBUTION PLOT ---
    fig_cum, ax_cum = plt.subplots(figsize=(6, 4))
    ax_cum.plot(cum_percentage.values)
    ax_cum.set_title(f"Cumulative Distribution of {column}")
    ax_cum.set_ylabel("Cumulative %")
    ax_cum.set_xlabel("Keyword Rank")
    cum_img = _render(fig_cum)

    return counts, bar_img, pie_img, hbar_img, pareto_img, scatter_img, cum_img
112
+
113
+ # ----------------------------------------------------
114
+ # 3. Correlation Explorer
115
+ # ----------------------------------------------------
116
def explore_two_columns(col1, col2):
    """Describe the relationship between two columns with a statistic and plots.

    The analysis is chosen from each column's membership in the module-level
    ``numeric_cols`` / ``categorical_cols``:

    * numeric vs numeric: Pearson correlation, scatter plot, regression line
      and overlaid histograms;
    * categorical vs categorical: Cramér's V, a crosstab with totals, a
      heatmap and a bar chart of the row totals;
    * one of each: box plot and violin plot of the numeric column grouped by
      the categorical one.

    Args:
        col1: Label of the first column in ``df`` (``None`` if unselected).
        col2: Label of the second column in ``df`` (``None`` if unselected).

    Returns:
        tuple: ``(summary_text, crosstab_or_None, image1, image2, image3)``.
        Image slots that a branch does not use are ``None``.
    """
    if col1 is None or col2 is None:
        return "Please select both columns.", None, None, None, None

    def _kind(name):
        if name in numeric_cols:
            return "numeric"
        if name in categorical_cols:
            return "categorical"
        return None

    kind1, kind2 = _kind(col1), _kind(col2)
    if kind1 is None or kind2 is None:
        # e.g. datetime or boolean columns: none of the three analyses applies,
        # and treating them as "the numeric one" would produce nonsense plots.
        unsupported = col1 if kind1 is None else col2
        message = (
            f"Column '{unsupported}' is neither numeric nor categorical; "
            "no analysis is available for it."
        )
        return message, None, None, None, None

    def _render(fig):
        # Release the figure even if PNG encoding fails.
        try:
            return fig_to_pil(fig)
        finally:
            plt.close(fig)

    def _slant_xlabels(ax):
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_horizontalalignment("right")

    c1 = df[col1]
    c2 = df[col2]

    # NUMERIC vs NUMERIC
    if kind1 == "numeric" and kind2 == "numeric":
        corr = c1.corr(c2)
        result_text = f"Pearson Correlation = {corr:.4f}"

        fig, ax = plt.subplots(figsize=(6, 4))
        ax.scatter(c1, c2)
        ax.set_xlabel(col1)
        ax.set_ylabel(col2)
        ax.set_title(f"{col1} vs {col2} (Scatter)")
        scatter_img = _render(fig)

        fig, ax = plt.subplots(figsize=(6, 4))
        sns.regplot(x=c1, y=c2, ax=ax)
        ax.set_title("Regression Line")
        regression_img = _render(fig)

        # Pass ax explicitly rather than relying on the implicit current axes.
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.histplot(c1, color="blue", kde=True, label=col1, ax=ax)
        sns.histplot(c2, color="orange", kde=True, label=col2, ax=ax)
        ax.legend()
        ax.set_title("Distribution Comparison")
        distribution_img = _render(fig)

        print(result_text)
        return result_text, None, scatter_img, regression_img, distribution_img

    # CATEGORICAL vs CATEGORICAL
    if kind1 == "categorical" and kind2 == "categorical":
        confusion = pd.crosstab(c1, c2)
        v = cramers_v(confusion)
        result_text = f"Cramér’s V = {v:.4f}"

        # Table shown to the user: same crosstab plus row/column totals, with
        # the row categories moved into a regular column.
        conf = pd.crosstab(c1, c2, margins=True, margins_name="Total")
        conf_table = conf.reset_index()
        conf_table.columns = ["Category_1"] + list(conf.columns)

        fig, ax = plt.subplots(figsize=(6, 4))
        sns.heatmap(confusion, cmap="Blues", annot=True, fmt="d", ax=ax)
        ax.set_title("Crosstab Heatmap")
        heatmap_img = _render(fig)

        # Row totals, i.e. how often each category of col1 occurs.
        fig, ax = plt.subplots(figsize=(6, 4))
        confusion.sum(axis=1).plot(kind="bar", ax=ax)
        ax.set_title(f"Correlation between {col1} and {col2}")
        bar_img = _render(fig)

        print(result_text)
        return result_text, conf_table, heatmap_img, bar_img, None

    # MIXED TYPES: exactly one column is categorical at this point.
    if kind1 == "categorical":
        cat, num = col1, col2
    else:
        cat, num = col2, col1

    result_text = f"Numeric vs Categorical Analysis ({num} by {cat})"

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x=df[cat], y=df[num], ax=ax)
    ax.set_title("Boxplot")
    _slant_xlabels(ax)
    box_img = _render(fig)

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.violinplot(x=df[cat], y=df[num], ax=ax)
    ax.set_title("Violin Plot")
    _slant_xlabels(ax)
    violin_img = _render(fig)

    print(result_text)
    return result_text, None, box_img, violin_img, None
211
+
212
def cramers_v(confusion_matrix):
    """Return Cramér's V, a 0..1 association measure for a contingency table.

    Computed as ``sqrt(chi2 / (n * (min(rows, cols) - 1)))`` with ``chi2``
    taken from ``scipy.stats.chi2_contingency`` using its default settings
    (which, per the SciPy documentation, include Yates' continuity correction
    for tables with one degree of freedom).

    Args:
        confusion_matrix: Two-dimensional table of observed counts exposing
            ``.shape`` and ``.sum().sum()`` (e.g. the result of
            ``pandas.crosstab``).

    Returns:
        float: Cramér's V, or ``nan`` when the statistic is undefined: an
        empty table, a table with no observations, or a table with a single
        row or a single column.
    """
    r, k = confusion_matrix.shape
    n = confusion_matrix.sum().sum()
    denominator = n * (min(r, k) - 1)
    if denominator <= 0:
        # Avoids a 0/0 division (single row/column) and the ValueError that
        # chi2_contingency raises for an empty table.
        return float("nan")

    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    return np.sqrt(chi2 / denominator)
218
+
219
+
220
def compute_correlation(col1, col2):
    """Compute a correlation-style statistic for two columns of ``df``.

    * numeric vs numeric: Pearson correlation;
    * categorical vs categorical: Cramér's V, returned with the crosstab;
    * one of each: the categorical column is replaced by a per-row keyword
      score (for each comma-separated keyword in the cell, add that keyword's
      total frequency in the whole column) and Pearson correlation is taken
      between that score and the numeric column.

    Args:
        col1: Label of the first column in ``df`` (``None`` if unselected).
        col2: Label of the second column in ``df`` (``None`` if unselected).

    Returns:
        tuple: ``(result_text, crosstab_or_None)``. The crosstab is only
        returned for the categorical-vs-categorical case.
    """
    if col1 is None or col2 is None:
        return "Please select both columns.", None

    for name in (col1, col2):
        if name not in numeric_cols and name not in categorical_cols:
            # e.g. datetime/boolean columns: none of the cases below applies.
            message = (
                f"Column '{name}' is neither numeric nor categorical; "
                "correlation is not available for it."
            )
            return message, None

    c1 = df[col1]
    c2 = df[col2]

    # Case 1: numeric vs numeric
    if col1 in numeric_cols and col2 in numeric_cols:
        corr = c1.corr(c2)
        return f"Pearson Correlation = {corr:.4f}", None

    # Case 2: categorical vs categorical → Cramér’s V
    if col1 in categorical_cols and col2 in categorical_cols:
        confusion = pd.crosstab(c1, c2)
        v = cramers_v(confusion)
        # NOTE(review): the crosstab is returned with the categories in its
        # index; confirm the consuming table widget displays row labels.
        return f"Cramér’s V = {v:.4f}", confusion

    # Case 3: exactly one column is categorical → keyword-frequency score
    def keyword_score(col):
        """Per-row sum of the column-wide frequencies of the row's keywords."""
        totals = (
            df[col].dropna().astype(str).str.split(",").explode().str.strip().value_counts()
        )
        # astype(str) keeps non-string cells (numbers, dates) in an object
        # column from breaking the split below.
        cells = df[col].fillna("").astype(str)
        return cells.apply(
            lambda cell: sum(
                totals.get(part.strip(), 0) for part in cell.split(",") if part.strip()
            )
        )

    if col1 in categorical_cols:
        c1 = keyword_score(col1)
    else:
        c2 = keyword_score(col2)

    corr = c1.corr(c2)
    return f"Keyword-Frequency Based Correlation = {corr:.4f}", None
258
+
259
+
260
+ # ----------------------------------------------------
261
+ # Gradio UI
262
+ # ----------------------------------------------------
263
from a import citation  # Markdown body rendered on the "About & Citation" tab

with gr.Blocks(title="DATA ANALYSIS APP") as app:

    # NOTE(review): this banner names a different project and author than the
    # text in a.py's `citation` ("Womens Studies Data Analysis Tool ... Dr.
    # Padmavathi") — confirm which one is intended.
    gr.Markdown("# 📊 Youth Nutritional Data Analysis System \nUpload → Analyse → Export\n Developed by Dr. Indira Priyadarsini")

    with gr.Tab("ℹ️ About & Citation"):
        gr.Markdown(citation)

    with gr.Tab("1️⃣ Descriptive Statistics"):
        btn_stats = gr.Button("Generate Stats")
        stats_out = gr.Dataframe()
        btn_stats.click(get_descriptive_stats, outputs=stats_out)

    with gr.Tab("2️⃣ Keyword Frequency Explorer"):
        col_select = gr.Dropdown(choices=list(categorical_cols), label="Select Column")
        freq_table = gr.Dataframe(label="Keyword Counts")
        bar_plot = gr.Image(label="Bar Chart")
        pie_img = gr.Image(label="Pie Chart")
        hbar_img = gr.Image(label="Horizontal Bar Chart")
        pareto_img = gr.Image(label="Pareto Chart")
        scatter_img = gr.Image(label="Rank vs Frequency Scatter")
        cum_img = gr.Image(label="Cumulative Distribution")

        download_btn = gr.Button("Download as CSV")
        download_file = gr.File(label="Download File")

        # Recompute the table and all six charts whenever the column changes.
        col_select.change(
            keyword_frequency,
            inputs=col_select,
            outputs=[freq_table, bar_plot, pie_img, hbar_img, pareto_img, scatter_img, cum_img],
        )
        # Export whatever the counts table currently shows.
        download_btn.click(
            download_keyword_counts,
            inputs=freq_table,
            outputs=download_file,
        )

    # Tabs 3 and 4 now appear in their numbered order. The Correlation tab no
    # longer creates a second "Select Column" dropdown: it was not connected to
    # any event and rebound `col_select`, the name used by the Keyword tab.
    with gr.Tab("3️⃣ Correlation Explorer"):
        col1 = gr.Dropdown(choices=df.columns.tolist(), label="Column 1")
        col2 = gr.Dropdown(choices=df.columns.tolist(), label="Column 2")
        corr_btn = gr.Button("Compute Correlation")
        corr_text = gr.Textbox(label="Correlation Result")
        confusion_out = gr.Dataframe(label="Categorical Crosstab (if applicable)")
        corr_btn.click(compute_correlation, inputs=[col1, col2], outputs=[corr_text, confusion_out])

    with gr.Tab("4️⃣ Two-Column Relationship Explorer"):
        colA = gr.Dropdown(choices=df.columns.tolist(), label="Column A")
        colB = gr.Dropdown(choices=df.columns.tolist(), label="Column B")
        btn_rel = gr.Button("Explore Relationship")

        rel_text = gr.Textbox(label="Summary")
        rel_table = gr.Dataframe(label="Crosstab (if categorical)")
        rel_img1 = gr.Image()
        rel_img2 = gr.Image()
        rel_img3 = gr.Image()

        btn_rel.click(
            explore_two_columns,
            inputs=[colA, colB],
            outputs=[rel_text, rel_table, rel_img1, rel_img2, rel_img3],
        )

app.launch(theme=gr.themes.Monochrome())
pmusha.xlsx ADDED
Binary file (22.7 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ annotated-doc==0.0.4
3
+ annotated-types==0.7.0
4
+ anyio==4.11.0
5
+ brotli==1.2.0
6
+ cerebras_cloud_sdk==1.56.1
7
+ certifi==2025.10.5
8
+ click==8.3.1
9
+ contourpy==1.3.3
10
+ cycler==0.12.1
11
+ distro==1.9.0
12
+ et_xmlfile==2.0.0
13
+ fastapi==0.122.0
14
+ ffmpy==1.0.0
15
+ filelock==3.20.0
16
+ fonttools==4.60.1
17
+ fsspec==2025.10.0
18
+ gradio==6.0.1
19
+ gradio_client==2.0.0
20
+ groovy==0.1.2
21
+ h11==0.16.0
22
+ hf-xet==1.2.0
23
+ httpcore==1.0.9
24
+ httpx==0.28.1
25
+ huggingface_hub==1.1.5
26
+ idna==3.11
27
+ Jinja2==3.1.6
28
+ kiwisolver==1.4.9
29
+ markdown-it-py==4.0.0
30
+ MarkupSafe==3.0.3
31
+ matplotlib==3.10.7
32
+ mdurl==0.1.2
33
+ numpy==2.3.5
34
+ openpyxl==3.1.5
35
+ orjson==3.11.4
36
+ packaging==25.0
37
+ pandas==2.3.3
38
+ pillow==12.0.0
39
+ pydantic==2.12.3
40
+ pydantic_core==2.41.4
41
+ pydub==0.25.1
42
+ Pygments==2.19.2
43
+ pyparsing==3.2.5
44
+ python-dateutil==2.9.0.post0
45
+ python-multipart==0.0.20
46
+ pytz==2025.2
47
+ PyYAML==6.0.3
48
+ rich==14.2.0
49
+ safehttpx==0.1.7
50
+ scipy==1.16.3
51
+ seaborn==0.13.2
52
+ semantic-version==2.10.0
53
+ shellingham==1.5.4
54
+ six==1.17.0
55
+ sniffio==1.3.1
56
+ starlette==0.50.0
57
+ tomlkit==0.13.3
58
+ tqdm==4.67.1
59
+ typer==0.20.0
60
+ typer-slim==0.20.0
61
+ typing-inspection==0.4.2
62
+ typing_extensions==4.15.0
63
+ tzdata==2025.2
64
+ uvicorn==0.38.0