YashsharmaPhD committed on
Commit
0f7f6fa
·
verified ·
1 Parent(s): 2068142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import shutil
3
  import zipfile
4
  import pandas as pd
5
  import nltk
@@ -12,28 +11,27 @@ import plotly.express as px
12
  import gradio as gr
13
  import tempfile
14
 
15
- # Download NLTK assets
16
  nltk.download('punkt')
17
  nltk.download('stopwords')
18
  stop_words = set(stopwords.words('english'))
19
 
20
- # Global variable
21
  embed_df = pd.DataFrame()
22
 
23
  def analyze_bigrams(zip_file, perplexity):
24
  global embed_df
25
  if zip_file is None:
26
- return "Please upload a ZIP file containing .txt files.", None
27
 
28
- # Extract uploaded zip to a temporary directory
29
  with tempfile.TemporaryDirectory() as tmpdir:
30
- with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
31
  zip_ref.extractall(tmpdir)
32
 
33
- # Gather all .txt files
34
  txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
35
  if not txt_files:
36
- return "No .txt files found in the ZIP file.", None
37
 
38
  all_texts = []
39
  for file_path in txt_files:
@@ -69,37 +67,34 @@ def analyze_bigrams(zip_file, perplexity):
69
  size='count', title="Bigram Embeddings", template='plotly_white')
70
  fig.update_layout(dragmode='lasso')
71
 
72
- return "Bigram analysis complete. Select points on the plot below.", fig
73
 
74
- def generate_bar_plot(selected_indices):
75
- global embed_df
76
- if not embed_df.empty and selected_indices:
77
- selected_df = embed_df.iloc[selected_indices]
78
- fig = px.bar(selected_df.sort_values("count", ascending=False),
79
- x="count", y="bigram", orientation="h",
80
- title="Selected Bigram Frequencies")
81
- return fig
82
- return None
83
 
84
  with gr.Blocks() as demo:
85
  gr.Markdown("## 📦 Upload a ZIP of .txt files to Analyze Bigrams")
86
 
87
-
88
- zip_input = gr.File(label="Upload ZIP File of .txt Files", type="filepath") # ✅ FIXED
89
-
90
  perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
91
-
92
  generate_btn = gr.Button("Generate Scatter Plot")
 
93
  status_output = gr.Label()
94
  scatter_plot = gr.Plot()
 
95
  bar_plot = gr.Plot()
96
 
97
  generate_btn.click(fn=analyze_bigrams,
98
  inputs=[zip_input, perplexity_input],
99
- outputs=[status_output, scatter_plot])
100
 
101
- scatter_plot.select(fn=generate_bar_plot,
102
- inputs=[],
103
  outputs=bar_plot)
104
 
105
  demo.launch()
 
1
  import os
 
2
  import zipfile
3
  import pandas as pd
4
  import nltk
 
11
  import gradio as gr
12
  import tempfile
13
 
14
+ # NLTK setup
15
  nltk.download('punkt')
16
  nltk.download('stopwords')
17
  stop_words = set(stopwords.words('english'))
18
 
19
+ # Global variable to store embeddings dataframe
20
  embed_df = pd.DataFrame()
21
 
22
  def analyze_bigrams(zip_file, perplexity):
23
  global embed_df
24
  if zip_file is None:
25
+ return "Please upload a ZIP file containing .txt files.", None, None
26
 
27
+ # Extract to temp dir
28
  with tempfile.TemporaryDirectory() as tmpdir:
29
+ with zipfile.ZipFile(zip_file, 'r') as zip_ref:
30
  zip_ref.extractall(tmpdir)
31
 
 
32
  txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
33
  if not txt_files:
34
+ return "No .txt files found in the ZIP file.", None, None
35
 
36
  all_texts = []
37
  for file_path in txt_files:
 
67
  size='count', title="Bigram Embeddings", template='plotly_white')
68
  fig.update_layout(dragmode='lasso')
69
 
70
+ return "Bigram analysis complete.", fig, embed_df
71
 
72
+ def generate_bar_plot(selected_df):
73
+ if selected_df is None or selected_df.empty:
74
+ return None
75
+ fig = px.bar(selected_df.sort_values("count", ascending=False),
76
+ x="count", y="bigram", orientation="h",
77
+ title="Selected Bigram Frequencies")
78
+ return fig
 
 
79
 
80
  with gr.Blocks() as demo:
81
  gr.Markdown("## 📦 Upload a ZIP of .txt files to Analyze Bigrams")
82
 
83
+ zip_input = gr.File(label="Upload ZIP File of .txt Files", type="filepath")
 
 
84
  perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
 
85
  generate_btn = gr.Button("Generate Scatter Plot")
86
+
87
  status_output = gr.Label()
88
  scatter_plot = gr.Plot()
89
+ bigram_table = gr.Dataframe(label="Top Bigrams", interactive=True)
90
  bar_plot = gr.Plot()
91
 
92
  generate_btn.click(fn=analyze_bigrams,
93
  inputs=[zip_input, perplexity_input],
94
+ outputs=[status_output, scatter_plot, bigram_table])
95
 
96
+ bigram_table.change(fn=generate_bar_plot,
97
+ inputs=bigram_table,
98
  outputs=bar_plot)
99
 
100
  demo.launch()