YashsharmaPhD committed on
Commit
53ed032
·
verified ·
1 Parent(s): 45cf4ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -17,7 +17,6 @@ nltk.download('punkt')
17
  nltk.download('stopwords')
18
  stop_words = set(stopwords.words('english'))
19
 
20
- # Global DataFrame to hold embeddings and metadata
21
  embed_df = pd.DataFrame()
22
 
23
  def analyze_bigrams(zip_file, perplexity):
@@ -25,20 +24,16 @@ def analyze_bigrams(zip_file, perplexity):
25
  if zip_file is None:
26
  return "Please upload a ZIP file containing .txt files.", None
27
 
28
- # Wrap the binary zip file with BytesIO
29
  zip_stream = io.BytesIO(zip_file)
30
 
31
- # Extract files into a temporary directory
32
  with tempfile.TemporaryDirectory() as tmpdir:
33
  with zipfile.ZipFile(zip_stream, 'r') as zip_ref:
34
  zip_ref.extractall(tmpdir)
35
 
36
- # Find all .txt files
37
  txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
38
  if not txt_files:
39
- return "No .txt files found in the ZIP file.", None
40
 
41
- # Read and tokenize texts
42
  all_texts = []
43
  for path in txt_files:
44
  with open(path, "r", encoding="utf-8") as f:
@@ -73,29 +68,29 @@ def analyze_bigrams(zip_file, perplexity):
73
  size="count", template="plotly_white", title="Bigram t-SNE Projection")
74
  fig.update_layout(dragmode="lasso")
75
 
76
- return "✅ Bigram analysis complete. Select points on the plot.", fig
77
 
78
- def generate_bar_plot(selected_points):
79
  global embed_df
80
- if not selected_points or embed_df.empty:
81
  return None
82
 
83
- selected_indices = [point["pointIndex"] for point in selected_points["points"]]
84
  selected_df = embed_df.iloc[selected_indices]
85
 
86
- bar_fig = px.bar(selected_df.sort_values("count", ascending=False),
87
- x="count", y="bigram", orientation="h",
88
- title="Selected Bigram Frequencies")
89
- return bar_fig
90
 
91
  # Gradio UI
92
  with gr.Blocks() as demo:
93
- gr.Markdown("## 📦 Upload a ZIP of .txt files to Analyze Bigrams with t-SNE and Select to Compare Frequencies")
94
 
95
  zip_input = gr.File(label="Upload ZIP File of .txt Files", type="binary")
96
  perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
97
 
98
- analyze_btn = gr.Button("Analyze Bigrams")
99
  status = gr.Label()
100
  scatter_plot = gr.Plot()
101
  bar_plot = gr.Plot()
@@ -104,9 +99,8 @@ with gr.Blocks() as demo:
104
  inputs=[zip_input, perplexity_input],
105
  outputs=[status, scatter_plot])
106
 
107
- scatter_plot.select(generate_bar_plot,
108
- inputs=[],
109
- outputs=bar_plot)
110
 
111
- # Enable public sharing (optional for Hugging Face Spaces)
112
  demo.launch(share=True)
 
17
  nltk.download('stopwords')
18
  stop_words = set(stopwords.words('english'))
19
 
 
20
  embed_df = pd.DataFrame()
21
 
22
  def analyze_bigrams(zip_file, perplexity):
 
24
  if zip_file is None:
25
  return "Please upload a ZIP file containing .txt files.", None
26
 
 
27
  zip_stream = io.BytesIO(zip_file)
28
 
 
29
  with tempfile.TemporaryDirectory() as tmpdir:
30
  with zipfile.ZipFile(zip_stream, 'r') as zip_ref:
31
  zip_ref.extractall(tmpdir)
32
 
 
33
  txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
34
  if not txt_files:
35
+ return "No .txt files found.", None
36
 
 
37
  all_texts = []
38
  for path in txt_files:
39
  with open(path, "r", encoding="utf-8") as f:
 
68
  size="count", template="plotly_white", title="Bigram t-SNE Projection")
69
  fig.update_layout(dragmode="lasso")
70
 
71
+ return "✅ Bigram analysis complete. Use lasso to select points.", fig
72
 
73
+ def generate_bar_plot(events):
74
  global embed_df
75
+ if not events or embed_df.empty:
76
  return None
77
 
78
+ selected_indices = [pt["pointIndex"] for pt in events]
79
  selected_df = embed_df.iloc[selected_indices]
80
 
81
+ fig = px.bar(selected_df.sort_values("count", ascending=False),
82
+ x="count", y="bigram", orientation="h",
83
+ title="Selected Bigram Frequencies")
84
+ return fig
85
 
86
  # Gradio UI
87
  with gr.Blocks() as demo:
88
+ gr.Markdown("## 📦 Upload ZIP of .txt files to Analyze Bigrams")
89
 
90
  zip_input = gr.File(label="Upload ZIP File of .txt Files", type="binary")
91
  perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
92
 
93
+ analyze_btn = gr.Button("Analyze")
94
  status = gr.Label()
95
  scatter_plot = gr.Plot()
96
  bar_plot = gr.Plot()
 
99
  inputs=[zip_input, perplexity_input],
100
  outputs=[status, scatter_plot])
101
 
102
+ gr.Plot.update(scatter_plot, interactive=True)
103
+
104
+ gr.plotly_events(scatter_plot, select_event=True)(generate_bar_plot, inputs=None, outputs=bar_plot)
105
 
 
106
  demo.launch(share=True)