Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,6 @@ nltk.download('punkt')
|
|
| 17 |
nltk.download('stopwords')
|
| 18 |
stop_words = set(stopwords.words('english'))
|
| 19 |
|
| 20 |
-
# Global DataFrame to hold embeddings and metadata
|
| 21 |
embed_df = pd.DataFrame()
|
| 22 |
|
| 23 |
def analyze_bigrams(zip_file, perplexity):
|
|
@@ -25,20 +24,16 @@ def analyze_bigrams(zip_file, perplexity):
|
|
| 25 |
if zip_file is None:
|
| 26 |
return "Please upload a ZIP file containing .txt files.", None
|
| 27 |
|
| 28 |
-
# Wrap the binary zip file with BytesIO
|
| 29 |
zip_stream = io.BytesIO(zip_file)
|
| 30 |
|
| 31 |
-
# Extract files into a temporary directory
|
| 32 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 33 |
with zipfile.ZipFile(zip_stream, 'r') as zip_ref:
|
| 34 |
zip_ref.extractall(tmpdir)
|
| 35 |
|
| 36 |
-
# Find all .txt files
|
| 37 |
txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
|
| 38 |
if not txt_files:
|
| 39 |
-
return "No .txt files found
|
| 40 |
|
| 41 |
-
# Read and tokenize texts
|
| 42 |
all_texts = []
|
| 43 |
for path in txt_files:
|
| 44 |
with open(path, "r", encoding="utf-8") as f:
|
|
@@ -73,29 +68,29 @@ def analyze_bigrams(zip_file, perplexity):
|
|
| 73 |
size="count", template="plotly_white", title="Bigram t-SNE Projection")
|
| 74 |
fig.update_layout(dragmode="lasso")
|
| 75 |
|
| 76 |
-
return "✅ Bigram analysis complete.
|
| 77 |
|
| 78 |
-
def generate_bar_plot(
|
| 79 |
global embed_df
|
| 80 |
-
if not
|
| 81 |
return None
|
| 82 |
|
| 83 |
-
selected_indices = [
|
| 84 |
selected_df = embed_df.iloc[selected_indices]
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
return
|
| 90 |
|
| 91 |
# Gradio UI
|
| 92 |
with gr.Blocks() as demo:
|
| 93 |
-
gr.Markdown("## 📦 Upload
|
| 94 |
|
| 95 |
zip_input = gr.File(label="Upload ZIP File of .txt Files", type="binary")
|
| 96 |
perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
|
| 97 |
|
| 98 |
-
analyze_btn = gr.Button("Analyze
|
| 99 |
status = gr.Label()
|
| 100 |
scatter_plot = gr.Plot()
|
| 101 |
bar_plot = gr.Plot()
|
|
@@ -104,9 +99,8 @@ with gr.Blocks() as demo:
|
|
| 104 |
inputs=[zip_input, perplexity_input],
|
| 105 |
outputs=[status, scatter_plot])
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
|
| 111 |
-
# Enable public sharing (optional for Hugging Face Spaces)
|
| 112 |
demo.launch(share=True)
|
|
|
|
| 17 |
nltk.download('stopwords')
|
| 18 |
stop_words = set(stopwords.words('english'))
|
| 19 |
|
|
|
|
| 20 |
embed_df = pd.DataFrame()
|
| 21 |
|
| 22 |
def analyze_bigrams(zip_file, perplexity):
|
|
|
|
| 24 |
if zip_file is None:
|
| 25 |
return "Please upload a ZIP file containing .txt files.", None
|
| 26 |
|
|
|
|
| 27 |
zip_stream = io.BytesIO(zip_file)
|
| 28 |
|
|
|
|
| 29 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 30 |
with zipfile.ZipFile(zip_stream, 'r') as zip_ref:
|
| 31 |
zip_ref.extractall(tmpdir)
|
| 32 |
|
|
|
|
| 33 |
txt_files = [os.path.join(tmpdir, f) for f in os.listdir(tmpdir) if f.endswith(".txt")]
|
| 34 |
if not txt_files:
|
| 35 |
+
return "No .txt files found.", None
|
| 36 |
|
|
|
|
| 37 |
all_texts = []
|
| 38 |
for path in txt_files:
|
| 39 |
with open(path, "r", encoding="utf-8") as f:
|
|
|
|
| 68 |
size="count", template="plotly_white", title="Bigram t-SNE Projection")
|
| 69 |
fig.update_layout(dragmode="lasso")
|
| 70 |
|
| 71 |
+
return "✅ Bigram analysis complete. Use lasso to select points.", fig
|
| 72 |
|
| 73 |
+
def generate_bar_plot(events):
|
| 74 |
global embed_df
|
| 75 |
+
if not events or embed_df.empty:
|
| 76 |
return None
|
| 77 |
|
| 78 |
+
selected_indices = [pt["pointIndex"] for pt in events]
|
| 79 |
selected_df = embed_df.iloc[selected_indices]
|
| 80 |
|
| 81 |
+
fig = px.bar(selected_df.sort_values("count", ascending=False),
|
| 82 |
+
x="count", y="bigram", orientation="h",
|
| 83 |
+
title="Selected Bigram Frequencies")
|
| 84 |
+
return fig
|
| 85 |
|
| 86 |
# Gradio UI
|
| 87 |
with gr.Blocks() as demo:
|
| 88 |
+
gr.Markdown("## 📦 Upload ZIP of .txt files to Analyze Bigrams")
|
| 89 |
|
| 90 |
zip_input = gr.File(label="Upload ZIP File of .txt Files", type="binary")
|
| 91 |
perplexity_input = gr.Number(label="t-SNE Perplexity", value=30)
|
| 92 |
|
| 93 |
+
analyze_btn = gr.Button("Analyze")
|
| 94 |
status = gr.Label()
|
| 95 |
scatter_plot = gr.Plot()
|
| 96 |
bar_plot = gr.Plot()
|
|
|
|
| 99 |
inputs=[zip_input, perplexity_input],
|
| 100 |
outputs=[status, scatter_plot])
|
| 101 |
|
| 102 |
+
gr.Plot.update(scatter_plot, interactive=True)
|
| 103 |
+
|
| 104 |
+
gr.plotly_events(scatter_plot, select_event=True)(generate_bar_plot, inputs=None, outputs=bar_plot)
|
| 105 |
|
|
|
|
| 106 |
demo.launch(share=True)
|