kambris committed on
Commit
9a034e8
·
verified ·
1 Parent(s): bb3e7d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -2,60 +2,65 @@ import gradio as gr
2
  from bertopic import BERTopic
3
  from sentence_transformers import SentenceTransformer
4
 
 
5
  def run_from_textfile(file):
6
  if file is None:
7
  return "Please upload a .txt file.", "", None
8
 
9
- # Read text file
10
- text = file.read().decode("utf-8")
 
 
 
 
 
 
 
11
  docs = [line.strip() for line in text.split("\n") if line.strip()]
12
 
13
  if len(docs) < 3:
14
  return "Need at least 3 documents (one per line).", "", None
15
 
16
- # Embedding model
17
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
18
 
19
- # Topic model
20
  topic_model = BERTopic(embedding_model=embedder)
21
  topics, probs = topic_model.fit_transform(docs)
22
 
23
- # Topic info
24
  topic_info = topic_model.get_topic_info().to_string()
25
 
26
- # Map each doc to its topic
27
  assignments = "\n".join([f"Doc {i+1}: Topic {topics[i]}" for i in range(len(docs))])
28
 
29
- # Visualization
30
  fig = topic_model.visualize_barchart(top_n_topics=10)
31
 
32
  return topic_info, assignments, fig
33
 
34
 
 
35
  with gr.Blocks() as demo:
36
  gr.Markdown("# 🧠 Topic Modeling from TXT File (BERTopic)")
37
  gr.Markdown(
38
- "Upload a plain text (.txt) file. Each line should contain one LLM response.\n\n"
39
- "Example format:\n"
40
- "```\n"
41
- "Response 1...\n"
42
- "Response 2...\n"
43
- "Response 3...\n"
44
- "```\n"
45
  )
46
 
47
  file_input = gr.File(label="Upload .txt file")
48
 
49
- run_btn = gr.Button("Run Topic Modeling")
50
 
51
- topic_output = gr.Textbox(label="Topic Overview", lines=10)
52
- assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=10)
53
  fig_output = gr.Plot(label="Topic Visualization")
54
 
55
- run_btn.click(
56
  fn=run_from_textfile,
57
  inputs=file_input,
58
  outputs=[topic_output, assignment_output, fig_output]
59
  )
60
 
 
61
  demo.launch()
 
2
  from bertopic import BERTopic
3
  from sentence_transformers import SentenceTransformer
4
 
5
def _read_uploaded_text(file, encoding="utf-8-sig"):
    """Return the decoded text of an upload, whatever object the UI hands over.

    Depending on the Gradio version and the ``type`` setting of ``gr.File``,
    the callback may receive raw bytes, a filesystem path (a ``str`` or
    ``str`` subclass), or a file-like wrapper. Each shape is handled
    explicitly instead of probing with a bare ``except``.

    Args:
        file: ``bytes``/``bytearray``, a path (``str`` or ``os.PathLike``),
            an object with ``.read()``, or an object whose ``.name`` is a path.
        encoding: Codec used for decoding. The default ``utf-8-sig`` reads
            plain UTF-8 and also drops a leading byte-order mark, so the
            BOM does not end up inside the first document.

    Returns:
        The file content as ``str``.

    Raises:
        TypeError: If ``file`` is none of the supported shapes.
        UnicodeDecodeError: If the content is not valid for ``encoding``.
        OSError: If a path is given but cannot be opened.
    """
    import os  # local import keeps this block self-contained

    # Raw content (e.g. gr.File(type="binary")).
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode(encoding)

    # A path on disk; read as bytes so decoding is under our control.
    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as handle:
            return handle.read().decode(encoding)

    # File-like object (temporary-file wrappers, BytesIO, ...).
    if callable(getattr(file, "read", None)):
        payload = file.read()
        if isinstance(payload, (bytes, bytearray)):
            return bytes(payload).decode(encoding)
        return payload  # already text

    # Wrapper that only exposes the temporary path via ``.name``.
    path = getattr(file, "name", None)
    if isinstance(path, (str, os.PathLike)):
        with open(path, "rb") as handle:
            return handle.read().decode(encoding)

    raise TypeError(f"Unsupported upload type: {type(file).__name__}")


def run_from_textfile(file):
    """Fit a BERTopic model on an uploaded text file (one document per line).

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(topic_info, assignments, fig)``:
        ``topic_info`` is the topic table rendered as a string,
        ``assignments`` has one ``"Doc N: Topic T"`` line per document, and
        ``fig`` is the bar-chart figure of the top topics. When validation
        fails, the tuple is ``(message, "", None)`` instead.
    """
    if file is None:
        return "Please upload a .txt file.", "", None

    # ---- Read the upload (bytes, path, or file-like) ----
    try:
        text = _read_uploaded_text(file)
    except UnicodeDecodeError:
        # Report bad encodings to the user instead of crashing the callback.
        return "Could not decode the file as UTF-8 text.", "", None

    # Split the text into documents (one per line); blank lines are dropped
    # and strip() also removes the "\r" left over from Windows line endings.
    docs = [line.strip() for line in text.split("\n") if line.strip()]

    if len(docs) < 3:
        return "Need at least 3 documents (one per line).", "", None

    # ---- Embedding Model ----
    embedder = SentenceTransformer("all-MiniLM-L6-v2")

    # ---- Topic Modeling ----
    topic_model = BERTopic(embedding_model=embedder)
    topics, probs = topic_model.fit_transform(docs)

    # ---- Topic Summary ----
    topic_info = topic_model.get_topic_info().to_string()

    # ---- Document Topic Assignments ----
    # fit_transform yields one topic id per input document, in order.
    assignments = "\n".join(
        f"Doc {number}: Topic {topic}"
        for number, topic in enumerate(topics, start=1)
    )

    # ---- Visualization ----
    fig = topic_model.visualize_barchart(top_n_topics=10)

    return topic_info, assignments, fig
41
 
42
 
43
# ---- Gradio Interface ----
# Layout, top to bottom: title, usage notes, upload widget, trigger button,
# then the three result panes filled by run_from_textfile.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Topic Modeling from TXT File (BERTopic)")

    # Usage instructions shown under the title (rendered as Markdown).
    intro_text = (
        "Upload a plain text (.txt) file. Each line should contain **one LLM response**.\n"
        "\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
    )
    gr.Markdown(intro_text)

    file_input = gr.File(label="Upload .txt file")

    run_button = gr.Button("Run Topic Modeling")

    # Result panes, in the same order as the values run_from_textfile returns.
    topic_output = gr.Textbox(label="Topic Overview", lines=12)
    assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
    fig_output = gr.Plot(label="Topic Visualization")
    result_panes = [topic_output, assignment_output, fig_output]

    # Clicking the button runs the model on the uploaded file.
    run_button.click(
        fn=run_from_textfile,
        inputs=file_input,
        outputs=result_panes,
    )

# Launch app
demo.launch()