kambris committed on
Commit
b160c3d
·
verified ·
1 Parent(s): 5ce4f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -26
app.py CHANGED
@@ -1,54 +1,44 @@
1
  import gradio as gr
2
  from bertopic import BERTopic
3
  from sentence_transformers import SentenceTransformer
4
- import os # Import os for potential path checks, though the logic below is key
 
5
 
6
  def run_from_textfile(file):
7
  if file is None:
8
- return "Please upload a .txt file.", "", None
9
 
10
- # ---- Handle file input: Unify access for NamedString (Spaces) and file object (Local) ----
11
  text = ""
12
 
13
- # 1. Check for the .decode() method, which is characteristic of the Gradio NamedString object
14
- # used in some environments (like HuggingFace Spaces).
15
  if hasattr(file, 'decode'):
16
  try:
17
- # HuggingFace Spaces/NamedString: file supports .decode() directly
18
  text = file.decode("utf-8")
19
  except Exception as e:
20
- return f"Error decoding NamedString: {e}", "", None
21
 
22
- # 2. If it does not have .decode(), it's likely a standard file object
23
- # (or a path, though gr.File usually passes an object or path string)
24
- # The original TemporaryFile-like object in local Gradio will support .read()
25
  elif hasattr(file, 'read'):
26
  try:
27
- # Local Gradio/TemporaryFile-like object: file supports .read()
28
  text = file.read().decode("utf-8")
29
  except Exception as e:
30
- return f"Error reading/decoding file object: {e}", "", None
31
 
32
- # Optional: Handle the case where Gradio passed a string path instead of an object
33
  elif isinstance(file, str) and os.path.exists(file):
34
  try:
35
  with open(file, 'r', encoding='utf-8') as f:
36
  text = f.read()
37
  except Exception as e:
38
- return f"Error reading file from path: {e}", "", None
39
-
40
- # Fallback check if text is still empty (e.g., if object type was unexpected)
41
  if not text:
42
- return "Could not read the file content. Please check the file type and content.", "", None
43
-
44
  # Split the text into documents (one per line)
45
  docs = [line.strip() for line in text.split("\n") if line.strip()]
46
-
47
  if len(docs) < 3:
48
- return "Need at least 3 documents (one per line).", "", None
49
 
50
  # ---- Embedding Model ----
51
- # Using 'all-MiniLM-L6-v2' as requested
52
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
53
 
54
  # ---- Topic Modeling ----
@@ -56,16 +46,36 @@ def run_from_textfile(file):
56
  topics, probs = topic_model.fit_transform(docs)
57
 
58
  # ---- Topic Summary ----
59
- # Convert to string and remove index for clean output
60
  topic_info = topic_model.get_topic_info().to_string(index=False)
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # ---- Document → Topic Assignments ----
63
  assignments = "\n".join([f"Doc {i+1}: Topic {topics[i]}" for i in range(len(docs))])
64
 
65
  # ---- Visualization ----
66
  fig = topic_model.visualize_barchart(top_n_topics=10)
67
 
68
- return topic_info, assignments, fig
69
 
70
  # ---- Gradio Interface ----
71
  with gr.Blocks() as demo:
@@ -75,20 +85,19 @@ with gr.Blocks() as demo:
75
  "\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
76
  )
77
 
78
- # Ensure file_input is configured to pass a file object or path.
79
- # The default setting should work with the logic above.
80
  file_input = gr.File(label="Upload .txt file")
81
 
82
  run_button = gr.Button("Run Topic Modeling")
83
 
84
  topic_output = gr.Textbox(label="Topic Overview", lines=12)
 
85
  assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
86
  fig_output = gr.Plot(label="Topic Visualization")
87
 
88
  run_button.click(
89
  fn=run_from_textfile,
90
  inputs=file_input,
91
- outputs=[topic_output, assignment_output, fig_output]
92
  )
93
 
94
  # Launch app
 
1
  import gradio as gr
2
  from bertopic import BERTopic
3
  from sentence_transformers import SentenceTransformer
4
+ import os
5
+ import pandas as pd
6
 
7
  def run_from_textfile(file):
8
  if file is None:
9
+ return "Please upload a .txt file.", "", "", None
10
 
11
+ # ---- Handle file input ----
12
  text = ""
13
 
 
 
14
  if hasattr(file, 'decode'):
15
  try:
 
16
  text = file.decode("utf-8")
17
  except Exception as e:
18
+ return f"Error decoding NamedString: {e}", "", "", None
19
 
 
 
 
20
  elif hasattr(file, 'read'):
21
  try:
 
22
  text = file.read().decode("utf-8")
23
  except Exception as e:
24
+ return f"Error reading/decoding file object: {e}", "", "", None
25
 
 
26
  elif isinstance(file, str) and os.path.exists(file):
27
  try:
28
  with open(file, 'r', encoding='utf-8') as f:
29
  text = f.read()
30
  except Exception as e:
31
+ return f"Error reading file from path: {e}", "", "", None
32
+
 
33
  if not text:
34
+ return "Could not read the file content. Please check the file type and content.", "", "", None
35
+
36
  # Split the text into documents (one per line)
37
  docs = [line.strip() for line in text.split("\n") if line.strip()]
 
38
  if len(docs) < 3:
39
+ return "Need at least 3 documents (one per line).", "", "", None
40
 
41
  # ---- Embedding Model ----
 
42
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
43
 
44
  # ---- Topic Modeling ----
 
46
  topics, probs = topic_model.fit_transform(docs)
47
 
48
  # ---- Topic Summary ----
 
49
  topic_info = topic_model.get_topic_info().to_string(index=False)
50
 
51
+ # ---- TOPIC WEIGHTS (Word Importance per Topic) ----
52
+ weights_output = "=" * 80 + "\n"
53
+ weights_output += "TOPIC WEIGHTS (Word Importance Scores)\n"
54
+ weights_output += "=" * 80 + "\n\n"
55
+
56
+ # Get all topics except outlier topic (-1)
57
+ all_topics = [t for t in topic_model.get_topics().keys() if t != -1]
58
+
59
+ for topic_id in all_topics:
60
+ weights_output += f"TOPIC {topic_id}\n"
61
+ weights_output += "-" * 40 + "\n"
62
+
63
+ # Get top words and their weights for this topic
64
+ topic_words = topic_model.get_topic(topic_id)
65
+
66
+ if topic_words:
67
+ for word, weight in topic_words[:10]: # Top 10 words
68
+ weights_output += f" {word:20s} {weight:8.4f}\n"
69
+
70
+ weights_output += "\n"
71
+
72
  # ---- Document → Topic Assignments ----
73
  assignments = "\n".join([f"Doc {i+1}: Topic {topics[i]}" for i in range(len(docs))])
74
 
75
  # ---- Visualization ----
76
  fig = topic_model.visualize_barchart(top_n_topics=10)
77
 
78
+ return topic_info, weights_output, assignments, fig
79
 
80
  # ---- Gradio Interface ----
81
  with gr.Blocks() as demo:
 
85
  "\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
86
  )
87
 
 
 
88
  file_input = gr.File(label="Upload .txt file")
89
 
90
  run_button = gr.Button("Run Topic Modeling")
91
 
92
  topic_output = gr.Textbox(label="Topic Overview", lines=12)
93
+ weights_output = gr.Textbox(label="📊 Topic Weights (Word Importance)", lines=20)
94
  assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
95
  fig_output = gr.Plot(label="Topic Visualization")
96
 
97
  run_button.click(
98
  fn=run_from_textfile,
99
  inputs=file_input,
100
+ outputs=[topic_output, weights_output, assignment_output, fig_output]
101
  )
102
 
103
  # Launch app