Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,54 +1,44 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from bertopic import BERTopic
|
| 3 |
from sentence_transformers import SentenceTransformer
|
| 4 |
-
import os
|
|
|
|
| 5 |
|
| 6 |
def run_from_textfile(file):
|
| 7 |
if file is None:
|
| 8 |
-
return "Please upload a .txt file.", "", None
|
| 9 |
|
| 10 |
-
# ---- Handle file input
|
| 11 |
text = ""
|
| 12 |
|
| 13 |
-
# 1. Check for the .decode() method, which is characteristic of the Gradio NamedString object
|
| 14 |
-
# used in some environments (like HuggingFace Spaces).
|
| 15 |
if hasattr(file, 'decode'):
|
| 16 |
try:
|
| 17 |
-
# HuggingFace Spaces/NamedString: file supports .decode() directly
|
| 18 |
text = file.decode("utf-8")
|
| 19 |
except Exception as e:
|
| 20 |
-
return f"Error decoding NamedString: {e}", "", None
|
| 21 |
|
| 22 |
-
# 2. If it does not have .decode(), it's likely a standard file object
|
| 23 |
-
# (or a path, though gr.File usually passes an object or path string)
|
| 24 |
-
# The original TemporaryFile-like object in local Gradio will support .read()
|
| 25 |
elif hasattr(file, 'read'):
|
| 26 |
try:
|
| 27 |
-
# Local Gradio/TemporaryFile-like object: file supports .read()
|
| 28 |
text = file.read().decode("utf-8")
|
| 29 |
except Exception as e:
|
| 30 |
-
return f"Error reading/decoding file object: {e}", "", None
|
| 31 |
|
| 32 |
-
# Optional: Handle the case where Gradio passed a string path instead of an object
|
| 33 |
elif isinstance(file, str) and os.path.exists(file):
|
| 34 |
try:
|
| 35 |
with open(file, 'r', encoding='utf-8') as f:
|
| 36 |
text = f.read()
|
| 37 |
except Exception as e:
|
| 38 |
-
return f"Error reading file from path: {e}", "", None
|
| 39 |
-
|
| 40 |
-
# Fallback check if text is still empty (e.g., if object type was unexpected)
|
| 41 |
if not text:
|
| 42 |
-
return "Could not read the file content. Please check the file type and content.", "", None
|
| 43 |
-
|
| 44 |
# Split the text into documents (one per line)
|
| 45 |
docs = [line.strip() for line in text.split("\n") if line.strip()]
|
| 46 |
-
|
| 47 |
if len(docs) < 3:
|
| 48 |
-
return "Need at least 3 documents (one per line).", "", None
|
| 49 |
|
| 50 |
# ---- Embedding Model ----
|
| 51 |
-
# Using 'all-MiniLM-L6-v2' as requested
|
| 52 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 53 |
|
| 54 |
# ---- Topic Modeling ----
|
|
@@ -56,16 +46,36 @@ def run_from_textfile(file):
|
|
| 56 |
topics, probs = topic_model.fit_transform(docs)
|
| 57 |
|
| 58 |
# ---- Topic Summary ----
|
| 59 |
-
# Convert to string and remove index for clean output
|
| 60 |
topic_info = topic_model.get_topic_info().to_string(index=False)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# ---- Document → Topic Assignments ----
|
| 63 |
assignments = "\n".join([f"Doc {i+1}: Topic {topics[i]}" for i in range(len(docs))])
|
| 64 |
|
| 65 |
# ---- Visualization ----
|
| 66 |
fig = topic_model.visualize_barchart(top_n_topics=10)
|
| 67 |
|
| 68 |
-
return topic_info, assignments, fig
|
| 69 |
|
| 70 |
# ---- Gradio Interface ----
|
| 71 |
with gr.Blocks() as demo:
|
|
@@ -75,20 +85,19 @@ with gr.Blocks() as demo:
|
|
| 75 |
"\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
|
| 76 |
)
|
| 77 |
|
| 78 |
-
# Ensure file_input is configured to pass a file object or path.
|
| 79 |
-
# The default setting should work with the logic above.
|
| 80 |
file_input = gr.File(label="Upload .txt file")
|
| 81 |
|
| 82 |
run_button = gr.Button("Run Topic Modeling")
|
| 83 |
|
| 84 |
topic_output = gr.Textbox(label="Topic Overview", lines=12)
|
|
|
|
| 85 |
assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
|
| 86 |
fig_output = gr.Plot(label="Topic Visualization")
|
| 87 |
|
| 88 |
run_button.click(
|
| 89 |
fn=run_from_textfile,
|
| 90 |
inputs=file_input,
|
| 91 |
-
outputs=[topic_output, assignment_output, fig_output]
|
| 92 |
)
|
| 93 |
|
| 94 |
# Launch app
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from bertopic import BERTopic
|
| 3 |
from sentence_transformers import SentenceTransformer
|
| 4 |
+
import os
|
| 5 |
+
import pandas as pd
|
| 6 |
|
| 7 |
def run_from_textfile(file):
    """Run BERTopic topic modeling over a line-delimited .txt upload.

    Args:
        file: The value Gradio's ``gr.File`` passes to the handler. Depending
            on the environment this may be a NamedString-like object exposing
            ``.decode()`` (e.g. HuggingFace Spaces), a temporary-file-like
            object exposing ``.read()`` (local Gradio), or a plain filesystem
            path string.

    Returns:
        A 4-tuple ``(topic_overview, topic_weights, assignments, figure)``.
        On any error the first element is a human-readable message, the two
        middle elements are empty strings, and the figure slot is ``None``.
    """
    if file is None:
        return "Please upload a .txt file.", "", "", None

    # ---- Handle file input ----
    text = ""

    # 1. NamedString-style objects expose .decode() directly.
    if hasattr(file, 'decode'):
        try:
            text = file.decode("utf-8")
        except Exception as e:
            return f"Error decoding NamedString: {e}", "", "", None

    # 2. Local Gradio hands over a temporary-file-like object with .read().
    elif hasattr(file, 'read'):
        try:
            text = file.read().decode("utf-8")
        except Exception as e:
            return f"Error reading/decoding file object: {e}", "", "", None

    # 3. Some Gradio configurations pass a plain path string instead.
    elif isinstance(file, str) and os.path.exists(file):
        try:
            with open(file, 'r', encoding='utf-8') as f:
                text = f.read()
        except Exception as e:
            return f"Error reading file from path: {e}", "", "", None

    # Fallback: unexpected input type, or the file was empty.
    if not text:
        return "Could not read the file content. Please check the file type and content.", "", "", None

    # One document per non-blank line.
    docs = [line.strip() for line in text.split("\n") if line.strip()]
    if len(docs) < 3:
        return "Need at least 3 documents (one per line).", "", "", None

    # ---- Embedding Model ----
    embedder = SentenceTransformer("all-MiniLM-L6-v2")

    # ---- Topic Modeling ----
    # NOTE(review): the construction of `topic_model` was elided as unchanged
    # context between hunks in the source diff; reconstructed minimally here —
    # confirm the exact BERTopic(...) arguments against the full file.
    topic_model = BERTopic(embedding_model=embedder)
    topics, probs = topic_model.fit_transform(docs)

    # ---- Topic Summary ----
    # Rendered without the index column for clean textbox output.
    topic_info = topic_model.get_topic_info().to_string(index=False)

    # ---- Topic weights: top-word importance scores per topic ----
    weights_output = "=" * 80 + "\n"
    weights_output += "TOPIC WEIGHTS (Word Importance Scores)\n"
    weights_output += "=" * 80 + "\n\n"

    # Skip BERTopic's outlier topic (-1).
    all_topics = [t for t in topic_model.get_topics().keys() if t != -1]

    for topic_id in all_topics:
        weights_output += f"TOPIC {topic_id}\n"
        weights_output += "-" * 40 + "\n"

        # Top words and their c-TF-IDF-style weights for this topic.
        topic_words = topic_model.get_topic(topic_id)
        if topic_words:
            for word, weight in topic_words[:10]:  # Top 10 words per topic
                weights_output += f" {word:20s} {weight:8.4f}\n"

        weights_output += "\n"

    # ---- Document → Topic Assignments ----
    assignments = "\n".join(f"Doc {i+1}: Topic {topics[i]}" for i in range(len(docs)))

    # ---- Visualization ----
    fig = topic_model.visualize_barchart(top_n_topics=10)

    return topic_info, weights_output, assignments, fig
|
| 79 |
|
| 80 |
# ---- Gradio Interface ----
|
| 81 |
with gr.Blocks() as demo:
|
|
|
|
| 85 |
"\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
|
| 86 |
)
|
| 87 |
|
|
|
|
|
|
|
| 88 |
file_input = gr.File(label="Upload .txt file")
|
| 89 |
|
| 90 |
run_button = gr.Button("Run Topic Modeling")
|
| 91 |
|
| 92 |
topic_output = gr.Textbox(label="Topic Overview", lines=12)
|
| 93 |
+
weights_output = gr.Textbox(label="📊 Topic Weights (Word Importance)", lines=20)
|
| 94 |
assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
|
| 95 |
fig_output = gr.Plot(label="Topic Visualization")
|
| 96 |
|
| 97 |
run_button.click(
|
| 98 |
fn=run_from_textfile,
|
| 99 |
inputs=file_input,
|
| 100 |
+
outputs=[topic_output, weights_output, assignment_output, fig_output]
|
| 101 |
)
|
| 102 |
|
| 103 |
# Launch app
|