kambris committed on
Commit
bb3e7d0
·
verified ·
1 Parent(s): 0e1f9e9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from bertopic import BERTopic
3
+ from sentence_transformers import SentenceTransformer
4
+
5
def _read_uploaded_text(file):
    """Return the text of an uploaded file, whatever form the upload takes.

    Accepted forms:
      * raw ``bytes`` / ``bytearray`` content,
      * a file-like object with ``.read()`` (binary or text mode),
      * a path (``str`` or ``os.PathLike``),
      * a wrapper object exposing the path through ``.name``.

    Bytes are decoded as UTF-8; a leading byte-order mark is dropped so it
    does not end up glued to the first document.

    Raises:
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode("utf-8-sig")

    # Prefer .read() when it exists so file-like inputs keep working as before.
    if hasattr(file, "read"):
        data = file.read()
        if isinstance(data, (bytes, bytearray)):
            return bytes(data).decode("utf-8-sig")
        return data  # text-mode stream: already a str

    # No .read(): treat the value as a path. pathlib objects also have a
    # ``.name`` (the basename only), so path-likes must be checked first.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name
    with open(path, encoding="utf-8-sig") as handle:
        return handle.read()


def run_from_textfile(file):
    """Fit a BERTopic model on an uploaded text file, one document per line.

    Args:
        file: The value delivered by the upload component. May be ``None``
            (nothing uploaded), a path string, a file-like object, raw
            bytes, or an object whose ``.name`` is the file path.

    Returns:
        A 3-tuple ``(topic_overview, assignments, figure)``:
          * ``topic_overview`` - ``get_topic_info()`` rendered as text, or a
            human-readable message when the input is unusable;
          * ``assignments`` - one ``"Doc N: Topic T"`` line per document
            (empty string on the early-exit paths);
          * ``figure`` - the result of ``visualize_barchart`` (``None`` on
            the early-exit paths).
    """
    if file is None:
        return "Please upload a .txt file.", "", None

    try:
        text = _read_uploaded_text(file)
    except UnicodeDecodeError:
        # Report bad encodings the same way as the other input problems
        # instead of surfacing a traceback in the UI.
        return "Could not decode the file as UTF-8 text.", "", None

    # One document per non-blank line, surrounding whitespace removed.
    stripped_lines = (line.strip() for line in text.split("\n"))
    docs = [line for line in stripped_lines if line]

    if len(docs) < 3:
        return "Need at least 3 documents (one per line).", "", None

    # Embedding model
    # NOTE(review): the model is re-loaded on every call; consider caching
    # it at module level if request latency matters.
    embedder = SentenceTransformer("all-MiniLM-L6-v2")

    # Topic model
    topic_model = BERTopic(embedding_model=embedder)
    topics, _probs = topic_model.fit_transform(docs)

    # Topic info
    topic_info = topic_model.get_topic_info().to_string()

    # Map each doc to its topic (documents are numbered from 1)
    assignments = "\n".join(
        f"Doc {number}: Topic {topic}"
        for number, topic in enumerate(topics, start=1)
    )

    # Visualization
    fig = topic_model.visualize_barchart(top_n_topics=10)

    return topic_info, assignments, fig
33
+
34
+
35
# Instructions shown under the page title: expected file layout plus a
# fenced three-line example.
_USAGE_NOTES = (
    "Upload a plain text (.txt) file. Each line should contain one LLM response.\n\n"
    "Example format:\n"
    "```\n"
    "Response 1...\n"
    "Response 2...\n"
    "Response 3...\n"
    "```\n"
)

# Page layout: heading, instructions, upload widget, trigger button and the
# three result areas that run_from_textfile fills in.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Topic Modeling from TXT File (BERTopic)")
    gr.Markdown(_USAGE_NOTES)

    upload_box = gr.File(label="Upload .txt file")

    run_button = gr.Button("Run Topic Modeling")

    overview_box = gr.Textbox(label="Topic Overview", lines=10)
    assignments_box = gr.Textbox(label="Document → Topic Assignments", lines=10)
    chart_plot = gr.Plot(label="Topic Visualization")

    # Output order must match the tuple returned by run_from_textfile:
    # (topic overview text, per-document assignments, figure).
    result_targets = [overview_box, assignments_box, chart_plot]
    run_button.click(
        fn=run_from_textfile,
        inputs=[upload_box],
        outputs=result_targets,
    )

# Start the app as soon as the module is executed.
demo.launch()