rockerritesh committed on
Commit
5d0296f
·
verified ·
1 Parent(s): 52d62c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import networkx as nx
3
+ import matplotlib.pyplot as plt
4
+ from nltk import sent_tokenize
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.cluster import KMeans
7
+ import numpy as np
8
+
9
+ # Helper function to split text into topics using KMeans clustering
10
def split_text_into_topics(text, n_topics):
    """Group the sentences of *text* into ``n_topics`` buckets.

    The text is split with ``sent_tokenize``, each sentence is vectorised
    with TF-IDF (English stop words removed) and the vectors are clustered
    with KMeans using a fixed ``random_state`` of 42.

    Args:
        text: Raw text to split into sentences.
        n_topics: Number of topic buckets in the returned mapping.

    Returns:
        A dict whose keys are ``range(n_topics)`` and whose values are the
        sentences assigned to that bucket, in their original order. Buckets
        are empty when there are fewer sentences than ``n_topics`` or when
        the text is blank.

    Raises:
        ValueError: If ``n_topics`` is smaller than 1.
    """
    if n_topics < 1:
        raise ValueError("n_topics must be at least 1")

    topic_sentences = {i: [] for i in range(n_topics)}

    # Blank input has no sentences; fitting TF-IDF on an empty corpus raises.
    if not text or not text.strip():
        return topic_sentences

    sentences = sent_tokenize(text)
    if not sentences:
        return topic_sentences

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # TfidfVectorizer raises when the vocabulary is empty (for example
        # every sentence consists of stop words only). There is nothing to
        # cluster on, so keep all sentences together in the first bucket.
        topic_sentences[0].extend(sentences)
        return topic_sentences

    # KMeans requires n_samples >= n_clusters, so never ask for more
    # clusters than there are sentences.
    n_clusters = min(n_topics, len(sentences))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(X)

    for label, sentence in zip(kmeans.labels_.tolist(), sentences):
        topic_sentences[label].append(sentence)

    return topic_sentences
25
+
26
+ # Recursive function to split subtopics
27
def recursive_split(topic_dict, depth, max_depth, subtopics):
    """Recursively split each topic's sentences into sub-topics.

    Args:
        topic_dict: Mapping of topic key to a list of sentences.
        depth: Current recursion depth (the caller passes 0).
        max_depth: Number of split levels to apply. Once ``depth`` reaches
            this value the mapping is returned unchanged.
        subtopics: Number of sub-topics requested for each split.

    Returns:
        A dict with the same keys as ``topic_dict``. A topic holding at most
        one sentence keeps its sentence list; any other topic maps to a
        nested dict produced by ``split_text_into_topics`` and split again
        until ``max_depth`` is reached.
    """
    if depth >= max_depth:
        # Return the mapping itself (not None) so callers always get a tree.
        return topic_dict

    new_topic_dict = {}
    for topic, sentences in topic_dict.items():
        if len(sentences) <= 1:
            # Nothing left to split.
            new_topic_dict[topic] = sentences
        else:
            sub_topics = split_text_into_topics(' '.join(sentences), subtopics)
            # Descend one level so that max_depth actually controls depth.
            new_topic_dict[topic] = recursive_split(
                sub_topics, depth + 1, max_depth, subtopics
            )

    return new_topic_dict
40
+
41
+ # Plotting function to visualize the tree structure
42
def plot_tree(tree, parent=None, graph=None, level=0):
    """Build a graph describing the nested topic tree.

    Args:
        tree: Mapping of topic key to either a nested mapping (sub-topics)
            or a sequence of sentences.
        parent: Label of the parent node, or ``None`` for the top level.
        graph: Graph to add nodes and edges to. A new ``nx.Graph`` is
            created when omitted.
        level: Depth stored on each added node as its ``level`` attribute.

    Returns:
        The graph, with one node per topic, sub-topic and sentence and an
        edge from every node to its parent.
    """
    if graph is None:
        graph = nx.Graph()

    for key, value in tree.items():
        if parent is None:
            node_label = f'Topic {key}'
        else:
            # Qualify with the parent label: sub-topic keys repeat under
            # every parent, and identical labels would collapse into one
            # shared graph node instead of forming a tree.
            node_label = f'{parent} > Subtopic {key}'
        graph.add_node(node_label, level=level)
        if parent is not None:
            graph.add_edge(parent, node_label)

        if isinstance(value, dict):
            plot_tree(value, parent=node_label, graph=graph, level=level + 1)
        else:
            # Sentences are shown by position, not by their text.
            for position in range(1, len(value) + 1):
                sentence_label = f"{node_label} - Sentence {position}"
                graph.add_node(sentence_label, level=level + 1)
                graph.add_edge(node_label, sentence_label)

    return graph
61
+
62
# Streamlit App layout
st.title('Text Topic Tree Generator')

# Upload file
uploaded_file = st.file_uploader("Upload a text file", type="txt")

if uploaded_file is not None:
    try:
        text = uploaded_file.read().decode('utf-8')
    except UnicodeDecodeError:
        # A .txt extension does not guarantee UTF-8 content.
        st.error("The uploaded file is not valid UTF-8 text.")
        st.stop()

    if not text.strip():
        # Clustering needs at least one sentence.
        st.warning("The uploaded file is empty.")
        st.stop()

    # Select number of main topics and depth of subtopics
    n_topics = st.slider('Select number of main topics', 2, 10, 5)
    max_depth = st.slider('Select maximum depth of subtopics', 1, 5, 2)
    subtopics_per_topic = st.slider('Select number of subtopics per topic', 2, 5, 3)

    # Split text into main topics
    topic_dict = split_text_into_topics(text, n_topics)

    # Recursively split the topics into subtopics
    full_tree = recursive_split(topic_dict, 0, max_depth, subtopics_per_topic)

    # Create the tree graph
    graph = plot_tree(full_tree)

    # Draw on an explicit figure rather than pyplot's global state, and hand
    # that figure to Streamlit.
    pos = nx.spring_layout(graph)
    fig, ax = plt.subplots(figsize=(12, 8))
    nx.draw(
        graph,
        pos,
        ax=ax,
        with_labels=True,
        node_size=3000,
        node_color="lightblue",
        font_size=10,
        font_weight="bold",
    )
    ax.set_title("Tree Structure of Text Topics")
    st.pyplot(fig)
    # The script reruns on every widget change; close the figure so they
    # do not pile up in memory.
    plt.close(fig)
92
+