Dineshkumars committed on
Commit
e698faa
·
1 Parent(s): 3314461

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.corpus import stopwords
3
+ from nltk.cluster.util import cosine_distance
4
+ import numpy as np
5
+ import networkx as nx
6
+
7
def read_para(string):
    """Split a block of text into sentences, each tokenised into words.

    Sentences are delimited by the two-character sequence ``". "`` and every
    sentence is split on single spaces. Tokens are kept verbatim (case and
    punctuation included).

    Args:
        string: The raw text to split.

    Returns:
        A list of sentences, each of which is a list of word tokens.
        Fragments that are empty or contain only whitespace are omitted.
    """
    sentences = []
    for fragment in string.split(". "):
        # A text ending in ". " leaves an empty trailing fragment. Skip
        # empties explicitly rather than popping the last entry, so a text
        # that does NOT end in ". " keeps its final sentence.
        if not fragment.strip():
            continue
        # No character filtering is applied: str.replace() matches literal
        # text only, so passing it a regex character class cleans nothing.
        sentences.append(fragment.split(" "))
    return sentences
14
+
15
def similarity_in_sentences(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two tokenised sentences.

    Both sentences are lower-cased and converted to word-count vectors over
    their combined vocabulary; words found in ``stopwords`` are not counted.

    Args:
        sent1: First sentence as a list of word tokens.
        sent2: Second sentence as a list of word tokens.
        stopwords: Optional collection of lower-case words to ignore.
            ``None`` means no words are ignored.

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence contributes no counted words.
    """
    if stopwords is None:
        stopwords = []

    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]

    # Map every distinct word to its vector slot once, so each lookup below
    # is a dict access instead of a linear list.index() scan.
    slot_of = {word: slot for slot, word in enumerate(set(sent1 + sent2))}

    vector1 = [0] * len(slot_of)
    vector2 = [0] * len(slot_of)
    for w in sent1:
        if w not in stopwords:
            vector1[slot_of[w]] += 1
    for w in sent2:
        if w not in stopwords:
            vector2[slot_of[w]] += 1

    # An all-zero vector (empty sentence, or every word is a stop word) has
    # zero length, so the cosine is 0/0 and would come back as NaN. Treat
    # such a sentence as having no similarity to anything.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)
35
+
36
def build_similarity_matrix(sentences, stop_words):
    """Build the square matrix of pairwise sentence similarities.

    Args:
        sentences: List of tokenised sentences (each a list of words).
        stop_words: Words passed through to ``similarity_in_sentences`` to
            be ignored when comparing sentences.

    Returns:
        A ``len(sentences) x len(sentences)`` NumPy array in which entry
        ``[i][j]`` is the similarity of sentence ``i`` to sentence ``j``.
        The diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))
    for row, left in enumerate(sentences):
        for col, right in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = similarity_in_sentences(left, right, stop_words)
    return matrix
44
+
45
def summary(txt, top_n=5):
    """Return an extractive summary made of the highest-ranked sentences.

    The text is split into sentences, a sentence-similarity graph is built,
    and PageRank scores on that graph decide which sentences are kept.

    Args:
        txt: The text to summarise.
        top_n: Maximum number of sentences to include. If the text has fewer
            sentences, all of them are used.

    Returns:
        The selected sentences, highest score first, joined with ``". "``.
        An empty string is returned when the text yields no sentences.
    """
    # Download the stop-word corpus only when it is not installed yet, so
    # repeated calls do not contact the network every time.
    try:
        nltk.data.find("corpora/stopwords")
    except LookupError:
        nltk.download("stopwords")
    stop_words = stopwords.words('english')

    sentences = read_para(txt)
    if not sentences:
        return ""

    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)
    ranked_sentence = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)), reverse=True
    )

    # Clamp the request so asking for more sentences than exist (including
    # the default of 5 on a short text) does not raise IndexError.
    keep = max(0, min(top_n, len(ranked_sentence)))
    return ". ".join(" ".join(words) for _, words in ranked_sentence[:keep])
57
+
58
+
59
+
60
import streamlit as st

# Streamlit front end: upload a .txt file, enter how many sentences the
# summary should contain, and display the result of summary().
st.title("Text Summarizer using Streamlit")
file = st.file_uploader("Upload file", type=["txt"])
no_para = st.text_input("Enter the size of summarized paragraph :")

# Nothing is rendered until a file is uploaded and a size is entered.
if file is not None and no_para:
    try:
        sentence_count = int(no_para)
    except ValueError:
        # The size comes from a free-text box, so non-numeric input is
        # expected; report it instead of letting the conversion crash the app.
        st.error("Please enter a whole number for the summary size.")
    else:
        content = file.read().decode("utf-8")
        st.subheader("Summarized Text : ")
        st.markdown(summary(content, sentence_count))