Spaces:
Runtime error
Runtime error
File size: 2,289 Bytes
e698faa d4d9e0d e698faa 79076f5 e698faa 95e1ca4 e698faa a1a1940 e698faa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
def read_para(string):
    """Split a paragraph into sentences, each a list of space-separated tokens.

    Sentences are delimited by ". " (period followed by a space). Tokens are
    kept verbatim (case, digits, punctuation) so a caller can rebuild a
    sentence with " ".join(tokens).

    Args:
        string: The raw text to split.

    Returns:
        A list with one entry per non-blank sentence; each entry is the list
        of tokens obtained by splitting that sentence on single spaces.
        Returns an empty list for empty or whitespace-only input.
    """
    text = string.strip()
    # The final sentence keeps its terminating period because there is no
    # following space for the ". " delimiter to consume; drop it here so the
    # last sentence is treated like every other one instead of being lost.
    if text.endswith("."):
        text = text[:-1]

    sentences = []
    for fragment in text.split(". "):
        fragment = fragment.strip()
        # Skip blank fragments (e.g. from "..  " runs) rather than emitting
        # sentences that contain only an empty token.
        if fragment:
            sentences.append(fragment.split(" "))
    return sentences
def similarity_in_sentences(sent1, sent2, stopwords=None):
    """Return the cosine similarity between two tokenized sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words found in ``stopwords`` are not counted.

    Args:
        sent1: First sentence as a sequence of word strings.
        sent2: Second sentence as a sequence of word strings.
        stopwords: Optional iterable of words to ignore. Words are compared
            after lower-casing the sentence tokens, so entries should be
            lower-case.

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence has no countable words (empty, or stop words only).
    """
    # A set gives O(1) membership tests instead of scanning a list per word.
    ignored = frozenset(stopwords) if stopwords is not None else frozenset()

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map each vocabulary word to its vector slot once, instead of calling
    # list.index() for every token.
    slot = {word: i for i, word in enumerate(set(words1 + words2))}
    vector1 = [0] * len(slot)
    vector2 = [0] * len(slot)

    for w in words1:
        if w not in ignored:
            vector1[slot[w]] += 1
    for w in words2:
        if w not in ignored:
            vector2[slot[w]] += 1

    # An all-zero vector has zero length, so the cosine is undefined (0/0)
    # and would come back as NaN; treat such sentences as sharing nothing.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)
def build_similarity_matrix(sentences, stop_words):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: Sequence of tokenized sentences.
        stop_words: Words forwarded to ``similarity_in_sentences`` to ignore.

    Returns:
        A square NumPy array with one row and column per sentence. Entry
        ``[i][j]`` is the similarity of sentence ``i`` to sentence ``j``;
        the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))
    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = similarity_in_sentences(first, second, stop_words)
    return matrix
def summary(txt, top_n=5):
    """Return an extractive summary made of the highest-ranked sentences.

    The text is split with ``read_para``, a similarity matrix is built with
    ``build_similarity_matrix``, and sentences are scored by running PageRank
    over the graph built from that matrix.

    Args:
        txt: The text to summarize.
        top_n: Maximum number of sentences to include. If the text has fewer
            sentences, all of them are used; values below 1 give an empty
            summary.

    Returns:
        The selected sentences, highest score first, joined with ". ".
        Returns an empty string when no sentences are found.
    """
    sentences = read_para(txt)
    if not sentences:
        return ""

    # Only download the corpus when it is actually missing, rather than
    # contacting the NLTK server on every call.
    try:
        stop_words = stopwords.words('english')
    except LookupError:
        nltk.download("stopwords")
        stop_words = stopwords.words('english')

    similarity_matrix = build_similarity_matrix(sentences, stop_words)
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # Slicing clamps to the number of available sentences, so asking for more
    # than exist no longer raises IndexError. max() keeps a negative top_n
    # from turning into an "all but the last n" slice.
    chosen = ranked[:max(top_n, 0)]
    return ". ".join(" ".join(words) for _, words in chosen)
import streamlit as st
# Streamlit front end: upload a .txt file, choose how many sentences the
# summary should contain, and display the result.
st.title("Text Summarizer")
file = st.file_uploader("Upload file", type=["txt"])
no_para = st.text_input("Enter the size of summarized paragraph :")
if file is not None and no_para:
    # The size arrives as free text, so validate it instead of letting
    # int() abort the app on input such as "five".
    try:
        sentence_count = int(no_para)
    except ValueError:
        st.error("Please enter a whole number for the summary size.")
    else:
        if sentence_count < 1:
            st.error("The summary size must be at least 1.")
        else:
            content = file.read().decode("utf-8")
            st.subheader("Summarized Text: ")
            st.markdown(summary(content, sentence_count))