Spaces:
Runtime error
Runtime error
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.cluster.util import cosine_distance | |
| import numpy as np | |
| import networkx as nx | |
def read_para(string):
    """Split *string* into sentences, each represented as a list of words.

    Sentences are delimited by ". " (period followed by a space). Words are
    the whitespace-separated tokens of each sentence; punctuation inside a
    sentence is kept because callers rebuild the output text from these
    tokens.

    Args:
        string: The raw text to split.

    Returns:
        A list of sentences, where every sentence is a non-empty list of
        word strings. Empty or whitespace-only input yields an empty list.
    """
    text = string.strip()
    # Drop the terminating period of the final sentence so it is treated
    # like every other sentence (whose ". " delimiter is consumed by the
    # split) instead of being discarded wholesale.
    if text.endswith("."):
        text = text[:-1]

    sentences = []
    for chunk in text.split(". "):
        # split() without arguments collapses runs of whitespace, so no
        # empty-string "words" end up in the result.
        words = chunk.split()
        if words:
            sentences.append(words)
    return sentences
def similarity_in_sentences(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two tokenized sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words found in *stopwords* are not counted.

    Args:
        sent1: First sentence as a list of word strings.
        sent2: Second sentence as a list of word strings.
        stopwords: Optional collection of lower-case words to ignore.

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence has no countable words (the cosine is undefined for a
        zero vector and would otherwise come out as NaN).
    """
    if stopwords is None:
        ignored = frozenset()
    elif isinstance(stopwords, (set, frozenset)):
        ignored = stopwords
    else:
        # Set membership is O(1); a list would be scanned for every word.
        ignored = frozenset(stopwords)

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map each distinct word to its vector position once, instead of
    # calling list.index() for every word occurrence.
    position = {word: i for i, word in enumerate(set(words1 + words2))}
    vector1 = [0] * len(position)
    vector2 = [0] * len(position)

    for w in words1:
        if w not in ignored:
            vector1[position[w]] += 1
    for w in words2:
        if w not in ignored:
            vector2[position[w]] += 1

    # A sentence made up solely of stop words (or an empty one) yields an
    # all-zero vector; treat it as sharing nothing with the other sentence.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)
def build_similarity_matrix(sentences, stop_words):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: List of tokenized sentences (each a list of words).
        stop_words: Collection of words ignored when comparing sentences,
            or ``None`` to ignore nothing.

    Returns:
        A square ``numpy`` array of shape ``(len(sentences), len(sentences))``
        whose ``[i][j]`` entry is the similarity of sentence ``i`` and
        sentence ``j``. The diagonal is left at zero.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))

    # Convert once here rather than on every pairwise comparison.
    ignored = frozenset(stop_words) if stop_words is not None else None

    for row, first in enumerate(sentences):
        # The measure is symmetric, so compute each unordered pair once
        # and mirror it into both halves of the matrix.
        for col in range(row + 1, count):
            score = similarity_in_sentences(first, sentences[col], ignored)
            similarity_matrix[row][col] = score
            similarity_matrix[col][row] = score
    return similarity_matrix
def summary(txt, top_n=5):
    """Return an extractive summary of *txt*.

    The text is split into sentences, a sentence-similarity graph is built,
    the sentences are scored with PageRank, and the highest-scoring ones
    are joined with ". ".

    Args:
        txt: The text to summarize.
        top_n: Maximum number of sentences to include. If the text has
            fewer sentences, all of them are used.

    Returns:
        The selected sentences, highest score first, joined by ". ".
        An empty string when there is nothing to select.
    """
    try:
        stop_words = stopwords.words('english')
    except LookupError:
        # Corpus not installed yet: fetch it once instead of invoking the
        # downloader on every call.
        nltk.download("stopwords")
        stop_words = stopwords.words('english')

    sentences = read_para(txt)
    similarity_matrix = build_similarity_matrix(sentences, stop_words)
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    ranked_sentence = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)),
        reverse=True,
    )

    # Slicing clamps to the number of available sentences, so asking for
    # more than exist no longer raises IndexError; max() keeps a negative
    # request meaning "nothing", as a range() loop would.
    chosen = ranked_sentence[:max(top_n, 0)]
    return ". ".join(" ".join(words) for _, words in chosen)
| import streamlit as st | |
# Streamlit page: upload a .txt file, enter how many sentences the summary
# should contain, and display the result.
st.title("Text Summarizer")
file = st.file_uploader("Upload file", type=["txt"])
no_para = st.text_input("Enter the size of summarized paragraph :")

# Only proceed once a file has been uploaded and the size box is non-empty.
if file is not None and no_para:
    try:
        sentence_count = int(no_para)
    except ValueError:
        # Non-numeric input would otherwise crash the app with a traceback.
        st.error("Please enter a whole number for the summary size.")
    else:
        content = file.read().decode("utf-8")
        st.subheader("Summarized Text: ")
        st.markdown(summary(content, sentence_count))