Spaces:
Sleeping
Sleeping
| import re | |
| from collections import Counter | |
| import nltk | |
| from nltk.tokenize import sent_tokenize, word_tokenize | |
| from nltk.corpus import stopwords | |
# Fetch the NLTK resources used by the functions below; this runs at import time.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)
del _resource  # keep the module namespace free of the loop variable
def preporcess_text(text):
    """Tokenize *text* and return its lowercased content words.

    The text is split with ``word_tokenize``. A token is kept only if it is
    purely alphanumeric (``str.isalnum``) and its lowercase form is not in
    NLTK's English stop-word list.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercased tokens, in the order they occur in *text*.
    """
    english_stop_words = set(stopwords.words('english'))
    kept = []
    for token in word_tokenize(text):
        # Test the token as written: lowercasing first could change what
        # isalnum() reports for some Unicode characters.
        if not token.isalnum():
            continue
        lowered = token.lower()
        if lowered in english_stop_words:
            continue
        kept.append(lowered)
    return kept
def sentence_score(text):
    """Score every sentence of *text* by the frequency of the words it contains.

    Word frequencies are counted once over the whole preprocessed text. A
    sentence's score is the sum of those document-wide frequencies for each
    word left after preprocessing that sentence.

    Args:
        text: The document to score.

    Returns:
        A dict mapping each sentence string to its integer score. Because the
        sentence text is the key, identical sentences share a single entry.
    """
    sentence_list = sent_tokenize(text)
    frequency_of = Counter(preporcess_text(text))

    def total_frequency(sentence):
        # Counter yields 0 for unseen words, so no membership test is needed.
        return sum(frequency_of[token] for token in preporcess_text(sentence))

    return {sentence: total_frequency(sentence) for sentence in sentence_list}
def summarize_text(text, num_sentences=10):
    """Build an extractive summary from the highest-scoring sentences of *text*.

    Sentences are ranked by the score ``sentence_score`` gives them, and the
    top ones are joined with single spaces. The summary lists sentences in
    ranking order (best first), not in the order they appear in *text*.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to include. Zero or a
            negative value produces an empty summary; ``None`` keeps every
            sentence.

    Returns:
        The selected sentences joined by single spaces, or an empty string
        when no sentence is selected.
    """
    # A negative count would be read as a slice offset from the end
    # (e.g. -1 -> "all but the last sentence"), so clamp it to zero.
    if num_sentences is not None and num_sentences < 0:
        num_sentences = 0

    scores = sentence_score(text)
    # sorted() is stable, so equally scored sentences keep the order in which
    # they were inserted into the score mapping.
    ranked_sentences = sorted(scores, key=scores.get, reverse=True)
    return " ".join(ranked_sentences[:num_sentences])