# HaarisIqubal's picture
# Synced repo using 'sync_with_huggingface' GitHub Action
# c075e70 verified
import re
from collections import Counter
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
# Fetch the NLTK data packages the tokenizers and the stop-word filter below
# rely on; this runs once, when the module is imported.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)
_STOP_WORDS = None


def _english_stop_words():
    """Return the NLTK English stop-word set, building it only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        # Cache the set: preporcess_text is called once per sentence by
        # sentence_score, so rebuilding it on every call is wasted work.
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preporcess_text(text):
    """Tokenize *text* into lowercase, alphanumeric, non-stop-word tokens.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list of lowercased tokens in their original order. A token is kept
        only if ``str.isalnum()`` is true for it (so punctuation-only tokens
        are dropped) and its lowercase form is not an English stop word.
    """
    stop_words = _english_stop_words()
    # Lowercase each surviving token once, then filter against the stop words.
    lowered = (token.lower() for token in word_tokenize(text) if token.isalnum())
    return [token for token in lowered if token not in stop_words]
def sentence_score(text):
    """Score each sentence of *text* by the document-wide frequency of its words.

    Args:
        text: The document to split into sentences and score.

    Returns:
        A dict mapping each sentence string to an integer score: the sum,
        over the sentence's preprocessed tokens, of how often each token
        occurs in the preprocessed full text. Identical sentences share a
        single entry.
    """
    all_sentences = sent_tokenize(text)
    frequency = Counter(preporcess_text(text))
    return {
        sentence: sum(frequency[token] for token in preporcess_text(sentence))
        for sentence in all_sentences
    }
def summarize_text(text, num_sentences=10):
    """Build an extractive summary from the highest-scoring sentences of *text*.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to include. Values of
            zero or less yield an empty summary.

    Returns:
        A single string containing up to ``num_sentences`` sentences joined
        by spaces, ordered from highest to lowest score (not in document
        order). Sentences with equal scores keep their order of first
        appearance.
    """
    scores = sentence_score(text)
    # Clamp at zero: a negative count would otherwise turn the slice into
    # "[:-k]" and return everything except the k lowest-ranked sentences.
    limit = max(num_sentences, 0)
    ranked_sentences = sorted(scores, key=scores.get, reverse=True)
    return " ".join(ranked_sentences[:limit])