sabirbagwan committed on
Commit
66d7b2f
·
1 Parent(s): 4fa33c4

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +31 -0
  2. model.py +50 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ from model import Model
4
+
5
# Streamlit front end for the extractive summarizer in model.py.
#
# Streamlit re-runs this script top to bottom on every interaction, so the
# input widgets live in a form: nothing is processed until "Submit" is pressed.
st.title("Text Summarizer")

with st.form(key="clf_form"):
    text_input = st.text_area("Type Here:")
    submit_btn = st.form_submit_button(label="Submit")

# str.split() with no argument splits on any whitespace run, so this is a
# plain word count that ignores repeated spaces and newlines.
count_of_words = len(text_input.split())

if submit_btn:
    if not text_input.strip():
        # Whitespace-only input is treated as empty: it carries nothing to
        # summarize, and the "more than 100 words" hint would be misleading.
        st.error("Enter something in order to summarize it.", icon="⛔️")
    elif count_of_words <= 100:
        st.warning("Please enter more than 100 words in order to summarize it.", icon="⚠️")
    else:
        st.subheader("Output:")

        col1, col2 = st.columns(2)

        output = Model.predict(text=text_input)

        # Show the original and the summary side by side for comparison.
        with col1:
            st.info("Original Text:")
            st.write(text_input)

        with col2:
            st.info("Summarized Text:")
            st.write(output)
model.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import spacy
3
+ from heapq import nlargest
4
def _load_pipeline(name="en_core_web_sm"):
    """Load the spaCy pipeline *name*, downloading it first if it is missing.

    Raises:
        OSError: if the pipeline still cannot be loaded after the download
            attempt.
    """
    try:
        return spacy.load(name)
    except OSError:
        # spacy.load raises OSError when the model package is not installed.
        import subprocess
        import sys

        # sys.executable targets the interpreter (and virtualenv) running
        # this process; a bare "python" may resolve to a different one.
        subprocess.run([sys.executable, "-m", "spacy", "download", name])
        return spacy.load(name)


class Model():
    """Frequency-based extractive summarizer built on a spaCy pipeline.

    Sentences are scored by the sum of the normalized frequencies of the
    words they contain; the highest-scoring ones form the summary.
    """

    # Loaded once when the class is created and reused by every predict()
    # call; loading a spaCy pipeline is far too slow to repeat per request.
    nlp = _load_pipeline()

    # Tokens ignored when counting word frequencies (matched case-sensitively).
    _STOP_WORDS = frozenset([
        'stop', 'the', 'to', 'and', 'a', 'in', 'it', 'is', 'I', 'that',
        'had', 'on', 'for', 'were', 'was',
    ])

    # Fraction of the input's sentences kept in the summary.
    _SUMMARY_RATIO = 0.3

    @staticmethod
    def predict(text):
        """Return an extractive summary of *text*.

        Args:
            text: The text to summarize.

        Returns:
            The selected sentences joined by single spaces, highest-scoring
            first. An empty string is returned when *text* contains no
            scorable words.
        """
        doc = Model.nlp(text)

        # Count every token that is not whitespace, punctuation or a stop
        # word. Punctuation is excluded because "." and "," would otherwise
        # be the most frequent "words" and dominate the sentence scores.
        word_frequencies = {}
        for token in doc:
            if token.is_space or token.is_punct:
                continue
            if token.text in Model._STOP_WORDS:
                continue
            word_frequencies[token.text] = word_frequencies.get(token.text, 0) + 1

        if not word_frequencies:
            return ""

        # Normalize so the most frequent word weighs exactly 1.0.
        max_word_frequency = max(word_frequencies.values())
        for word in word_frequencies:
            word_frequencies[word] = word_frequencies[word] / max_word_frequency

        sent_tokens = list(doc.sents)

        # A sentence only receives a score if it contains a counted word.
        sent_scores = {}
        for sent in sent_tokens:
            for token in sent:
                if token.text in word_frequencies:
                    sent_scores[sent] = (
                        sent_scores.get(sent, 0) + word_frequencies[token.text]
                    )

        # Keep roughly 30% of the sentences, but never fewer than one:
        # int() truncates to 0 for inputs shorter than four sentences, which
        # would otherwise produce an empty summary.
        sentence_length = max(1, int(len(sent_tokens) * Model._SUMMARY_RATIO))
        summary = nlargest(sentence_length, sent_scores, sent_scores.get)
        return " ".join(sent.text for sent in summary)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ spacy==3.5.2
2
+ pandas==2.0.1
3
+ regex==2023.5.5
4
+ streamlit==1.22.0