Mpavan45 committed on
Commit
6ea85ab
·
verified ·
1 Parent(s): fcecef6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -31
app.py CHANGED
@@ -14,51 +14,51 @@ import pickle
14
  # Page Config
15
  st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")
16
 
17
- # Download necessary resources
18
- nltk.download('stopwords')
19
 
20
- # Load SpaCy model
21
- nlp = spacy.load("en_core_web_sm")
22
 
23
- # Stopwords
24
- stop_words = set(stopwords.words('english')).union({"pm"})
25
 
26
- # Pre-processing function (without parentheses extraction)
27
- def pre_process(x):
28
- # Convert to lowercase
29
- x = x.lower()
30
 
31
- # Remove HTML tags
32
- x = re.sub(r"<.*?>", "", x)
33
 
34
- # Remove URLs
35
- x = re.sub(r"http[s]?://\S+", "", x)
36
 
37
- # Remove mentions (@, #)
38
- x = re.sub(r"[@#]\S+", "", x)
39
 
40
- # Remove emojis
41
- x = emoji.replace_emoji(x, replace="")
42
 
43
- # Remove special characters (-, ., :, \, ,)
44
- x = re.sub(r"[-.:,\\]", " ", x)
45
 
46
- # Remove single and double quotes
47
- x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
48
 
49
- # Remove content inside parentheses
50
- x = re.sub(r"\(.*?\)", "", x)
51
 
52
- # Remove extra spaces
53
- x = re.sub(r"\s+", " ", x).strip()
54
 
55
- # Spell checking
56
- x = str(TextBlob(x).correct())
57
 
58
- # Lemmatization using SpaCy
59
- x = " ".join([token.lemma_ for token in nlp(x)])
60
 
61
- return " ".join(x)
62
 
63
  # @st.cache_resource
64
  # def load_model():
 
14
  # Page Config
15
  st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")
16
 
17
+ # # Download necessary resources
18
+ # # nltk.download('stopwords')
19
 
20
+ # # Load SpaCy model
21
+ # nlp = spacy.load("en_core_web_sm")
22
 
23
+ # # Stopwords
24
+ # stop_words = set(stopwords.words('english')).union({"pm"})
25
 
26
+ # # Pre-processing function (without parentheses extraction)
27
+ # def pre_process(x):
28
+ # # Convert to lowercase
29
+ # x = x.lower()
30
 
31
+ # # Remove HTML tags
32
+ # x = re.sub(r"<.*?>", "", x)
33
 
34
+ # # Remove URLs
35
+ # x = re.sub(r"http[s]?://\S+", "", x)
36
 
37
+ # # Remove mentions (@, #)
38
+ # x = re.sub(r"[@#]\S+", "", x)
39
 
40
+ # # Remove emojis
41
+ # x = emoji.replace_emoji(x, replace="")
42
 
43
+ # # Remove special characters (-, ., :, \, ,)
44
+ # x = re.sub(r"[-.:,\\]", " ", x)
45
 
46
+ # # Remove single and double quotes
47
+ # x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
48
 
49
+ # # Remove content inside parentheses
50
+ # x = re.sub(r"\(.*?\)", "", x)
51
 
52
+ # # Remove extra spaces
53
+ # x = re.sub(r"\s+", " ", x).strip()
54
 
55
+ # # Spell checking
56
+ # x = str(TextBlob(x).correct())
57
 
58
+ # # Lemmatization using SpaCy
59
+ # x = " ".join([token.lemma_ for token in nlp(x)])
60
 
61
+ # return " ".join(x)
62
 
63
  # @st.cache_resource
64
  # def load_model():