Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,51 +14,51 @@ import pickle
|
|
| 14 |
# Page Config
|
| 15 |
st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")
|
| 16 |
|
| 17 |
-
# Download necessary resources
|
| 18 |
-
nltk.download('stopwords')
|
| 19 |
|
| 20 |
-
# Load SpaCy model
|
| 21 |
-
nlp = spacy.load("en_core_web_sm")
|
| 22 |
|
| 23 |
-
# Stopwords
|
| 24 |
-
stop_words = set(stopwords.words('english')).union({"pm"})
|
| 25 |
|
| 26 |
-
# Pre-processing function (without parentheses extraction)
|
| 27 |
-
def pre_process(x):
|
| 28 |
-
|
| 29 |
-
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
|
| 61 |
-
|
| 62 |
|
| 63 |
# @st.cache_resource
|
| 64 |
# def load_model():
|
|
|
|
| 14 |
# Page Config
|
| 15 |
st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")
|
| 16 |
|
| 17 |
+
# # Download necessary resources
|
| 18 |
+
# # nltk.download('stopwords')
|
| 19 |
|
| 20 |
+
# # Load SpaCy model
|
| 21 |
+
# nlp = spacy.load("en_core_web_sm")
|
| 22 |
|
| 23 |
+
# # Stopwords
|
| 24 |
+
# stop_words = set(stopwords.words('english')).union({"pm"})
|
| 25 |
|
| 26 |
+
# # Pre-processing function (without parentheses extraction)
|
| 27 |
+
# def pre_process(x):
|
| 28 |
+
# # Convert to lowercase
|
| 29 |
+
# x = x.lower()
|
| 30 |
|
| 31 |
+
# # Remove HTML tags
|
| 32 |
+
# x = re.sub(r"<.*?>", "", x)
|
| 33 |
|
| 34 |
+
# # Remove URLs
|
| 35 |
+
# x = re.sub(r"http[s]?://\S+", "", x)
|
| 36 |
|
| 37 |
+
# # Remove mentions (@, #)
|
| 38 |
+
# x = re.sub(r"[@#]\S+", "", x)
|
| 39 |
|
| 40 |
+
# # Remove emojis
|
| 41 |
+
# x = emoji.replace_emoji(x, replace="")
|
| 42 |
|
| 43 |
+
# # Remove special characters (-, ., :, \, ,)
|
| 44 |
+
# x = re.sub(r"[-.:,\\]", " ", x)
|
| 45 |
|
| 46 |
+
# # Remove single and double quotes
|
| 47 |
+
# x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
|
| 48 |
|
| 49 |
+
# # Remove content inside parentheses
|
| 50 |
+
# x = re.sub(r"\(.*?\)", "", x)
|
| 51 |
|
| 52 |
+
# # Remove extra spaces
|
| 53 |
+
# x = re.sub(r"\s+", " ", x).strip()
|
| 54 |
|
| 55 |
+
# # Spell checking
|
| 56 |
+
# x = str(TextBlob(x).correct())
|
| 57 |
|
| 58 |
+
# # Lemmatization using SpaCy
|
| 59 |
+
# x = " ".join([token.lemma_ for token in nlp(x)])
|
| 60 |
|
| 61 |
+
# return " ".join(x)
|
| 62 |
|
| 63 |
# @st.cache_resource
|
| 64 |
# def load_model():
|