Update app.py
app.py
CHANGED
@@ -4,20 +4,18 @@ import re
 import string
 import nltk
 from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 from transformers import pipeline
 from PIL import Image
 
 # Download required NLTK data
 nltk.download('stopwords')
-nltk.download('punkt')
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 
 # Load Models
 news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
-qa_pipeline = pipeline("question-answering", model="
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
 # Label Mapping
 label_mapping = {
@@ -36,11 +34,11 @@ def clean_text(text):
     text = text.lower()
     text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
     text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
-
-
+    words = text.split()  # Tokenization without Punkt
+    words = [word for word in words if word not in stopwords.words("english")]  # Remove stopwords
     lemmatizer = WordNetLemmatizer()
-
-    return " ".join(
+    words = [lemmatizer.lemmatize(word) for word in words]  # Lemmatize tokens
+    return " ".join(words)
 
 # Define the functions
 def classify_text(text):
@@ -143,4 +141,4 @@ if st.button("✉ Send"):
     st.write("*Chatbot Response:*")
     for q, a in history:
         st.write(f"*Q:* {q}")
-    st.write(f"*A:* {a}")
+        st.write(f"*A:* {a}")
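The second hunk drops the Punkt tokenizer (and its nltk.download('punkt') call) in favor of plain whitespace splitting, and completes the previously truncated return statement. For reference, clean_text as it reads after this commit, reconstructed from the hunks above with the standard four-space indentation assumed, since the diff view strips leading whitespace:

import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

def clean_text(text):
    text = text.lower()
    text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
    text = re.sub(r"[^a-zA-Z0-9\s]", "", text)  # Remove special characters
    words = text.split()  # Tokenization without Punkt
    words = [word for word in words if word not in stopwords.words("english")]  # Remove stopwords
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]  # Lemmatize tokens
    return " ".join(words)

One design note: stopwords.words("english") is re-read from the corpus on every iteration of the comprehension; hoisting it into a set once before the loop would make the stopword check constant-time.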
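The first hunk also completes the qa_pipeline line, which was previously cut off mid-string, by pinning the extractive question-answering model deepset/roberta-base-squad2. A minimal sketch of how such a pipeline is typically invoked; the question and context strings below are illustrative, not taken from the app:

from transformers import pipeline

qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Extractive QA: the model selects an answer span from the supplied context
# rather than generating free text.
result = qa_pipeline(
    question="Which tokenizer does the app use now?",
    context="The update replaces NLTK's Punkt tokenizer with plain whitespace splitting.",
)
print(result["answer"], result["score"])  # answer span plus a confidence score

Because roberta-base-squad2 is fine-tuned on SQuAD 2.0, the pipeline can also report that no answer exists in the context when called with handle_impossible_answer=True.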