Ashendilantha committed on
Commit
5f72ec8
·
verified ·
1 Parent(s): ffb0cc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -4,20 +4,18 @@ import re
4
  import string
5
  import nltk
6
  from nltk.corpus import stopwords
7
- from nltk.tokenize import word_tokenize
8
  from nltk.stem import WordNetLemmatizer
9
  from transformers import pipeline
10
  from PIL import Image
11
 
12
  # Download required NLTK data
13
  nltk.download('stopwords')
14
- nltk.download('punkt')
15
  nltk.download('wordnet')
16
  nltk.download('omw-1.4')
17
 
18
  # Load Models
19
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
20
- qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
21
 
22
  # Label Mapping
23
  label_mapping = {
@@ -36,11 +34,11 @@ def clean_text(text):
36
  text = text.lower()
37
  text = re.sub(f"[{string.punctuation}]", "", text) # Remove punctuation
38
  text = re.sub(r"[^a-zA-Z0-9\s]", "", text) # Remove special characters
39
- tokens = word_tokenize(text)
40
- tokens = [word for word in tokens if word not in stopwords.words("english")] # Remove stopwords
41
  lemmatizer = WordNetLemmatizer()
42
- tokens = [lemmatizer.lemmatize(word) for word in tokens] # Lemmatize tokens
43
- return " ".join(tokens)
44
 
45
  # Define the functions
46
  def classify_text(text):
@@ -143,4 +141,4 @@ if st.button("✉ Send"):
143
  st.write("*Chatbot Response:*")
144
  for q, a in history:
145
  st.write(f"*Q:* {q}")
146
- st.write(f"*A:* {a}")
 
4
  import string
5
  import nltk
6
  from nltk.corpus import stopwords
 
7
  from nltk.stem import WordNetLemmatizer
8
  from transformers import pipeline
9
  from PIL import Image
10
 
11
  # Download required NLTK data
12
  nltk.download('stopwords')
 
13
  nltk.download('wordnet')
14
  nltk.download('omw-1.4')
15
 
16
  # Load Models
17
  news_classifier = pipeline("text-classification", model="Oneli/News_Classification")
18
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
19
 
20
  # Label Mapping
21
  label_mapping = {
 
34
  text = text.lower()
35
  text = re.sub(f"[{string.punctuation}]", "", text) # Remove punctuation
36
  text = re.sub(r"[^a-zA-Z0-9\s]", "", text) # Remove special characters
37
+ words = text.split() # Tokenization without Punkt
38
+ words = [word for word in words if word not in stopwords.words("english")] # Remove stopwords
39
  lemmatizer = WordNetLemmatizer()
40
+ words = [lemmatizer.lemmatize(word) for word in words] # Lemmatize tokens
41
+ return " ".join(words)
42
 
43
  # Define the functions
44
  def classify_text(text):
 
141
  st.write("*Chatbot Response:*")
142
  for q, a in history:
143
  st.write(f"*Q:* {q}")
144
+ st.write(f"*A:* {a}")