Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import nltk
|
| 3 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 4 |
-
from nltk.corpus import stopwords, wordnet, brown
|
| 5 |
from nltk.stem import PorterStemmer, WordNetLemmatizer
|
| 6 |
from nltk import pos_tag, ne_chunk, ngrams
|
| 7 |
from nltk.collocations import BigramCollocationFinder
|
| 8 |
from nltk.classify import NaiveBayesClassifier
|
| 9 |
-
from nltk.corpus import movie_reviews
|
| 10 |
import random
|
| 11 |
|
| 12 |
# Download the required resources
|
|
@@ -25,10 +24,15 @@ stemmer = PorterStemmer()
|
|
| 25 |
lemmatizer = WordNetLemmatizer()
|
| 26 |
stop_words = set(stopwords.words('english'))
|
| 27 |
|
| 28 |
-
# Classifier training function
|
| 29 |
def train_classifier():
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
train_set = pos_reviews + neg_reviews
|
| 33 |
random.shuffle(train_set)
|
| 34 |
return NaiveBayesClassifier.train(train_set)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import nltk
|
| 3 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 4 |
+
from nltk.corpus import stopwords, wordnet, brown, movie_reviews
|
| 5 |
from nltk.stem import PorterStemmer, WordNetLemmatizer
|
| 6 |
from nltk import pos_tag, ne_chunk, ngrams
|
| 7 |
from nltk.collocations import BigramCollocationFinder
|
| 8 |
from nltk.classify import NaiveBayesClassifier
|
|
|
|
| 9 |
import random
|
| 10 |
|
| 11 |
# Download the required resources
|
|
|
|
| 24 |
lemmatizer = WordNetLemmatizer()
|
| 25 |
stop_words = set(stopwords.words('english'))
|
| 26 |
|
| 27 |
+
# Classifier training function (revised)
|
| 28 |
def train_classifier(max_files_per_class=50):
    """Train a Naive Bayes sentiment classifier on the NLTK movie_reviews corpus.

    Each review is turned into a bag-of-words feature dict (``{word: True}``)
    and labelled 'positive' or 'negative' according to the corpus category
    ('pos' or 'neg') it was read from.

    Args:
        max_files_per_class: Maximum number of reviews taken from each
            category. Defaults to 50, the cap the original code used to keep
            training fast; pass None to use every file in the category.

    Returns:
        The classifier produced by ``NaiveBayesClassifier.train``.
    """
    def labelled_features(category, label):
        # Only the first max_files_per_class files of the category are used;
        # slicing with None keeps the whole list.
        file_ids = movie_reviews.fileids(category)[:max_files_per_class]
        return [
            ({word: True for word in movie_reviews.words(file_id)}, label)
            for file_id in file_ids
        ]

    train_set = (
        labelled_features('pos', 'positive')
        + labelled_features('neg', 'negative')
    )
    # Mix positive and negative examples before training.
    random.shuffle(train_set)
    return NaiveBayesClassifier.train(train_set)
|