thanhcong2001 committed on
Commit
590df16
·
verified ·
1 Parent(s): 9ac3355

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
# Load the labelled article dataset and pull out the two columns used below:
# the article titles (model input) and the `is_crime_report` values (labels).
df = pd.read_csv('NLP/CrimeVsNoCrimeArticles.csv')
titles, labels = (
    np.array(df[column].to_list())
    for column in ('title', 'is_crime_report')
)
6
+ import gradio as gr
7
+ import re
8
+ import nltk
9
+ from nltk.corpus import stopwords
10
+ from nltk.tokenize import TreebankWordTokenizer
11
+ from tensorflow.keras.preprocessing.text import Tokenizer
12
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
13
+ from tensorflow.keras.models import Sequential
14
+ from tensorflow.keras.layers import Dense,LSTM,Embedding
15
# The stopwords corpus is not bundled with the NLTK package itself; on a
# fresh environment the lookup raises LookupError. Fetch the corpus on
# demand and retry so the app can start without a manual download step.
try:
    stop_word = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_word = set(stopwords.words('english'))

# Rule-based tokenizer; needs no downloaded model data.
word_tokenizer = TreebankWordTokenizer()
17
def preprocess(text):
    """Normalise *text* before it is handed to the Keras tokenizer.

    The text is lower-cased, every character other than ``a``-``z`` and
    whitespace is removed, the remainder is split with the module-level
    ``word_tokenizer`` and tokens found in ``stop_word`` are dropped.

    Returns the surviving tokens joined by single spaces.
    """
    cleaned = re.sub(r'[^a-z\s]', '', text.lower())
    kept = (
        token
        for token in word_tokenizer.tokenize(cleaned)
        if token not in stop_word
    )
    return ' '.join(kept)
23
# Clean every title with the same routine that is applied at inference time.
preprocess_doc = list(map(preprocess, titles))

# Map words to integer ids; words the tokenizer does not keep become '<OOV>'.
num_tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
num_tokenizer.fit_on_texts(preprocess_doc)
seq = num_tokenizer.texts_to_sequences(preprocess_doc)

# Fixed-length model input: shorter sequences are zero-padded at the end.
padded_seq = pad_sequences(seq, maxlen=10, padding='post')

# Embedding -> LSTM -> single sigmoid unit for the binary label.
# input_dim matches the tokenizer's num_words above.
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=16))
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training runs at module load time, i.e. on every start of the app.
model.fit(padded_seq, labels, epochs=50)
35
def pre_sentiment(user_input):
    """Classify *user_input* with the trained model.

    The text goes through ``preprocess``, is encoded with ``num_tokenizer``
    and padded to length 10 (the same settings used for the training data),
    then scored by ``model``.

    Returns a string of the form ``"<label> - Score: <score>"`` where the
    label is ``CRIMINAL`` when the score is at least 0.5 and
    ``NOT CRIMINAL`` otherwise.
    """
    cleaned = preprocess(user_input)
    encoded = pad_sequences(
        num_tokenizer.texts_to_sequences([cleaned]),
        maxlen=10,
        padding='post',
    )
    # A single input yields a single-element prediction; unwrap it to a float.
    prediction = model.predict(encoded).item()
    if prediction >= 0.5:
        result = 'CRIMINAL'
    else:
        result = 'NOT CRIMINAL'
    return f'{result} - Score: {prediction}'
42
# Expose the classifier through a text-in / text-out Gradio interface.
# NOTE(review): the title says "Sentiment" although the model is trained on
# the `is_crime_report` labels - confirm the intended wording.
demo = gr.Interface(
    fn=pre_sentiment,
    inputs='text',
    outputs='text',
    title='Sentiment Analyst',
)
demo.launch()