Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
# Load the labelled dataset; the path is relative, so it is resolved against
# the current working directory.
df = pd.read_csv('CrimeVsNoCrimeArticles.csv')

# Convert the two columns used below into NumPy arrays via plain Python lists.
# NOTE(review): presumably 'title' holds headline text and 'is_crime_report'
# a 0/1 label -- confirm against the CSV.
titles = np.array(df['title'].tolist())
labels = np.array(df['is_crime_report'].tolist())
| import gradio as gr | |
| import re | |
| import nltk | |
| nltk.download('stopwords') | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import TreebankWordTokenizer | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.models import Sequential | |
| from tensorflow.keras.layers import Dense,LSTM,Embedding | |
# One Treebank tokenizer instance shared by every preprocess() call.
word_tokenizer = TreebankWordTokenizer()
# English stopwords kept in a set so the membership test used when filtering
# tokens is a hash lookup.
stop_word = {*stopwords.words('english')}
def preprocess(text):
    """Normalise a piece of text into a space-separated string of kept tokens.

    The text is lower-cased, every character other than ``a``-``z`` and
    whitespace is deleted, the remainder is tokenised with the module-level
    ``word_tokenizer``, and any token present in ``stop_word`` is dropped.

    Args:
        text: The string to clean.

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        no token survives.
    """
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    return ' '.join(
        token
        for token in word_tokenizer.tokenize(letters_only)
        if token not in stop_word
    )
# Clean every title with preprocess() before building the vocabulary.
preprocess_doc = list(map(preprocess, titles))

# Word -> integer-id mapping learned from the cleaned titles. The vocabulary
# is capped with num_words=10000 and unknown words encode as '<OOV>'.
num_tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
num_tokenizer.fit_on_texts(preprocess_doc)

# Encode each title as a list of ids, then force a fixed length of 10 with
# padding appended at the end (padding='post').
seq = num_tokenizer.texts_to_sequences(preprocess_doc)
padded_seq = pad_sequences(seq, maxlen=10, padding='post')

# Embedding -> LSTM -> single sigmoid unit. Embedding's input_dim matches the
# tokenizer's num_words so every id it can emit has a row in the table.
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=16))
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training happens at module level, i.e. every time this script is started.
model.fit(padded_seq, labels, epochs=50)
def pre_sentiment(user_input):
    """Classify one piece of text and return a label plus the model's score.

    The input goes through the same pipeline as the training data:
    ``preprocess``, ``num_tokenizer.texts_to_sequences`` and
    ``pad_sequences(maxlen=10, padding='post')``, and is then scored by the
    module-level ``model``.

    Args:
        user_input: Text supplied through the Gradio interface. ``None`` is
            treated like an empty string instead of raising inside
            ``preprocess``.

    Returns:
        ``'<label> - Score: <prediction>'`` where the label is 'CRIMINAL'
        when the prediction is >= 0.5 and 'NOT CRIMINAL' otherwise. When
        nothing is left after cleaning (empty input, or only stopwords /
        non-letter characters), a short prompt asking for more text is
        returned instead of a score.
    """
    cleaned = preprocess(user_input or '')
    if not cleaned:
        # With no tokens the padded sequence would be all padding, so any
        # score reported for it would say nothing about the user's text.
        return 'Please enter a headline with at least one meaningful word.'

    encoded = num_tokenizer.texts_to_sequences([cleaned])
    padded_input = pad_sequences(encoded, maxlen=10, padding='post')
    # predict() returns an array holding one value for the single input row;
    # .item() unwraps it to a Python float.
    prediction = model.predict(padded_input).item()
    result = 'CRIMINAL' if prediction >= 0.5 else 'NOT CRIMINAL'
    return f'{result} - Score: {prediction}'
# Text-in / text-out Gradio interface wired to pre_sentiment.
# NOTE(review): the title reads 'Sentiment Analyst' although the model is
# trained on the 'is_crime_report' column -- confirm the intended wording.
demo = gr.Interface(
    fn=pre_sentiment,
    inputs='text',
    outputs='text',
    title='Sentiment Analyst',
)
demo.launch()