Spaces:

srini047
/

saliency-profanity-detection

Sleeping

App Files Files Community

saliency-profanity-detection / salient.py

srini047

Upload 5 files

de1732e over 2 years ago

raw

history blame contribute delete

2.08 kB

	"""# MODEL BUILDING"""
	# Commented out IPython magic to ensure Python compatibility.
	import numpy as np # For linear algebra
	import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
	# import matplotlib.pyplot as plt # For Visualisation
	# %matplotlib inline
	# import seaborn as sns # For Visualisation
	# from bs4 import BeautifulSoup # For Text Parsing
	# from ydata_profiling import ProfileReport # For generating data report

	import nltk
	from nltk.corpus import stopwords
	# Fetch the NLTK resources used by this script only when they are not
	# already installed, so that importing the module does not contact the
	# download server on every start-up.
	for _resource_path, _package in (
	    ('corpora/stopwords', 'stopwords'),
	    ('tokenizers/punkt', 'punkt'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        # Resource is missing locally: download it once.
	        nltk.download(_package)

	def remove_stopword(text):
	    """Return ``text`` with English stopwords removed, keeping the word 'not'.

	    The text is split with ``nltk.word_tokenize``; tokens found in NLTK's
	    English stopword list (minus 'not') are dropped and the remaining
	    tokens are re-joined with single spaces. Tokens are compared exactly as
	    the tokenizer produces them (no case folding is applied here).
	    """
	    # A set gives O(1) membership tests per token; subtracting {'not'} also
	    # avoids the ValueError that list.remove() raises if 'not' is absent.
	    stopword = set(nltk.corpus.stopwords.words('english')) - {'not'}
	    kept = [w for w in nltk.word_tokenize(text) if w not in stopword]
	    return ' '.join(kept)
	#data['Extracted text'] = data['Extracted text'].apply(remove_stopword)

	# Training set; the code below reads its 'Extracted text' and 'saliency'
	# columns. (The bare `data` expression that followed was a notebook
	# display leftover and did nothing in a script, so it is gone.)
	data = pd.read_csv('train-cleaned.csv')

	import nltk #Natural Language Processing Toolkit
	def punc_clean(text):
	    """Return ``text`` with every ASCII punctuation character removed.

	    "Punctuation" means the characters in ``string.punctuation``. All other
	    characters, including whitespace, are kept in their original order.
	    """
	    import string as st
	    # A deletion table lets str.translate strip the characters in a single
	    # pass instead of testing each character in a Python-level loop.
	    return text.translate(str.maketrans('', '', st.punctuation))
	# Write the punctuation-free text back onto 'Extracted text'. Storing it
	# under an empty column name left the cleaned text unused, because the
	# vectorizer below is fitted on 'Extracted text'.
	data['Extracted text'] = data['Extracted text'].apply(punc_clean)
	#data.head(2)

	from sklearn.feature_extraction.text import TfidfVectorizer

	# TF-IDF features over unigrams and bigrams; min_df=1 keeps every term
	# that appears in at least one document.
	vectr = TfidfVectorizer(ngram_range=(1,2),min_df=1)

	# fit_transform learns the vocabulary/IDF weights and vectorises the
	# corpus in one pass, giving the same matrix as fit() followed by
	# transform() without tokenising the text twice. `vectr` stays fitted for
	# transforming new text later.
	vect_X = vectr.fit_transform(data['Extracted text'])

	#from sklearn.linear_model import LogisticRegression

	from sklearn.svm import SVC
	from sklearn.linear_model import LogisticRegression
	from sklearn.ensemble import VotingClassifier

	# Base estimators for the ensemble: a linear-kernel SVM with probability
	# estimates enabled, and a logistic regression with default settings.
	# NOTE(review): probability=True is not needed by hard voting itself —
	# confirm whether anything else relies on the SVM's predict_proba.
	svm_classifier = SVC(kernel='linear', probability=True)
	logistic_classifier = LogisticRegression()


	# Combine both estimators; voting='hard' means the ensemble predicts by
	# majority vote over the estimators' predicted class labels.
	model = VotingClassifier(estimators=[
	('svm', svm_classifier),
	('logistic', logistic_classifier)
	], voting='hard')


	# Train the ensemble on the TF-IDF matrix against the 'saliency' column.
	# fit() returns the estimator itself, so `clf` and `model` are the same object.
	clf=model.fit(vect_X,data['saliency'])
	# clf.score(vect_X, data['saliency'])*100

	# """# PREDICTION"""

	# clf.predict(vectr.transform(['''thank you ''']))

	# clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))

	# clf.predict(vectr.transform([''' I dont think that should really matter Um''']))