Spaces:

revaza
/

tfidf_hosting

Sleeping

App Files Files Community

tfidf_hosting / app.py

revaza

Update app.py

bc5fd0a verified 8 months ago

raw

history blame contribute delete

2.66 kB

	import logging
	import os

	import gradio as gr
	import joblib
	import requests
	from lime.lime_text import LimeTextExplainer

	# Constants
	# Class labels, indexed by the integer the classifier predicts.
	CLASSES = ["Non-Hate Speech", "Hate Speech"]

	# Georgian function words that are never reported as hate words, even when
	# the explainer marks them as influential.
	# NOTE: every entry needs its own trailing comma; adjacent string literals
	# are silently concatenated into one (wrong) entry.
	STOPWORDS = {
	    "კი", "არა", "და", "რომ", "რადგან", "ის", "ეს", "რო", "მას", "მისი",
	    "შენი", "ჩემი", "რად", "რატომ", "მერე", "ან", "აუ", "ამის", "იმის",
	    "რომც", "ეე", "ეეე", "ხარ", "ვარ", "როგორც", "რაც", "როდესაც",
	    "სადაც", "თუ", "რა", "რომელი", "რომლიც", "როდის", "რაღა", "მაგრამ",
	    "არ", "აქ", "იქ", "შემდეგ", "სად", "მე", "შენ", "თქვენ", "მიერ",
	    "ვინ", "როგორ", "თუნდაც", "რათა", "ისინი", "ვინც", "რატო",
	}

	# Remote location of the pickled classifier and the local path it is cached at.
	MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
	MODEL_PATH = "tfidf_logreg_classifier.pkl"

	# Download the model only when no cached copy exists.
	if not os.path.exists(MODEL_PATH):
	    # A timeout keeps a stalled connection from blocking startup forever.
	    r = requests.get(MODEL_URL, timeout=60)
	    # Fail loudly on HTTP errors instead of caching an error page as the model.
	    r.raise_for_status()
	    # Write under a temporary name and rename afterwards, so an interrupted
	    # write never leaves a truncated file at MODEL_PATH that later starts
	    # would mistake for a valid cached model.
	    partial_path = MODEL_PATH + ".part"
	    with open(partial_path, "wb") as f:
	        f.write(r.content)
	    os.replace(partial_path, MODEL_PATH)

	# SECURITY NOTE(review): joblib.load unpickles the file, which can execute
	# arbitrary code. The URL tracks a mutable branch ("main"); consider pinning
	# it to a commit hash and/or verifying a checksum before loading.
	model = joblib.load(MODEL_PATH)

	def is_undecided(prob):
	    """Return True when *prob* lies strictly between 0.35 and 0.7.

	    Callers treat probabilities inside this open interval as too uncertain
	    to act on; both endpoints themselves count as decided.
	    """
	    lower_bound, upper_bound = 0.35, 0.7
	    return prob > lower_bound and prob < upper_bound

	def get_hate_words(text):
	    """Return the words that push *text* towards the "Hate Speech" class.

	    The text is classified first. An empty list is returned when the
	    predicted class is not "Hate Speech" or when the probability of the last
	    class falls in the undecided band (see ``is_undecided``). Otherwise a
	    LIME explanation with up to 10 features is computed, and the non-stopword
	    features with a positive weight are returned.

	    Args:
	        text: The input text to analyse.

	    Returns:
	        A list of words (possibly empty). Explanation failures are logged and
	        yield an empty list, keeping the endpoint best-effort.
	    """
	    # Classify before explaining: LIME is by far the most expensive step and
	    # its result is discarded anyway for non-hate or undecided inputs.
	    pred = int(model.predict([text])[0])
	    prob = model.predict_proba([text])[0][-1]

	    if is_undecided(prob) or CLASSES[pred] != "Hate Speech":
	        return []

	    explainer = LimeTextExplainer(class_names=CLASSES)
	    try:
	        explanation = explainer.explain_instance(
	            text, model.predict_proba, num_features=10
	        )
	        influential_words = explanation.as_list()
	    except Exception:
	        # Stay best-effort, but leave a trace instead of silently swallowing
	        # the error (and no longer trap KeyboardInterrupt/SystemExit).
	        logging.getLogger(__name__).exception(
	            "LIME explanation failed; returning no hate words"
	        )
	        return []

	    # Keep only non-stopword features with a positive weight.
	    return [
	        word
	        for word, score in influential_words
	        if word not in STOPWORDS and score > 0
	    ]

	def api_predict(text):
	    """Run hate-word detection on *text* and wrap the result for the JSON output.

	    Returns a dict with the single key ``"hate_words"`` holding the list
	    produced by ``get_hate_words``.
	    """
	    payload = {}
	    payload["hate_words"] = get_hate_words(text)
	    return payload

	# Build the UI: one text input, one JSON output and a button that connects them.
	with gr.Blocks() as demo:
	    input_text = gr.Textbox(label="Enter Georgian text")
	    output_json = gr.JSON(label="Detected Hate Words")
	    submit_btn = gr.Button("Predict")

	    # api_name gives this click handler the endpoint name "predict",
	    # so API clients can call it by name.
	    submit_btn.click(api_predict, input_text, output_json, api_name="predict")

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    launch_options = dict(
	        share=True,
	        server_name="0.0.0.0",
	        server_port=7860,
	        ssr_mode=False,
	    )
	    demo.launch(**launch_options)