Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from simpletransformers.classification import ClassificationModel | |
| from sklearn.model_selection import train_test_split | |
| import matplotlib.pyplot as plt | |
| from collections import Counter | |
| import nltk | |
| from nltk.corpus import stopwords | |
| import re | |
| import string | |
| import gradio as gr | |
# Make sure the NLTK stop-word corpus is available before reading it.
nltk.download('stopwords')

# Turkish stop words, as shipped with NLTK.
stop_words_list = stopwords.words('turkish')

# The single ASCII letters 'a'..'z'; stray one-letter tokens are treated as
# faulty data and removed during preprocessing.
false_text = list(string.ascii_lowercase)
# Python's str.lower() is locale-independent: it maps "I" to "i" and
# "\u0130" (dotted capital I) to "i" + U+0307 (combining dot above).  For
# Turkish text both results are wrong, so the two letters are mapped
# explicitly before lower() handles everything else.
_TURKISH_CASE_TABLE = str.maketrans({"\u0130": "i", "I": "\u0131"})

# Deletes every ASCII punctuation character in a single pass.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

# A character followed by one or more copies of itself.
_REPEATED_CHAR_RE = re.compile(r'(.)\1{1,}')


def preprocess_text(text, stop_words=None, noise_tokens=None):
    """Normalise a raw Turkish text for classification.

    Steps, in order:

    1. lower-case the text using Turkish casing rules for I / dotted I,
    2. replace newlines with spaces,
    3. drop digits,
    4. drop ASCII punctuation,
    5. drop stop words and known noise tokens,
    6. squeeze runs of a repeated character down to two characters,
    7. drop tokens shorter than two characters.

    Args:
        text: The raw input string.
        stop_words: Iterable of tokens to discard.  Defaults to the
            module-level ``stop_words_list``.
        noise_tokens: Iterable of additional tokens to discard.  Defaults
            to the module-level ``false_text``.

    Returns:
        The cleaned tokens joined by single spaces ("" if nothing is left).

    NOTE(review): the Turkish-aware casing changes the output for inputs
    containing "I" or dotted capital I compared with plain ``str.lower()``.
    If a model was trained on text cleaned by the old behaviour, confirm
    that the two stay aligned.
    """
    if stop_words is None:
        stop_words = stop_words_list
    if noise_tokens is None:
        noise_tokens = false_text
    # One set gives O(1) membership tests instead of two list scans per token.
    excluded = set(stop_words) | set(noise_tokens)

    text = text.translate(_TURKISH_CASE_TABLE).lower()
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\d', '', text)
    text = text.translate(_PUNCTUATION_TABLE)

    words = []
    for word in text.split():
        if word in excluded:
            continue
        # "coook" -> "cook": keep at most two of any repeated character.
        word = _REPEATED_CHAR_RE.sub(r'\1\1', word)
        if len(word) > 1:
            words.append(word)
    return " ".join(words)
# Directory the fine-tuned model is loaded from.
_MODEL_PATH = "bert_model"

# Lazily created classifier, shared by every call to predict().
_model = None


def _get_model():
    """Return the classifier, loading it from disk on first use only."""
    global _model
    if _model is None:
        _model = ClassificationModel('bert', _MODEL_PATH, use_cuda=False)
    return _model


def predict(texts):
    """Classify one or more texts and return their label names.

    Args:
        texts: Either a single string or an iterable of strings.  A single
            string is treated as a one-element batch, because the
            underlying model expects a list of texts.

    Returns:
        A list with one label name per input text, in input order.  An
        empty input yields an empty list without touching the model.
    """
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    if not texts:
        return []

    # The model is loaded once and reused; reloading it on every call is
    # very slow, especially when the UI calls this function on each edit.
    predictions, _ = _get_model().predict(texts)
    return [result_predict(prediction) for prediction in predictions]
# (class id, label name) pairs, checked in this order.
_LABEL_TABLE = (
    (4, 'OTHER'),
    (1, 'RACIST'),
    (0, 'INSULT'),
    (3, 'PROFANITY'),
    (2, 'SEXIST'),
)


def result_predict(num):
    """Translate a numeric class id into its label name.

    Returns the label whose id compares equal to ``num``, or ``None`` when
    no id matches.
    """
    for class_id, label in _LABEL_TABLE:
        if num == class_id:
            return label
    return None
def _classify_text(text):
    """Adapt ``predict`` to the UI: one input string in, one label out.

    The interface has a single text input and a single text output, so the
    text is wrapped in a one-element batch and the first label is returned.
    Blank input returns an empty string without calling the model.
    """
    if not text or not text.strip():
        return ""
    labels = predict([text])
    return labels[0] if labels else ""


# Build the Gradio interface.
# One output component is declared because exactly one label is produced
# per submitted text.
# NOTE(review): the legacy `layout="horizontal"` option was dropped here;
# it appears to be rejected by current Gradio releases - confirm against
# the Gradio version this app is pinned to.
iface = gr.Interface(
    fn=_classify_text,
    inputs="text",
    outputs="text",
    live=True,
    title="BERT Text Classification",
)
iface.launch()