import functools
import re
import string
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from simpletransformers.classification import ClassificationModel
from sklearn.model_selection import train_test_split

# The Turkish stop-word list is read from the NLTK corpus below, so make sure
# the corpus is available before it is accessed.
nltk.download('stopwords')

stop_words_list = stopwords.words('turkish')

# Single ASCII letters are treated as noise tokens and dropped from the text.
false_text = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
              'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Loop-invariant helpers for preprocess_text, built once at import time.
_DROPPED_WORDS = frozenset(stop_words_list) | frozenset(false_text)
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
_DIGIT_RE = re.compile(r'\d')
_REPEAT_RE = re.compile(r'(.)\1{1,}')

# Model configuration used by predict().
_MODEL_TYPE = 'bert'
_MODEL_PATH = "bert_model"

# Mapping from the class id produced by the model to its label name.
_LABEL_NAMES = {
    0: 'INSULT',
    1: 'RACIST',
    2: 'SEXIST',
    3: 'PROFANITY',
    4: 'OTHER',
}


def preprocess_text(text):
    """Normalize a raw text string and return the cleaned text.

    Steps, in order: lowercase, replace newlines with spaces, remove digits,
    remove ASCII punctuation, drop Turkish stop words and single ASCII
    letters, collapse any run of a repeated character to exactly two, and
    drop tokens shorter than two characters. Remaining tokens are joined
    with single spaces.

    Args:
        text: The string to clean. Empty or ``None`` input yields ``""``.

    Returns:
        The cleaned text as a single space-separated string.
    """
    if not text:
        return ""

    # NOTE: str.lower() is not locale-aware; e.g. "I" becomes "i" (not the
    # Turkish dotless "ı"). Kept as-is so the output stays unchanged.
    text = text.lower()
    text = text.replace("\n", " ")
    text = _DIGIT_RE.sub('', text)
    text = text.translate(_PUNCTUATION_TABLE)

    # Drop stop words and stray single-letter tokens.
    words = [word for word in text.split() if word not in _DROPPED_WORDS]

    # Collapse runs of a repeated character ("çoook" -> "çook").
    words = [_REPEAT_RE.sub(r'\1\1', word) for word in words]

    # Tokens of length one carry no signal; splitting and joining also
    # normalizes repeated whitespace.
    return " ".join(word for word in words if len(word) > 1)


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the classification model once and reuse it for later calls."""
    return ClassificationModel(_MODEL_TYPE, _MODEL_PATH, use_cuda=False)


def predict(texts):
    """Classify one or more texts and return their label names.

    Args:
        texts: A single string or an iterable of strings. A single string is
            treated as one text, not as a sequence of characters.

    Returns:
        A list with one label name per input text (see ``result_predict``);
        an empty list when there is nothing to classify.
    """
    # NOTE(review): the input is passed to the model without going through
    # preprocess_text. If the model was trained on preprocessed text, the
    # same cleaning should probably be applied here - confirm.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    if not texts:
        return []

    predictions, _ = _load_model().predict(texts)
    return [result_predict(prediction) for prediction in predictions]


def result_predict(num):
    """Translate a numeric class id into its label name.

    Args:
        num: The class id produced by the model.

    Returns:
        The label name, or ``None`` when the id is not a known class.
    """
    try:
        return _LABEL_NAMES.get(num)
    except TypeError:
        # Unhashable values cannot be a known class id.
        return None


def _predict_for_ui(text):
    """Return the label for a single textbox value, or "" for blank input."""
    if not text or not text.strip():
        return ""
    labels = predict(text)
    return labels[0] if labels else ""


# Build the Gradio interface: one text input, one text output (the label).
# live=True re-runs the function whenever the input changes, which is why the
# model is loaded once and cached instead of being rebuilt on every call.
# NOTE(review): `layout` may not be accepted by every Gradio version - confirm
# against the installed release.
iface = gr.Interface(
    fn=_predict_for_ui,
    inputs="text",
    outputs="text",
    live=True,
    layout="horizontal",
    title="BERT Text Classification"
)

iface.launch()