Spaces:
Build error
Build error
| import joblib | |
| import gradio as gr | |
| from datasets import Dataset, DatasetDict, load_dataset | |
| from huggingface_hub import login | |
| import os | |
import logging

# Authenticate against the Hugging Face Hub so flagged samples can be pushed.
# The access token is read from the HF_TOKEN environment variable.
token = os.getenv('HF_TOKEN')
if token:
    # NOTE(review): `write_permission` is reported as deprecated in recent
    # huggingface_hub releases — confirm it is still accepted by the
    # version this app is pinned to.
    login(token, add_to_git_credential=True, write_permission=True)
else:
    # Without a token, login() falls back to an interactive prompt, which
    # cannot be answered in a headless deployment. Skip it and keep the
    # classifier usable; pushes to the Hub will fail until a token is set.
    logging.getLogger(__name__).warning(
        "HF_TOKEN is not set; skipping Hugging Face Hub login."
    )

# Pre-trained artefacts, loaded once at start-up and shared by all requests.
# NOTE: joblib.load unpickles arbitrary objects — only load trusted files.
model = joblib.load('arabic_text_classifier.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
label_encoder = joblib.load('label_encoder.pkl')

# Human-readable class names, used in the interface description.
available_labels = label_encoder.classes_
def predict_category(text, threshold=0.5):
    """Classify ``text`` and return its label, or "Other" when unsure.

    Args:
        text: Raw input string to classify.
        threshold: Minimum top-class probability required to report a
            concrete label. Defaults to 0.5, the previously hard-coded
            cutoff, so existing callers are unaffected.

    Returns:
        The decoded class label produced by ``label_encoder``, or the
        string "Other" when the highest class probability is below
        ``threshold``.
    """
    text_vector = vectorizer.transform([text])
    probabilities = model.predict_proba(text_vector)[0]

    # Low-confidence inputs are reported as "Other" without a class label.
    if max(probabilities) < threshold:
        return "Other"

    # Run the hard prediction only once we know its result will be used.
    predicted_category = model.predict(text_vector)[0]
    return label_encoder.inverse_transform([predicted_category])[0]
def flag_data(text, prediction):
    """Append a (text, prediction) record to the crowdsourced Hub dataset.

    The current "train" split is downloaded, the new record is appended,
    and the result is pushed back to the same repository.

    Args:
        text: The text that was classified.
        prediction: The label returned for ``text``.

    Returns:
        None. If the existing dataset cannot be loaded for a reason other
        than "not found", the failure is logged and nothing is pushed.
    """
    import logging

    repo_id = "Tevfik34/crowdsourced-text-classification-data"

    try:
        dataset = load_dataset(repo_id, split="train")
    except FileNotFoundError:
        # The dataset does not exist (or has no data) yet: start a new one.
        # NOTE(review): the `datasets` not-found / empty-dataset errors are
        # expected to derive from FileNotFoundError — confirm for the
        # installed version.
        dataset = Dataset.from_dict({"text": [], "prediction": []})
    except Exception:
        # Any other failure (network, auth, ...) must NOT be treated as
        # "dataset is empty": pushing a fresh one-row dataset would
        # overwrite everything collected so far. Skip flagging instead.
        logging.getLogger(__name__).exception(
            "Could not load %s; skipping flagging to avoid overwriting it.",
            repo_id,
        )
        return

    # NOTE(review): add_item() takes a single example, so these one-element
    # lists are stored as list-valued fields. Kept as-is to stay compatible
    # with whatever schema is already on the Hub — verify before changing.
    new_data = {"text": [text], "prediction": [prediction]}
    dataset = dataset.add_item(new_data)
    dataset.push_to_hub(repo_id)
def classify_and_flag(text):
    """Predict the category of ``text``, record the pair, and return it.

    Args:
        text: The text to classify.

    Returns:
        The value returned by ``predict_category(text)``.
    """
    label = predict_category(text)
    # Store the input together with the model's answer before replying.
    flag_data(text, label)
    return label
# Markdown shown under the title; "{}" is filled with the known class names.
_DESCRIPTION_TEMPLATE = """
This interface allows you to classify Arabic text into different categories using a machine learning model trained on 160,000 real-world text samples.
**Model Overview**:
- The model is based on **Logistic Regression**.
- It was trained on a large dataset of **160,000 Arabic text entries**, ensuring robustness and accuracy in classifying Arabic text.
**How to use**:
- Enter any Arabic text in the input box.
- The model will predict the category that the text most likely belongs to.
- If the model is uncertain, it will classify the text as 'Other'.
**Available Labels**:
The model can predict the following categories:
- {}
Try entering some text in Arabic to see how the model works.
"""

# Single text box in, single label out; every submission goes through
# classify_and_flag.
interface = gr.Interface(
    fn=classify_and_flag,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter text in Arabic here...",
        label="Text",
    ),
    outputs=gr.Label(label="Predicted Category"),
    title="Arabic Text Classifier",
    description=_DESCRIPTION_TEMPLATE.format(", ".join(available_labels)),
    theme="ParityError/Interstellar",
)

# Start the web server.
interface.launch()