|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import joblib |
|
|
import json |
|
|
|
|
|
|
|
|
# --- Load inference artifacts -------------------------------------------
# Tokenizer and fine-tuned classifier are pulled from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("Woolv7007/egyptian-text-classification")
model = AutoModelForSequenceClassification.from_pretrained("Woolv7007/egyptian-text-classification")

# Fitted sklearn LabelEncoder shipped alongside the app (currently unused
# by predict(); kept for compatibility with the training pipeline).
label_encoder = joblib.load("label_encoder.pkl")

# Ordered class names; index i must match the model's logit column i —
# TODO confirm this ordering against the training run.
with open("labels.json", encoding="utf-8") as fp:
    labels = json.load(fp)
|
|
|
|
|
|
|
|
def predict(text):
    """Classify an Egyptian Arabic sentence.

    Args:
        text: Raw input string from the Gradio textbox.

    Returns:
        dict mapping each class name in ``labels`` to its softmax
        probability, rounded to 3 decimal places (the format
        ``gr.Label`` expects).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # Index [0] removes the batch dimension explicitly. The original
    # .squeeze() would also collapse the class dimension for a
    # single-label model, yielding a scalar and breaking indexing.
    probs = torch.softmax(outputs.logits, dim=-1)[0].tolist()
    # zip pairs each label with its probability and stops safely at the
    # shorter sequence if the two ever disagree in length.
    return {label: round(p, 3) for label, p in zip(labels, probs)}
|
|
|
|
|
# --- Build and serve the demo UI ----------------------------------------
# Single textbox in, top-3 label probabilities out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=3, placeholder="Write an Egyptian Arabic sentence..."),
    outputs=gr.Label(num_top_classes=3),
    title="Egyptian Arabic Text Classification",
)

# share=True exposes a temporary public URL in addition to localhost.
demo.launch(share=True)
|
|
|