| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| import torch |
| import pandas as pd |
|
|
| |
# Hugging Face checkpoint used for both the tokenizer and the classifier.
model_name = "SciTensor/e5-base-title-classifier-ukr"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Lookup table from the model's class id ("label" column) to the Ukrainian
# category name ("category_ukr" column). If a label occurs more than once in
# the CSV, the last row wins.
df = pd.read_csv('labels_categories_ukr.csv')
labels = df.set_index("label")["category_ukr"].to_dict()
|
|
def classify_product_title(text: str):
    """Predict the Ukrainian category name for a single product title.

    The title is tokenized with the module-level ``tokenizer``, scored by the
    module-level ``model`` with gradient tracking disabled, and the
    highest-scoring class id is translated through the ``labels`` mapping.

    Args:
        text: Product title entered by the user. ``None`` and blank strings
            are accepted and short-circuit before inference.

    Returns:
        The ``labels`` entry for the predicted class id, or ``None`` when
        ``text`` is ``None``, empty, or whitespace-only.

    Raises:
        KeyError: If the predicted class id has no entry in ``labels``.
    """
    # A blank title carries no signal, yet the model would still emit some
    # category for it (and a None input would fail inside the tokenizer), so
    # skip inference entirely.
    # NOTE(review): gr.Label is expected to render None as an empty output;
    # confirm against the installed Gradio version.
    if text is None or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients are needed for a forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single title is passed in, so argmax yields one class id and .item()
    # turns it into a plain Python int usable as a dictionary key.
    prediction = logits.argmax(dim=-1).item()
    return labels[prediction]
|
|
# Input widget: a two-line text box for the raw product title.
title_input = gr.Textbox(
    label="Product title (Ukrainian)",
    placeholder="Enter a product title...",
    lines=2,
)

# Output widget: shows the single predicted category.
category_output = gr.Label(label="Category (Ukrainian)", num_top_classes=1)

# Wire the classifier function to the widgets.
iface = gr.Interface(
    title="Product Title Categorizer (Fine-tuned E5 Model)",
    description="Classify messy product titles into categories. Optimized for Ukrainian/retail titles.",
    fn=classify_product_title,
    inputs=title_input,
    outputs=category_output,
)

# Start the web app as soon as the script is executed.
iface.launch()
|
|