|
|
import gradio as gr |
|
|
import joblib |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
import json |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
# --- Startup: classical model and vocabulary ---------------------------------
print("Pokrećem aplikaciju...")

# NOTE: joblib.load() unpickles the file, so "svm_pipeline.pkl" must come from
# a trusted source. predict_svm() expects the loaded object to provide
# predict_proba() and classes_.
print("Učitavam SVM pipeline...")
svm_pipeline = joblib.load("svm_pipeline.pkl")

# Token -> integer index mapping consumed by text_to_indices().
print("Učitavam riječnik...")
with open("word2idx.json", encoding="utf-8") as f:
    word2idx = json.load(f)
|
|
|
|
|
|
|
|
class CNNModel(nn.Module):
    """Convolutional text classifier over token-index sequences.

    Token indices are embedded, passed through one 2-D convolution per kernel
    size (each window spans the full embedding width), max-pooled over the
    sequence axis, concatenated, and projected to ``num_classes`` logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector.
        num_classes: Number of output logits.
        kernel_sizes: Convolution window heights, in tokens. Any iterable of
            ints is accepted; it is copied into a tuple.
        num_filters: Output channels produced by each convolution.
    """

    def __init__(self, vocab_size, embed_dim=300, num_classes=3,
                 kernel_sizes=(3, 4, 5), num_filters=128):
        super().__init__()
        # The default is an immutable tuple (a list default would be shared
        # across every call); copying also lets callers pass any iterable.
        kernel_sizes = tuple(kernel_sizes)

        # Attribute names are part of the checkpoint format (state_dict keys),
        # so they must stay "embedding", "convs" and "fc".
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (k, embed_dim)) for k in kernel_sizes
        ])
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, x):
        """Return class logits for a batch of token-index sequences.

        Args:
            x: Integer tensor indexed as ``(batch, sequence)``. The sequence
                must be at least as long as the largest kernel size,
                otherwise the corresponding convolution raises.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized logits.
        """
        # (batch, seq, embed) -> (batch, 1, seq, embed): Conv2d needs a
        # channel dimension.
        embedded = self.embedding(x).unsqueeze(1)
        # Each kernel covers the whole embedding width, so the last dimension
        # collapses to 1 and is squeezed away: (batch, filters, seq - k + 1).
        feature_maps = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        # Max over the full remaining sequence axis: (batch, filters).
        pooled = [F.max_pool1d(fm, fm.size(2)).squeeze(2) for fm in feature_maps]
        features = self.dropout(torch.cat(pooled, 1))
        return self.fc(features)
|
|
|
|
|
|
|
|
class GRUModel(nn.Module):
    """Recurrent text classifier: embedding -> GRU -> linear head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector (GRU input size).
        hidden_dim: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
    """

    def __init__(self, vocab_size, embed_dim=300, hidden_dim=256, num_layers=1, num_classes=3):
        super().__init__()
        # Attribute names double as state_dict keys; keep them stable so
        # existing checkpoints still load.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.gru = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of token-index sequences.

        Args:
            x: Integer tensor indexed as ``(batch, sequence)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized logits.
        """
        embedded = self.embedding(x)
        # The GRU returns (per-step outputs, final hidden states); only the
        # final hidden states are used, and [-1] picks the last stacked layer.
        final_hidden = self.gru(embedded)[1]
        return self.fc(final_hidden[-1])
|
|
|
|
|
|
|
|
# --- Neural models ------------------------------------------------------------
# Hyperparameters used to rebuild the architectures before loading weights.
# NOTE(review): the +1 presumably reserves index 0 for padding (both models use
# padding_idx=0) and assumes word2idx indices start at 1 — confirm against the
# training code; the embedding size must match the saved checkpoints.
vocab_size = len(word2idx) + 1
embed_dim = 300
num_classes = 3

print("Učitavam CNN model...")
cnn_model = CNNModel(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_classes=num_classes,
)
# map_location forces the checkpoint tensors onto the CPU.
cnn_model.load_state_dict(
    torch.load("cnn_model.pt", map_location=torch.device("cpu"))
)
cnn_model.eval()  # inference mode (turns off dropout)

print("Učitavam GRU model...")
gru_model = GRUModel(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    hidden_dim=256,
    num_layers=1,
    num_classes=num_classes,
)
gru_model.load_state_dict(
    torch.load("gru_model.pt", map_location=torch.device("cpu"))
)
gru_model.eval()

# Tokenizer and classifier are both loaded from "my_finetuned_model".
print("Učitavam BERTić model i tokenizer...")
bert_tokenizer = AutoTokenizer.from_pretrained("my_finetuned_model")
bert_model = AutoModelForSequenceClassification.from_pretrained("my_finetuned_model")
bert_model.eval()
|
|
|
|
|
|
|
|
def text_to_indices(text, max_len=100):
    """Convert raw text into a fixed-length batch of vocabulary indices.

    The text is lower-cased and split on whitespace. Each token is looked up
    in the module-level ``word2idx``; tokens missing from it map to 0, the
    same value used for padding. The index list is then right-padded with
    zeros, or truncated, to ``max_len`` entries. Tokens, indices and the
    tensor shape are printed for debugging.

    Args:
        text: Input string.
        max_len: Target sequence length.

    Returns:
        ``torch.long`` tensor holding a single sequence (batch size 1).
    """
    tokens = text.lower().split()
    print(f"Tokeni: {tokens}")

    token_ids = [word2idx.get(tok, 0) for tok in tokens]
    print(f"Indeksi: {token_ids}")

    shortfall = max_len - len(token_ids)
    if shortfall > 0:
        fixed = token_ids + [0] * shortfall
    else:
        fixed = token_ids[:max_len]

    # Wrapping in an outer list adds the batch dimension.
    tensor = torch.tensor([fixed], dtype=torch.long)
    print(f"Tensor shape: {tensor.shape}")
    return tensor
|
|
|
|
|
|
|
|
|
|
|
def predict_svm(text):
    """Classify ``text`` with the module-level SVM pipeline.

    Args:
        text: Input string; passed to the pipeline as a one-element batch.

    Returns:
        ``"<label> (p=<probability>)"`` where the label is the entry of
        ``svm_pipeline.classes_`` with the highest predicted probability and
        the probability is formatted to two decimals.
    """
    print(f"Predikcija SVM za tekst: {text}")

    class_probs = svm_pipeline.predict_proba([text])[0]
    best = class_probs.argmax()
    pred = svm_pipeline.classes_[best]
    confidence = class_probs[best]  # the maximum probability

    print(f"SVM predikcija: {pred}, povjerenje: {confidence:.2f}")
    return f"{pred} (p={confidence:.2f})"
|
|
|
|
|
def predict_cnn(text):
    """Classify ``text`` with the module-level CNN model.

    Args:
        text: Input string; converted with ``text_to_indices``.

    Returns:
        ``"<class index> (p=<probability>)"`` for the most probable class,
        with the softmax probability formatted to two decimals.
    """
    print(f"Predikcija CNN za tekst: {text}")

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        logits = cnn_model(text_to_indices(text))
        print(f"CNN output: {logits}")

        # The batch always holds one sequence, so row 0 is the whole result.
        probs = F.softmax(logits, dim=1)[0]
        pred = int(probs.argmax())
        confidence = float(probs[pred])

    print(f"CNN predikcija: {pred}, povjerenje: {confidence:.2f}")
    return f"{pred} (p={confidence:.2f})"
|
|
|
|
|
def predict_gru(text):
    """Classify ``text`` with the module-level GRU model.

    Args:
        text: Input string; converted with ``text_to_indices``.

    Returns:
        ``"<class index> (p=<probability>)"`` for the most probable class,
        with the softmax probability formatted to two decimals.
    """
    print(f"Predikcija GRU za tekst: {text}")

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        logits = gru_model(text_to_indices(text))
        print(f"GRU output: {logits}")

        # The batch always holds one sequence, so row 0 is the whole result.
        probs = F.softmax(logits, dim=1)[0]
        pred = int(probs.argmax())
        confidence = float(probs[pred])

    print(f"GRU predikcija: {pred}, povjerenje: {confidence:.2f}")
    return f"{pred} (p={confidence:.2f})"
|
|
|
|
|
def predict_bert(text):
    """Classify ``text`` with the module-level BERTić tokenizer and model.

    Args:
        text: Input string; tokenized with truncation enabled.

    Returns:
        ``"<class index> (p=<probability>)"`` for the most probable class,
        with the softmax probability formatted to two decimals.
    """
    print(f"Predikcija BERTić za tekst: {text}")

    encoded = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        logits = bert_model(**encoded).logits
        print(f"BERTić output logits: {logits}")

        # A single text was encoded, so row 0 is the whole result.
        probs = F.softmax(logits, dim=1)[0]
        pred = int(probs.argmax())
        confidence = float(probs[pred])

    print(f"BERTić predikcija: {pred}, povjerenje: {confidence:.2f}")
    return f"{pred} (p={confidence:.2f})"
|
|
|
|
|
|
|
|
def predict_all(text):
    """Run every model on ``text`` and collect the formatted results.

    Args:
        text: Input string to classify.

    Returns:
        4-tuple of result strings in the order SVM, CNN, GRU, BERTić — the
        same order as the output boxes of the Gradio interface.
    """
    svm_result = predict_svm(text)
    cnn_result = predict_cnn(text)
    gru_result = predict_gru(text)
    bert_result = predict_bert(text)
    return svm_result, cnn_result, gru_result, bert_result
|
|
|
|
|
# Gradio UI: one free-text input and one output box per model. The order of
# the output boxes must match the tuple returned by predict_all().
demo = gr.Interface(
    fn=predict_all,
    inputs=gr.Textbox(lines=3, placeholder="Upiši tekst za klasifikaciju..."),
    outputs=[
        gr.Textbox(label=model_label)
        for model_label in ("SVM (RBF)", "CNN", "GRU", "BERTić")
    ],
    title="Demo klasifikacije teksta",
    description="Predikcije koriste SVM, CNN, GRU i BERTić modele.",
)

if __name__ == "__main__":
    # share=True asks Gradio for a publicly reachable link in addition to the
    # local server; see the Gradio launch() docs for the debug flag.
    demo.launch(share=True, debug=True)
|
|
|