File size: 5,568 Bytes
3f29e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import joblib
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Startup banner; everything below runs at import time, before the UI is built.
print("Pokrećem aplikaciju...")

# --- Load the SVM pipeline ---
print("Učitavam SVM pipeline...")
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only ship a svm_pipeline.pkl that comes from a trusted source.
svm_pipeline = joblib.load("svm_pipeline.pkl")

# --- Load the vocabulary used by the CNN and GRU models ---
print("Učitavam riječnik...")
# word2idx is looked up with word2idx.get(token, 0) in text_to_indices, so it
# is used as a token -> integer index mapping.
with open("word2idx.json", "r", encoding="utf-8") as f:
    word2idx = json.load(f)

# --- Definicija CNN modela ---
class CNNModel(nn.Module):
    """Convolutional text classifier with parallel filters of several widths.

    Token indices are embedded, each convolution slides over the sequence
    with its own window height, every feature map is max-pooled over the
    sequence axis, and the pooled features are concatenated and classified
    by a single linear layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector.
        num_classes: Number of output logits.
        kernel_sizes: Iterable of window heights (in tokens), one per
            convolution branch.
        num_filters: Number of output channels per convolution branch.
    """

    # The default for kernel_sizes is an immutable tuple: a list default would
    # be a single object shared by every call of __init__.
    def __init__(self, vocab_size, embed_dim=300, num_classes=3, kernel_sizes=(3, 4, 5), num_filters=128):
        super().__init__()
        # Materialise once so any iterable (list, tuple, generator) is accepted
        # and can be both iterated and measured below.
        kernel_sizes = tuple(kernel_sizes)
        # Attribute names and registration order are kept as-is because saved
        # state dicts are keyed by them.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (k, embed_dim)) for k in kernel_sizes
        ])
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, x):
        """Return class logits for a batch of token-index sequences.

        Args:
            x: Integer tensor of shape (batch, seq_len). seq_len must be at
                least the largest kernel size, since the convolutions use no
                padding.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.
        """
        # (batch, seq_len, embed_dim) -> (batch, 1, seq_len, embed_dim)
        x = self.embedding(x).unsqueeze(1)
        # Each kernel spans the full embedding width, so the last axis
        # collapses to size 1 and is squeezed away.
        convs = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        # Max over the whole remaining sequence axis: one value per filter.
        pools = [F.max_pool1d(c, c.size(2)).squeeze(2) for c in convs]
        x = torch.cat(pools, 1)
        x = self.dropout(x)
        return self.fc(x)

# --- Definicija GRU modela ---
class GRUModel(nn.Module):
    """Recurrent text classifier: embedding -> GRU -> linear head.

    The logits are computed from the final hidden state of the last GRU
    layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding vector.
        hidden_dim: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of output logits.
    """

    def __init__(self, vocab_size, embed_dim=300, hidden_dim=256, num_layers=1, num_classes=3):
        super().__init__()
        # Attribute names are part of the saved state-dict keys; do not rename.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0,
        )
        self.gru = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_classes)

    def forward(self, x):
        """Return class logits for a batch-first tensor of token indices."""
        embedded = self.embedding(x)
        # nn.GRU returns (per-step outputs, final hidden states); only the
        # final hidden states are needed here.
        final_hidden = self.gru(embedded)[1]
        # Index -1 selects the last layer's hidden state.
        return self.fc(final_hidden[-1])

# --- Učitavanje CNN i GRU modela ---
# Hyperparameters passed to the model constructors below.
# NOTE(review): the "+ 1" presumably reserves index 0 for padding on top of
# the word2idx entries — confirm against the training code, since the
# embedding shape must match the saved checkpoints.
vocab_size = len(word2idx) + 1
embed_dim = 300
num_classes = 3

# NOTE(review): torch.load unpickles the checkpoint; unless weights_only=True
# is in effect this can execute arbitrary code, so only load trusted .pt files.
print("Učitavam CNN model...")
cnn_model = CNNModel(vocab_size, embed_dim, num_classes)
# map_location forces the tensors onto the CPU regardless of where they were saved.
cnn_model.load_state_dict(torch.load("cnn_model.pt", map_location=torch.device('cpu')))
# Evaluation mode: disables dropout for inference.
cnn_model.eval()

print("Učitavam GRU model...")
gru_model = GRUModel(vocab_size, embed_dim, hidden_dim=256, num_layers=1, num_classes=num_classes)
gru_model.load_state_dict(torch.load("gru_model.pt", map_location=torch.device('cpu')))
gru_model.eval()

# --- Load the BERTić model and tokenizer ---
print("Učitavam BERTić model i tokenizer...")
# Both the tokenizer and the classifier are resolved from "my_finetuned_model".
bert_tokenizer = AutoTokenizer.from_pretrained("my_finetuned_model")
bert_model = AutoModelForSequenceClassification.from_pretrained("my_finetuned_model")
bert_model.eval()

# --- Pretvaranje teksta u indekse za CNN i GRU ---
def text_to_indices(text, max_len=100):
    """Encode *text* as a fixed-length batch of vocabulary indices.

    The text is lower-cased and split on whitespace. Each token is looked up
    in the module-level ``word2idx``; tokens that are not found map to 0,
    the same value used for padding. The sequence is cut to ``max_len``
    entries and right-padded with zeros up to that length.

    Args:
        text: Raw input string.
        max_len: Length of the returned sequence.

    Returns:
        A ``torch.long`` tensor holding a single row of indices.
    """
    words = text.lower().split()
    print(f"Tokeni: {words}")

    ids = [word2idx.get(word, 0) for word in words]
    # Logged before padding/truncation so the output shows the raw lookup.
    print(f"Indeksi: {ids}")

    # Truncate first, then top up with zeros; at most one of the two steps
    # actually changes the sequence.
    ids = ids[:max_len]
    ids.extend([0] * (max_len - len(ids)))

    batch = torch.tensor([ids], dtype=torch.long)
    print(f"Tensor shape: {batch.shape}")
    return batch

# --- Funkcije za predikciju ---

def predict_svm(text):
    """Classify *text* with the SVM pipeline.

    Returns:
        A string of the form ``"<class> (p=<probability>)"`` for the class
        with the highest predicted probability.
    """
    print(f"Predikcija SVM za tekst: {text}")
    # predict_proba takes a batch; wrap the text and take the only row.
    class_probs = svm_pipeline.predict_proba([text])[0]
    top_prob = class_probs.max()
    label = svm_pipeline.classes_[class_probs.argmax()]
    print(f"SVM predikcija: {label}, povjerenje: {top_prob:.2f}")
    return f"{label} (p={top_prob:.2f})"

def predict_cnn(text):
    """Classify *text* with the CNN model.

    Returns:
        A string of the form ``"<class index> (p=<probability>)"`` for the
        highest-scoring class.
    """
    print(f"Predikcija CNN za tekst: {text}")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = cnn_model(text_to_indices(text))
        print(f"CNN output: {logits}")
        class_probs = torch.softmax(logits, dim=1)
        label = class_probs.argmax(dim=1).item()
        # The batch holds a single example, so row 0 is the only row.
        score = class_probs[0][label].item()
    print(f"CNN predikcija: {label}, povjerenje: {score:.2f}")
    return f"{label} (p={score:.2f})"

def predict_gru(text):
    """Classify *text* with the GRU model.

    Returns:
        A string of the form ``"<class index> (p=<probability>)"`` for the
        highest-scoring class.
    """
    print(f"Predikcija GRU za tekst: {text}")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = gru_model(text_to_indices(text))
        print(f"GRU output: {logits}")
        class_probs = torch.softmax(logits, dim=1)
        label = class_probs.argmax(dim=1).item()
        # The batch holds a single example, so row 0 is the only row.
        score = class_probs[0][label].item()
    print(f"GRU predikcija: {label}, povjerenje: {score:.2f}")
    return f"{label} (p={score:.2f})"

def predict_bert(text):
    """Classify *text* with the fine-tuned BERTić model.

    Returns:
        A string of the form ``"<class index> (p=<probability>)"`` for the
        highest-scoring class.
    """
    print(f"Predikcija BERTić za tekst: {text}")
    encoded = bert_tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = bert_model(**encoded).logits
        print(f"BERTić output logits: {logits}")
        class_probs = torch.softmax(logits, dim=1)
        label = class_probs.argmax(dim=1).item()
        # The batch holds a single example, so row 0 is the only row.
        score = class_probs[0][label].item()
    print(f"BERTić predikcija: {label}, povjerenje: {score:.2f}")
    return f"{label} (p={score:.2f})"

# --- Gradio sučelje ---
def predict_all(text):
    """Run every classifier on *text*.

    Returns:
        A 4-tuple of result strings in the order SVM, CNN, GRU, BERTić.
    """
    svm_result = predict_svm(text)
    cnn_result = predict_cnn(text)
    gru_result = predict_gru(text)
    bert_result = predict_bert(text)
    return svm_result, cnn_result, gru_result, bert_result

# One text input; one output box per model, in the order predict_all returns.
demo = gr.Interface(
    title="Demo klasifikacije teksta",
    description="Predikcije koriste SVM, CNN, GRU i BERTić modele.",
    fn=predict_all,
    inputs=gr.Textbox(placeholder="Upiši tekst za klasifikaciju...", lines=3),
    outputs=[
        gr.Textbox(label=model_label)
        for model_label in ("SVM (RBF)", "CNN", "GRU", "BERTić")
    ],
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=True)