File size: 8,600 Bytes
24c45eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import gradio as gr
import torch
import joblib
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from pathlib import Path
import os

# Feed-forward scorer network (must mirror the training-time architecture).
class AESModel(torch.nn.Module):
    """MLP regressor mapping a feature vector to an essay score in [0, 4].

    Architecture: one Linear -> BatchNorm1d -> ReLU -> Dropout input stage,
    (num_layers - 1) identical hidden stages, then a single sigmoid output
    scaled by 4 so predictions are bounded to the scoring range.
    """

    def __init__(self, input_dim, hidden_dim=512, num_layers=3, dropout=0.4):
        super().__init__()

        def _stage(in_features):
            # One Linear/BatchNorm/ReLU/Dropout building block.
            return torch.nn.Sequential(
                torch.nn.Linear(in_features, hidden_dim),
                torch.nn.BatchNorm1d(hidden_dim),
                torch.nn.ReLU(),
                torch.nn.Dropout(dropout),
            )

        # Attribute names kept identical to training so saved state-dict
        # keys (input_layer.*, hidden_layers.*, output_layer.*) still match.
        self.input_layer = _stage(input_dim)
        self.hidden_layers = torch.nn.ModuleList(
            _stage(hidden_dim) for _ in range(num_layers - 1)
        )
        self.output_layer = torch.nn.Sequential(
            torch.nn.Linear(hidden_dim, 1),
            torch.nn.Sigmoid(),
        )

    def forward(self, x):
        """Run the network; returns a (batch, 1) tensor of scores in [0, 4]."""
        out = self.input_layer(x)
        for block in self.hidden_layers:
            out = block(out)
        # Sigmoid output is in [0, 1]; scale to the 0-4 grading range.
        return self.output_layer(out) * 4

# Load the saved model and components
def load_components(model_dir='./final'):
    """Load the trained AES model and its preprocessing artifacts.

    Parameters
    ----------
    model_dir : str or Path
        Directory containing ``final_aes_model.pt``, ``preprocessor.pkl``,
        ``jawaban_essay.pkl`` and ``training_config.pkl``.

    Returns
    -------
    tuple
        ``(model, preprocessor, jawaban_essay, device)`` on success, or
        ``(None, None, None, None)`` if any artifact fails to load — the
        app then runs in demo mode instead of crashing at startup.
    """
    model_dir = Path(model_dir)
    try:
        # Use CPU for Hugging Face Spaces deployment (no GPU available)
        device = torch.device('cpu')

        # weights_only=False because the checkpoint stores objects beyond a
        # raw state dict. NOTE(review): unpickles arbitrary objects — only
        # ever load trusted checkpoint files.
        model_data = torch.load(model_dir / 'final_aes_model.pt',
                                map_location='cpu', weights_only=False)
        preprocessor = joblib.load(model_dir / 'preprocessor.pkl')
        jawaban_essay = pd.read_pickle(model_dir / 'jawaban_essay.pkl')
        training_config = joblib.load(model_dir / 'training_config.pkl')

        # Rebuild the network with the exact architecture used in training
        # so the saved state-dict keys line up.
        arch = training_config['model_architecture']
        model = AESModel(
            input_dim=arch['input_dim'],
            hidden_dim=arch['hidden_dim'],
            num_layers=arch['num_layers'],
            dropout=arch['dropout'],
        )
        model.load_state_dict(model_data['model_state_dict'])
        model.to(device)
        model.eval()  # inference mode: fixes BatchNorm/Dropout behavior

        return model, preprocessor, jawaban_essay, device
    except Exception as e:
        # Deliberate best-effort: log and degrade to demo mode on failure.
        print(f"Error loading model components: {e}")
        return None, None, None, None

# Prediction function that returns multiple outputs
def predict_score(question_code, student_answer, model, preprocessor, jawaban_essay, device):
    """Predict the 0-4 score of ``student_answer`` for question ``question_code``.

    Returns a 4-tuple of display strings: (predicted score, question text,
    answer key, similarity). On any failure the first element carries an
    error message and the remaining three are empty strings.
    """
    try:
        code = int(question_code)
        cleaned = preprocessor.preprocess_text(student_answer)

        # Look up the answer-key row for this question code.
        key_rows = jawaban_essay[jawaban_essay['Kode'] == code]
        if len(key_rows) == 0:
            return "Error: Question code not found", "", "", ""

        key_row = key_rows.iloc[0]
        question_text = key_row['Pertanyaan']
        key_answer = key_row['Jawaban']

        # Feature extraction: linguistic statistics plus SBERT embeddings
        # of both the student answer and the preprocessed answer key.
        ling = preprocessor.extract_linguistic_features(cleaned)
        answer_vec = preprocessor.get_sbert_embedding(cleaned)
        key_vec = preprocessor.get_sbert_embedding(key_row['processed_kunci_jawaban'])
        similarity = 1 - cosine(answer_vec, key_vec)

        # Assemble the feature vector in the same order used during
        # training: SBERT dims, then linguistic features, then similarity.
        feature_vector = list(answer_vec) + list(ling.values()) + [similarity]
        x = np.asarray(feature_vector, dtype=np.float32).reshape(1, -1)

        # Forward pass; clamp to the valid 0-4 range and round for display.
        with torch.no_grad():
            score = model(torch.FloatTensor(x).to(device)).item()
        score = round(min(max(score, 0), 4), 2)

        return (f"Predicted Score: {score:.2f}/4.00",
                f"Pertanyaan: {question_text}",
                f"Kunci Jawaban: {key_answer}",
                f"Similarity dengan Kunci Jawaban: {similarity:.3f}")
    except Exception as e:
        return f"Error: {str(e)}", "", "", ""

# Load components once at startup
print("Loading model components...")
# Module-level singletons shared by all Gradio callbacks below. Any of
# these may be None if loading failed, in which case the UI runs in a
# degraded "demo" mode rather than crashing.
model, preprocessor, jawaban_essay, device = load_components()

if model is not None:
    print("Model components loaded successfully!")
else:
    print("Failed to load model components. Running in demo mode.")

# Create Gradio interface with multiple outputs
def gradio_predict(question_code, student_answer):
    """Gradio callback: guard against missing components, then delegate."""
    components_ready = model is not None and preprocessor is not None
    if not components_ready:
        return ("Error: Model not loaded", "Model tidak dapat dimuat", "", "")
    return predict_score(question_code, student_answer, model, preprocessor, jawaban_essay, device)

# Check if components are loaded before creating interface
if jawaban_essay is not None:
    # List of available question codes and their questions.
    # Each entry is (dropdown label "code - first 50 chars of question...",
    # numeric question code) — the label is shown, the code is what
    # predict_score actually consumes.
    question_info = [(f"{row['Kode']} - {row['Pertanyaan'][:50]}...", row['Kode'])
                     for _, row in jawaban_essay.iterrows()]
else:
    # Fallback question info for demo
    question_info = [("Demo - Model tidak dapat dimuat", 1)]

# Create the Gradio app with enhanced display
with gr.Blocks(title="Auto-Scoring Essay Indonesia", theme=gr.themes.Soft()) as app:
    gr.Markdown("# πŸŽ“ Sistem Auto-Scoring Essay Bahasa Indonesia")
    gr.Markdown("Pilih pertanyaan dan masukkan jawaban siswa untuk mendapatkan prediksi nilai otomatis menggunakan AI.")

    if model is None:
        gr.Markdown("⚠️ **Model sedang dimuat atau tidak tersedia. Silakan coba lagi nanti.**")

    with gr.Row():
        with gr.Column():
            question_input = gr.Dropdown(
                label="πŸ“ Pilih Pertanyaan",
                choices=[info[0] for info in question_info],
                value=question_info[0][0] if question_info else None
            )
            # Hidden field that stores the numeric question code matching the
            # dropdown selection. BUGFIX: initialize it to the first question's
            # code — previously it started empty, so submitting without ever
            # changing the dropdown sent "" to the predictor and int("") raised.
            actual_code = gr.Textbox(
                value=str(question_info[0][1]) if question_info else "",
                visible=False
            )
            answer_input = gr.Textbox(
                label="✍️ Jawaban Siswa",
                placeholder="Masukkan jawaban siswa di sini...",
                lines=8,
                max_lines=15
            )
            submit_btn = gr.Button("πŸ” Prediksi Nilai", variant="primary", size="lg")

        with gr.Column():
            output_score = gr.Textbox(label="πŸ“Š Hasil Prediksi", interactive=False)
            output_question = gr.Textbox(label="❓ Detail Pertanyaan", interactive=False)
            output_key = gr.Textbox(label="πŸ”‘ Kunci Jawaban", interactive=False)
            output_similarity = gr.Textbox(label="πŸ“ˆ Similarity Score", interactive=False)

    # Map a dropdown label back to its numeric question code (keeps the
    # hidden actual_code textbox in sync with the visible dropdown).
    def update_code(question_choice):
        for label, code in question_info:
            if label == question_choice:
                return code
        return question_info[0][1] if question_info else 1

    question_input.change(update_code, inputs=question_input, outputs=actual_code)

    submit_btn.click(
        fn=gradio_predict,
        inputs=[actual_code, answer_input],
        outputs=[output_score, output_question, output_key, output_similarity]
    )

    # Predict from an example row. BUGFIX: gr.Examples supplies the dropdown
    # *label*, not the numeric code, so resolve it first — previously the
    # label was passed straight to gradio_predict and int() always failed.
    def example_predict(question_choice, student_answer):
        return gradio_predict(update_code(question_choice), student_answer)

    # Add some examples if model is loaded
    if jawaban_essay is not None and len(question_info) > 0:
        gr.Markdown("## πŸ’‘ Contoh Jawaban")
        examples = [
            [question_info[0][0], "Ancaman keamanan jaringan meliputi malware seperti virus dan trojan, serangan DDoS yang dapat melumpuhkan server, serta upaya phishing untuk mencuri data pribadi."],
            [question_info[0][0], "Kebocoran data, penyusupan akun, penghapusan data, spam dan phising merupakan ancaman utama dalam keamanan jaringan komputer."],
        ]

        if len(question_info) > 1:
            examples.append([
                question_info[1][0],
                "Jaringan komputer adalah kumpulan komputer yang terhubung untuk berbagi sumber daya seperti file, printer, dan koneksi internet."
            ])

        gr.Examples(
            examples=examples,
            inputs=[question_input, answer_input],
            outputs=[output_score, output_question, output_key, output_similarity],
            fn=example_predict,
            cache_examples=False
        )

    gr.Markdown("---")
    gr.Markdown("*Sistem ini menggunakan AI untuk memberikan penilaian otomatis pada essay berbahasa Indonesia.*")


# Launch the app
if __name__ == "__main__":
    # Start the Gradio server (blocking call) only when run as a script,
    # not when the module is imported.
    app.launch()