File size: 17,726 Bytes
c002cb1
 
 
5475ac5
c002cb1
 
 
 
 
 
 
 
5475ac5
 
 
 
 
 
 
c002cb1
 
 
5475ac5
c002cb1
 
 
 
 
 
5475ac5
 
 
 
 
 
 
 
 
 
c002cb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5475ac5
d86cab2
 
 
 
 
fc9c42a
 
5475ac5
fc9c42a
d86cab2
fc9c42a
c002cb1
5475ac5
 
fc9c42a
d86cab2
fc9c42a
 
5475ac5
fc9c42a
d86cab2
fc9c42a
 
5475ac5
 
fc9c42a
d86cab2
fc9c42a
 
5475ac5
fc9c42a
d86cab2
fc9c42a
5475ac5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d86cab2
5475ac5
 
 
d86cab2
5475ac5
 
 
 
 
 
d86cab2
 
 
 
 
 
 
 
 
 
 
 
c002cb1
 
d86cab2
 
 
 
1b3d667
5475ac5
1b3d667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5475ac5
1b3d667
5475ac5
1b3d667
 
 
 
 
 
 
 
5475ac5
1b3d667
 
 
 
 
 
 
5475ac5
1b3d667
 
 
 
 
 
5475ac5
1b3d667
 
 
5475ac5
1b3d667
 
 
 
5475ac5
1b3d667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5475ac5
1b3d667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5475ac5
 
 
 
c002cb1
 
 
 
 
5475ac5
c002cb1
 
1b3d667
c002cb1
1b3d667
 
b74f9a2
1b3d667
 
 
 
 
 
 
 
5475ac5
 
 
1b3d667
 
 
 
 
 
c002cb1
 
 
1b3d667
 
 
 
 
 
 
 
 
d86cab2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b3d667
 
5475ac5
1b3d667
 
 
 
 
 
 
 
 
5475ac5
1b3d667
 
 
 
 
 
 
 
 
 
 
5475ac5
 
1b3d667
 
c002cb1
5475ac5
 
 
 
604852a
c002cb1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
"""
HuggingFace Space - ESS Variable Classification Demo
Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
Developed by Sikt - Norwegian Agency for Shared Services in Education and Research
"""
import gradio as gr
from transformers import pipeline

# Hub identifier of the fine-tuned checkpoint; the text-classification
# pipeline is built once at import time and reused for every request.
MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"
classifier = pipeline(task="text-classification", model=MODEL_NAME)

# Sikt brand palette (hex colour codes), keyed by colour name.
SIKT_COLORS = dict(
    amaranth="#ee3243",   # Primary accent
    meteorite="#331c6c",  # Dark
    selago="#f3f1fe",     # Light
)

# Short human-readable description for each category label.
# classify_text() looks the predicted label up here to enrich its output.
CATEGORY_INFO = {
    "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)":
        "Demographics, population statistics, age, gender",
    "ECONOMICS":
        "Economic issues, finance, income, wealth",
    "EDUCATION":
        "Education, schooling, qualifications",
    "HEALTH":
        "Healthcare, medical services, health satisfaction",
    "POLITICS":
        "Political systems, trust in government, parliament",
    "SOCIETY AND CULTURE":
        "Social issues, cultural topics, religion",
    "LABOUR AND EMPLOYMENT":
        "Work, occupation, employment status",
    "PSYCHOLOGY":
        "Mental health, psychological wellbeing",
    "HOUSING AND LAND USE":
        "Housing conditions, residential environment",
    "NATURAL ENVIRONMENT":
        "Environmental concerns, climate change",
    "LAW, CRIME AND LEGAL SYSTEMS":
        "Justice, crime, legal matters",
    "MEDIA, COMMUNICATION AND LANGUAGE":
        "Media use, communication patterns",
    "SOCIAL STRATIFICATION AND GROUPINGS":
        "Social class, inequality, social groups",
    "SOCIAL WELFARE POLICY AND SYSTEMS":
        "Social benefits, welfare services",
    "TRANSPORT AND TRAVEL":
        "Transportation, mobility, travel patterns",
    "TRADE, INDUSTRY AND MARKETS":
        "Business, commerce, markets",
    "SCIENCE AND TECHNOLOGY":
        "Scientific advancement, technology use",
    "HISTORY":
        "Historical events, memory, heritage",
    "OTHER":
        "General or uncategorized topics",
}

def classify_text(text) -> str:
    """Classify a survey question or variable description.

    Args:
        text: Free text to classify. ``None`` and whitespace-only values
            are treated as empty input.

    Returns:
        A Markdown string with the predicted category, the confidence as a
        percentage and, when the label is a key of ``CATEGORY_INFO``, its
        short description. For empty input, a prompt asking for text.
    """
    # Guard against None as well as blank strings so the model is never
    # invoked without real input (``None.strip()`` would raise).
    if text is None or not text.strip():
        return "Please enter some text to classify."

    # truncation=True asks the tokenizer to cut over-long input down to the
    # model's maximum sequence length rather than passing it through as-is.
    result = classifier(text, truncation=True)[0]
    label = result["label"]
    score = result["score"]

    # Assemble the Markdown in pieces and join once at the end.
    parts = [
        f"**Category:** {label}\n\n",
        f"**Confidence:** {score:.2%}\n\n",
    ]
    description = CATEGORY_INFO.get(label)
    if description is not None:
        parts.append(f"**Description:** {description}")

    return "".join(parts)

# Example questions - mix of actual ESS data and generated diverse questions
# Using exact category names from pydantic_classes.py
import random

# Each entry is a one-element list: one value for the single text input.
all_examples = [
    # EDUCATION
    ["What is the highest level of education you have successfully completed?"],
    ["What is the highest level of education your mother successfully completed?"],
    ["How many years of full-time education have you completed?"],

    # POLITICS
    ["Which party did you vote for in the last national election?"],
    ["Trust in country's parliament"],
    ["How satisfied are you with the way democracy works in your country?"],
    ["How much do you trust the legal system?"],

    # HEALTH
    ["How satisfied are you with the healthcare system?"],
    ["Which health problems that you had in the last 12 months hampered you in your daily activities?"],
    ["How is your health in general - very good, good, fair, bad, or very bad?"],

    # LABOUR AND EMPLOYMENT
    ["What best describes what you have been doing for the last 7 days - in paid work?"],
    ["Which description best describes the sort of work your mother did when you were 14?"],
    ["How many hours do you normally work per week in your main job?"],
    ["Are you a member of a trade union or similar organization?"],

    # SOCIETY AND CULTURE
    ["How often do you pray apart from at religious services?"],
    ["How important is it to always behave properly and avoid doing anything people would say is wrong?"],
    ["Do you consider yourself as belonging to any particular religion or denomination?"],

    # DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)
    ["What is your age?"],
    ["What is your gender?"],
    ["What is your current legal marital status?"],
    ["In which country were you born?"],

    # ECONOMICS
    ["Which of the descriptions on this card comes closest to how you feel about your household's income nowadays?"],
    ["What is your household's total net income from all sources?"],

    # PSYCHOLOGY
    ["Taking all things together, how happy would you say you are?"],
    ["Have you felt depressed or sad in the last two weeks?"],
    ["How often do you feel stressed?"],

    # NATURAL ENVIRONMENT
    ["How worried are you about climate change?"],
    ["To what extent do you think climate change is caused by human activity?"],

    # LAW, CRIME AND LEGAL SYSTEMS
    ["How safe do you feel walking alone at night in your local area?"],
    ["Have you or a member of your household been a victim of burglary or assault in the last 5 years?"],

    # MEDIA, COMMUNICATION AND LANGUAGE
    ["How much time do you spend watching television on an average weekday?"],
    ["How often do you use the internet for news?"],

    # SOCIAL STRATIFICATION AND GROUPINGS
    ["In society there are groups which tend to be towards the top and groups which tend to be towards the bottom. Where would you place yourself?"],
    ["Do you belong to any discriminated group in this country?"],

    # HOUSING AND LAND USE
    ["Do you rent or own your accommodation?"],
    ["How many rooms do you have for your household's use only?"],

    # SOCIAL WELFARE POLICY AND SYSTEMS
    ["Should the government reduce income differences?"],
    ["How satisfied are you with the state of social benefits in your country?"],

    # TRANSPORT AND TRAVEL
    ["How long does your daily commute to work take?"],
    ["What is your main mode of transportation?"],

    # SCIENCE AND TECHNOLOGY
    ["To what extent do you think scientific advances benefit society?"],
    ["How often do you use a smartphone or tablet?"],

    # HISTORY
    ["What do you think about your country's colonial past?"],
    ["How important is it to preserve historical monuments?"],

    # TRADE, INDUSTRY AND MARKETS
    ["Do you work in the private or public sector?"],
    ["How do you feel about free trade agreements?"],

    # OTHER
    ["What are your thoughts on the future?"],
    ["How do you define quality of life?"],
]

# Draw up to 30 examples in random order. A dedicated generator seeded with
# 42 keeps the selection reproducible without reseeding the process-wide
# `random` state that other code may rely on.
examples = random.Random(42).sample(all_examples, min(30, len(all_examples)))

# Custom CSS for Sikt branding using design tokens.
# The stylesheet is handed to gr.Interface through its `css=` argument.
# `:root` declares the `--sds-*` custom properties (colours, gaps, paddings,
# border radii) that the rules below, and the inline styles in the interface's
# description/article HTML, reference via `var(...)`.
# `.sikt-header` and `.sikt-logo` style the header markup in the description.
custom_css = """
:root {
    /* Sikt Design Tokens */
    --sds-color-text-primary: #1a1a1a;
    --sds-color-text-secondary: #331c6c;
    --sds-color-interaction-primary: #7d5da6;
    --sds-color-interaction-primary-hover: #6b4d94;
    --sds-color-layout-background-default: #ffffff;
    --sds-color-layout-background-subtle: #f3f1fe;
    --sds-color-accent-primary: #ee3243;
    --sds-space-gap-small: 0.5rem;
    --sds-space-gap-medium: 1rem;
    --sds-space-gap-large: 1.5rem;
    --sds-space-padding-small: 0.75rem;
    --sds-space-padding-medium: 1rem;
    --sds-space-padding-large: 1.5rem;
    --sds-space-border-radius-small: 4px;
    --sds-space-border-radius-medium: 8px;
    --sds-space-border-radius-large: 12px;
}

.gradio-container {
    font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
}

h1, .gr-title {
    color: var(--sds-color-text-secondary) !important;
    font-weight: 600 !important;
}

.gr-box {
    border-radius: var(--sds-space-border-radius-medium) !important;
}

.gr-button {
    background-color: var(--sds-color-interaction-primary) !important;
    border-color: var(--sds-color-interaction-primary) !important;
    border-radius: var(--sds-space-border-radius-small) !important;
    font-weight: 500 !important;
    transition: all 0.2s ease !important;
}

.gr-button:hover {
    background-color: var(--sds-color-interaction-primary-hover) !important;
    border-color: var(--sds-color-interaction-primary-hover) !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 8px rgba(125, 93, 166, 0.3) !important;
}

.gr-button-primary {
    background: linear-gradient(135deg, var(--sds-color-interaction-primary) 0%, #6b4d94 100%) !important;
}

.gr-input, .gr-textbox {
    border-color: #e0e0e0 !important;
    border-radius: var(--sds-space-border-radius-small) !important;
}

.gr-input:focus, .gr-textbox:focus {
    border-color: var(--sds-color-interaction-primary) !important;
    box-shadow: 0 0 0 2px rgba(125, 93, 166, 0.1) !important;
}

.gr-panel {
    background-color: var(--sds-color-layout-background-subtle) !important;
    border-radius: var(--sds-space-border-radius-medium) !important;
    padding: var(--sds-space-padding-large) !important;
}

.gr-form {
    gap: var(--sds-space-gap-medium) !important;
}

footer {
    background-color: var(--sds-color-layout-background-subtle) !important;
    border-top: 1px solid #e0e0e0 !important;
}

.sikt-logo {
    max-width: 120px;
    height: auto;
}

.sikt-header {
    background: linear-gradient(135deg, #f3f1fe 0%, #ffffff 100%);
    padding: var(--sds-space-padding-medium);
    border-radius: var(--sds-space-border-radius-medium);
    margin-bottom: var(--sds-space-gap-large);
    border-left: 4px solid var(--sds-color-interaction-primary);
}
"""

# Create Gradio interface with Sikt branding.
# One multi-line textbox feeds classify_text(); its Markdown result is shown
# in the output panel. `description` renders the branded header above the
# form and `article` the explanatory sections below it.
# Emoji and the arrow in the HTML are literal UTF-8 characters.
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a survey question or variable description...",
        label="Survey Question"
    ),
    outputs=gr.Markdown(label="Classification Result"),
    title="ESS Variable Classification",
    description="""
    <div class="sikt-header">
        <div style="display: flex; align-items: center; gap: 1.5rem; flex-wrap: wrap;">
            <img src="https://modansa.blob.core.windows.net/testcontainer/Sikt-Prim%C3%A6rlogo-M%C3%B8rk_0.png" alt="Sikt Logo" class="sikt-logo">
            <div style="flex: 1; min-width: 300px;">
                <h3 style="margin: 0 0 0.5rem 0; color: var(--sds-color-text-secondary); font-size: 1.25rem; font-weight: 600;">
                    ESS Variable Classifier
                </h3>
                <p style="margin: 0; color: var(--sds-color-text-primary); font-size: 0.95rem; line-height: 1.5;">
                    Developed by <strong>Sikt</strong> – Norwegian Agency for Shared Services in Education and Research
                </p>
            </div>
        </div>
    </div>

    <div style="margin: 1.5rem 0;">
        <p style="font-size: 1.05rem; color: var(--sds-color-text-primary); line-height: 1.6;">
            Automatically classify European Social Survey (ESS) questions into <strong>19 subject categories</strong>.
            This AI model is fine-tuned from XLM-RoBERTa-Base and achieves <strong>83.8% accuracy</strong>.
        </p>
    </div>
    """,
    examples=examples,
    article="""
    <div style="margin-top: 2rem; padding-top: 2rem; border-top: 2px solid var(--sds-color-layout-background-subtle);">

    <div style="background: linear-gradient(135deg, #f3f1fe 0%, #ffffff 100%); padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); margin-bottom: 2rem; border-left: 4px solid var(--sds-color-interaction-primary);">
        <h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">📊 About This Tool</h3>
        <p style="color: var(--sds-color-text-primary); line-height: 1.6;">
            This classifier helps researchers and data managers organize survey variables by automatically
            categorizing them into subject areas. The model was trained on European Social Survey metadata
            and can classify questions into <strong>19 categories</strong>:
        </p>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 0.5rem; margin-top: 1rem;">
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">📚 EDUCATION</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🏛️ POLITICS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🏥 HEALTH</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">💼 LABOUR AND EMPLOYMENT</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🌍 SOCIETY AND CULTURE</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">💰 ECONOMICS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🧠 PSYCHOLOGY</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">👥 DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🏠 HOUSING AND LAND USE</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🌱 NATURAL ENVIRONMENT</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">⚖️ LAW, CRIME AND LEGAL SYSTEMS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">📺 MEDIA, COMMUNICATION AND LANGUAGE</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">👔 SOCIAL STRATIFICATION AND GROUPINGS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🤝 SOCIAL WELFARE POLICY AND SYSTEMS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🚗 TRANSPORT AND TRAVEL</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🏪 TRADE, INDUSTRY AND MARKETS</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">🔬 SCIENCE AND TECHNOLOGY</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">📜 HISTORY</span>
            <span style="padding: 0.5rem; background: white; border-radius: 4px; font-size: 0.8rem; border-left: 3px solid #7d5da6;">📋 OTHER</span>
        </div>
    </div>

    <div style="background: white; padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); margin-bottom: 2rem; border: 1px solid #e0e0e0;">
        <h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">🔬 Technical Details</h3>
        <ul style="color: var(--sds-color-text-primary); line-height: 1.8; padding-left: 1.5rem;">
            <li><strong>Base Model:</strong> <a href="https://huggingface.co/FacebookAI/xlm-roberta-base" style="color: var(--sds-color-interaction-primary);">XLM-RoBERTa-Base</a> (125M parameters)</li>
            <li><strong>Fine-tuned Model:</strong> <a href="https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification" style="color: var(--sds-color-interaction-primary);">benjaminBeuster/xlm-roberta-base-ess-classification</a></li>
            <li><strong>Performance:</strong> 83.8% accuracy | F1: 0.796 (weighted) | 105 test samples</li>
            <li><strong>Training Data:</strong> <a href="https://huggingface.co/datasets/benjaminBeuster/ess_classification" style="color: var(--sds-color-interaction-primary);">ESS Classification Dataset</a></li>
        </ul>
    </div>

    <div style="background: linear-gradient(135deg, var(--sds-color-layout-background-subtle) 0%, white 100%); padding: 1.5rem; border-radius: var(--sds-space-border-radius-medium); text-align: center;">
        <h3 style="color: var(--sds-color-text-secondary); margin-top: 0; font-weight: 600;">About Sikt</h3>
        <p style="color: var(--sds-color-text-primary); line-height: 1.6; max-width: 600px; margin: 0 auto 1rem auto;">
            <a href="https://sikt.no" style="color: var(--sds-color-interaction-primary); text-decoration: none; font-weight: 600;">Sikt</a>
            – Norwegian Agency for Shared Services in Education and Research provides digital infrastructure
            and services for research and education in Norway.
        </p>
        <p style="margin-top: 1.5rem;">
            <a href="https://sikt.no" style="display: inline-block; padding: 0.75rem 1.5rem; background-color: var(--sds-color-interaction-primary); color: white; text-decoration: none; border-radius: var(--sds-space-border-radius-small); font-weight: 600; transition: all 0.2s;">
                Visit sikt.no →
            </a>
        </p>
    </div>

    </div>
    """,
    theme=gr.themes.Soft(
        primary_hue="red",
        secondary_hue="purple",
    ),
    css=custom_css
)

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()