# Hugging Face file-viewer header (page residue, not part of app.py):
# benjaminBeuster's picture
# Upload app.py with huggingface_hub
# 604852a verified
# raw | history | blame
# 9.37 kB
"""
HuggingFace Space - ESS Variable Classification Demo
Interactive Gradio interface for the XLM-RoBERTa ESS classifier.
Developed by Sikt - Norwegian Agency for Shared Services in Education and Research
"""
import gradio as gr
from transformers import pipeline
# Hugging Face Hub id of the fine-tuned ESS classifier used by this demo.
MODEL_NAME = "benjaminBeuster/xlm-roberta-base-ess-classification"

# Build the text-classification pipeline once, at import time; classify_text()
# reuses this module-level object for every request.
classifier = pipeline(
    task="text-classification",
    model=MODEL_NAME,
)
# Sikt brand palette, as hex colour codes keyed by colour name.
SIKT_COLORS = {
    "amaranth": "#ee3243",   # primary accent
    "meteorite": "#331c6c",  # dark
    "selago": "#f3f1fe",     # light
}
# Short human-readable description for each category label.
# classify_text() looks the predicted label up here to add a
# "Description" line to its output.
CATEGORY_INFO = {
    "DEMOGRAPHY (POPULATION, VITAL STATISTICS, AND CENSUSES)": "Demographics, population statistics, age, gender",
    "ECONOMICS": "Economic issues, finance, income, wealth",
    "EDUCATION": "Education, schooling, qualifications",
    "HEALTH": "Healthcare, medical services, health satisfaction",
    "POLITICS": "Political systems, trust in government, parliament",
    "SOCIETY AND CULTURE": "Social issues, cultural topics, religion",
    "LABOUR AND EMPLOYMENT": "Work, occupation, employment status",
    "PSYCHOLOGY": "Mental health, psychological wellbeing",
    "HOUSING AND LAND USE": "Housing conditions, residential environment",
    "NATURAL ENVIRONMENT": "Environmental concerns, climate change",
    "LAW, CRIME AND LEGAL SYSTEMS": "Justice, crime, legal matters",
    "MEDIA, COMMUNICATION AND LANGUAGE": "Media use, communication patterns",
    "SOCIAL STRATIFICATION AND GROUPINGS": "Social class, inequality, social groups",
    "SOCIAL WELFARE POLICY AND SYSTEMS": "Social benefits, welfare services",
    "TRANSPORT AND TRAVEL": "Transportation, mobility, travel patterns",
    "TRADE, INDUSTRY AND MARKETS": "Business, commerce, markets",
    "SCIENCE AND TECHNOLOGY": "Scientific advancement, technology use",
    "HISTORY": "Historical events, memory, heritage",
    "OTHER": "General or uncategorized topics",
}
def classify_text(text):
    """Classify a survey question or variable description.

    Args:
        text: Raw text from the input box. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing entered".

    Returns:
        A Markdown string with the predicted category and the confidence as a
        percentage, plus a short description when the predicted label is a
        key of ``CATEGORY_INFO``. When nothing was entered, a prompt asking
        for input is returned instead.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError instead of showing the prompt.
    if not text or not text.strip():
        return "Please enter some text to classify."

    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is cut rather than raising an error.
    prediction = classifier(text, truncation=True)[0]
    label = prediction["label"]
    score = prediction["score"]

    # Format output
    output = f"**Category:** {label}\n\n"
    output += f"**Confidence:** {score:.2%}\n\n"
    description = CATEGORY_INFO.get(label)
    if description is not None:
        output += f"**Description:** {description}"
    return output
# Example questions shown in the UI - a mix of actual ESS data and generated
# diverse questions, grouped by the category they are meant to illustrate.
_EXAMPLE_QUESTIONS = (
    # EDUCATION (most common - 146 samples)
    "What is the highest level of education you have successfully completed?",
    "What is the highest level of education your mother successfully completed?",
    "How many years of full-time education have you completed?",
    # POLITICS (100 samples)
    "Which party did you vote for in the last national election?",
    "Trust in country's parliament",
    "How satisfied are you with the way democracy works in your country?",
    "How much do you trust the legal system?",
    # HEALTH (90 samples)
    "How satisfied are you with the healthcare system?",
    "Which health problems that you had in the last 12 months hampered you in your daily activities?",
    "How is your health in general - very good, good, fair, bad, or very bad?",
    # LABOUR AND EMPLOYMENT (82 samples)
    "What best describes what you have been doing for the last 7 days - in paid work?",
    "Which description best describes the sort of work your mother did when you were 14?",
    "How many hours do you normally work per week in your main job?",
    "Are you a member of a trade union or similar organization?",
    # SOCIETY AND CULTURE (73 samples)
    "How often do you pray apart from at religious services?",
    "How important is it to always behave properly and avoid doing anything people would say is wrong?",
    "Do you consider yourself as belonging to any particular religion or denomination?",
    # DEMOGRAPHY
    "What is your age?",
    "What is your gender?",
    "What is your current legal marital status?",
    "In which country were you born?",
    # ECONOMICS
    "Which of the descriptions on this card comes closest to how you feel about your household's income nowadays?",
    "What is your household's total net income from all sources?",
    # PSYCHOLOGY
    "Taking all things together, how happy would you say you are?",
    "Have you felt depressed or sad in the last two weeks?",
    "How often do you feel stressed?",
    # NATURAL ENVIRONMENT
    "How worried are you about climate change?",
    "To what extent do you think climate change is caused by human activity?",
    # LAW, CRIME AND LEGAL SYSTEMS
    "How safe do you feel walking alone at night in your local area?",
    "Have you or a member of your household been a victim of burglary or assault in the last 5 years?",
    # MEDIA, COMMUNICATION AND LANGUAGE
    "How much time do you spend watching television on an average weekday?",
    "How often do you use the internet for news?",
    # SOCIAL STRATIFICATION AND GROUPINGS
    "In society there are groups which tend to be towards the top and groups which tend to be towards the bottom. Where would you place yourself?",
    "Do you belong to any discriminated group in this country?",
    # HOUSING AND LAND USE
    "Do you rent or own your accommodation?",
    "How many rooms do you have for your household's use only?",
    # SOCIAL WELFARE
    "Should the government reduce income differences?",
    "How satisfied are you with the state of social benefits in your country?",
    # TRANSPORT
    "How long does your daily commute to work take?",
    "What is your main mode of transportation?",
    # SCIENCE AND TECHNOLOGY
    "To what extent do you think scientific advances benefit society?",
    "How often do you use a smartphone or tablet?",
)

# One single-item row per question (one value for the one text input).
examples = [[question] for question in _EXAMPLE_QUESTIONS]
# Custom CSS for Sikt branding.
# Sets the container font stack, colours h1 headings and the tabs border with
# the dark brand purple (#331c6c), paints primary buttons in the brand red
# (#ee3243, with #d62839 on hover), and gives the footer the light brand
# background (#f3f1fe). Also defines a flex layout for a ".header-logo" class.
custom_css = """
.gradio-container {
font-family: "Source Sans Pro", -apple-system, BlinkMacSystemFont, sans-serif;
}
h1 {
color: #331c6c !important;
}
.header-logo {
display: flex;
align-items: center;
gap: 1rem;
margin-bottom: 1rem;
}
button.primary {
background-color: #ee3243 !important;
border-color: #ee3243 !important;
}
button.primary:hover {
background-color: #d62839 !important;
border-color: #d62839 !important;
}
.tabs {
border-color: #331c6c !important;
}
footer {
background-color: #f3f1fe !important;
}
"""
# Create the Gradio interface with Sikt branding.
#
# The description and article strings mix Markdown and HTML. Their content is
# deliberately kept at column 0: indenting Markdown by four or more spaces
# would turn it into a code block. Blank lines separate HTML blocks, headings,
# lists and horizontal rules, because in Markdown
#   * text directly after a closing </div> stays part of the raw HTML block,
#     so its **bold** markers are not rendered, and
#   * a "---" line directly under a paragraph is a setext heading underline,
#     not a horizontal rule.
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a survey question or variable description...",
        label="Survey Question",
    ),
    outputs=gr.Markdown(label="Classification Result"),
    title="🔍 ESS Variable Classification",
    description="""
<div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem;">
<img src="https://cdn.brandfetch.io/id9VCyV64w/theme/dark/logo.svg?c=1bxid64Mup7aczewSAYMX"
alt="Sikt Logo" style="height: 40px;">
<div>
<p style="margin: 0; color: #331c6c; font-size: 1.1em; font-weight: 500;">
Developed by <strong>Sikt</strong> – Norwegian Agency for Shared Services in Education and Research
</p>
</div>
</div>

Automatically classify European Social Survey (ESS) questions into **19 subject categories**.
This AI model is fine-tuned from XLM-RoBERTa-Base and achieves **83.8% accuracy** on the test set.
""",
    examples=examples,
    # The base-model size below is the XLM-R Base figure (~270M parameters);
    # 125M is the size of the English-only RoBERTa-base.
    article="""
---

### About This Tool

This classifier helps researchers and data managers organize survey variables by automatically
categorizing them into subject areas. The model was trained on European Social Survey metadata
and can classify questions into categories including:

- **Education** • **Politics** • **Health** • **Labour & Employment**
- **Society & Culture** • **Economics** • **Psychology** • **Demographics**
- And 11 more categories

### Technical Details

- **Base Model:** [XLM-RoBERTa-Base](https://huggingface.co/FacebookAI/xlm-roberta-base) (~270M parameters)
- **Fine-tuned Model:** [benjaminBeuster/xlm-roberta-base-ess-classification](https://huggingface.co/benjaminBeuster/xlm-roberta-base-ess-classification)
- **Performance:** 83.8% accuracy | F1: 0.796 (weighted) | 105 test samples
- **Training Data:** [ESS Classification Dataset](https://huggingface.co/datasets/benjaminBeuster/ess_classification)

### About Sikt

[Sikt](https://sikt.no) – Norwegian Agency for Shared Services in Education and Research
provides digital infrastructure and services for research and education in Norway.

---

<div style="text-align: center; padding: 1rem; background-color: #f3f1fe; border-radius: 8px; margin-top: 1rem;">
<p style="color: #331c6c; margin: 0;">
Questions or feedback? Visit <a href="https://sikt.no" style="color: #ee3243; text-decoration: none; font-weight: 600;">sikt.no</a>
</p>
</div>
""",
    theme=gr.themes.Soft(
        primary_hue="red",
        secondary_hue="purple",
    ),
    css=custom_css,
)
# Launch the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()