# Source: Hugging Face Space th-nuernberg/oncoco (status: Running; file size: 5,240 bytes)

import json
import torch
import gradio as gr
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Hub identifier of the fine-tuned sequence-classification checkpoint used by this demo.
MODEL_ID = "th-nuernberg/xlm-roberta-base-online-counseling-oncoco"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # the model is only used for inference below

# Download the JSON file that maps category codes to their descriptions; predict()
# looks codes up in this mapping when it builds the display labels.
path = hf_hub_download("th-nuernberg/OnCoCoV1", "code_to_category.json", repo_type="dataset")
# Read through a context manager so the handle is closed, and decode explicitly as
# UTF-8 instead of relying on the platform's default encoding.
with open(path, encoding="utf-8") as category_file:
    code2cat = json.load(category_file)
# Class index -> category code, as stored in the model configuration.
id2label = model.config.id2label

# Sample messages offered in the demo, grouped by speaker prefix.
_COUNSELOR_EXAMPLES = (
    "Counselor: It sounds like you're feeling overwhelmed. Can you tell me more?",
    "Counselor: You mentioned feeling very alone in this. What kind of support do you have around you?",
    "Counselor: I notice you've been using substances more frequently lately. Can you tell me what's been going on around that time?",
    "Counselor: It sounds like the financial pressure has become really overwhelming. What does your current situation look like?",
)
_CLIENT_EXAMPLES = (
    "Client: I have been struggling with anxiety and don't know what to do.",
    "Client: I've been drinking every night to cope with stress and I'm starting to think it's becoming a real problem.",
    "Client: I lost my job two months ago and can't pay my rent anymore. I don't know how to tell my family.",
    "Client: I feel like I have no purpose and just go through the motions every day without feeling anything.",
)
_BERATER_EXAMPLES = (
    "Berater: Haben Sie schon versucht, mit jemandem darüber zu sprechen?",
    "Berater: Sie erwähnen, dass Sie sich sehr allein fühlen. Was bedeutet das für Ihren Alltag?",
    "Berater: Wie lange besteht die finanzielle Situation schon, und haben Sie bereits versucht, Unterstützung zu bekommen?",
    "Berater: Es klingt, als wäre der Konsum für Sie eine Art Bewältigungsstrategie geworden. Was glauben Sie, was dahintersteckt?",
)
_KLIENT_EXAMPLES = (
    "Klient: Ich streite mich ständig mit meinem Partner.",
    "Klient: Ich habe seit Monaten keine Arbeit mehr und weiß nicht, wie ich meine Miete zahlen soll.",
    "Klient: Ich trinke jeden Abend, um abschalten zu können, aber ich merke, dass es immer mehr wird.",
    "Klient: Ich fühle mich meiner Familie gegenüber wie eine Last und weiß nicht mehr, wie es weitergehen soll.",
)

# Flattened in display order: Counselor, Client, Berater, Klient.
examples = [
    *_COUNSELOR_EXAMPLES,
    *_CLIENT_EXAMPLES,
    *_BERATER_EXAMPLES,
    *_KLIENT_EXAMPLES,
]


def load_example(choice):
    """Return the text to put in the message box for a dropdown selection.

    The "Custom" entry yields an empty string; any other selection is
    returned unchanged.
    """
    return "" if choice == "Custom" else choice


def predict(text, top_k=3):
    """Classify one message and return its most probable categories.

    Args:
        text: Message to classify. ``None``, an empty string, or a
            whitespace-only string yields an empty result without running
            the model.
        top_k: Number of highest-probability labels to return. Coerced to
            ``int`` and clamped to ``[0, number of labels]``; ``None`` means
            all labels.

    Returns:
        A dict mapping ``"<code> | <description>"`` to the softmax
        probability of that label, inserted from most to least probable.
        The description falls back to the code itself when the code is not
        present in ``code2cat``.
    """
    if not text or not text.strip():
        return {}
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Select the single batch row explicitly; a bare squeeze() would collapse
    # every size-1 axis, including the label axis of a one-label model.
    probs = F.softmax(logits, dim=-1)[0]
    num_labels = probs.numel()
    # UI widgets may deliver the value as a float or string; normalise it and
    # keep it inside the range torch.topk accepts.
    k = num_labels if top_k is None else min(max(int(top_k), 0), num_labels)
    top_probs, top_indices = torch.topk(probs, k)
    result = {}
    for prob, index in zip(top_probs.tolist(), top_indices.tolist()):
        code = id2label[index]
        description = code2cat.get(code, code)
        result[f"{code} | {description}"] = prob
    return result


# Dropdown entries: every sample message plus the "Custom" entry, which
# load_example() turns into an empty textbox.
example_choices = [*examples, "Custom"]

# Hides elements with class "output-class" inside the #prediction component.
_PAGE_CSS = "#prediction .output-class { display: none; }"

# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = (
    "# OnCoCo Message Classifier\n"
    "This demo classifies individual messages from psychosocial online counseling conversations "
    "into fine-grained categories covering counselor and client communication acts, "
    "such as empathic reflection, problem exploration, emotional support, or requests for information.\n\n"
    "It is based on the [OnCoCo dataset](https://huggingface.co/datasets/th-nuernberg/OnCoCoV1), "
    "a bilingual (German/English) corpus of annotated online counseling messages spanning topics like "
    "mental health, relationships, substance use, and financial problems. "
    "Messages are classified into 40 counselor and 28 client categories.\n\n"
    "The underlying model here is [th-nuernberg/xlm-roberta-base-online-counseling-oncoco](https://huggingface.co/th-nuernberg/xlm-roberta-base-online-counseling-oncoco), "
    "a fine-tuned XLM-RoBERTa model trained on the OnCoCo dataset."
)

with gr.Blocks(title="OnCoCo Message Classifier", css=_PAGE_CSS) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Example picker; starts on the first sample message.
    example_dropdown = gr.Dropdown(
        label="Choose an example or select Custom to enter your own.",
        choices=example_choices,
        value=example_choices[0],
    )

    # Message box and Top-K selector side by side.
    with gr.Row():
        text_input = gr.Textbox(
            label="Write your own message. Prefix messages with `Counselor:`, `Client:`, `Berater:`, or `Klient:`.",
            value=examples[0],
            lines=3,
            scale=10,
        )
        top_k_dropdown = gr.Dropdown(
            label="Top-K",
            choices=[1, 2, 3],
            value=3,
            scale=1,
            min_width=80,
        )

    with gr.Row():
        classify_btn = gr.Button("Classify", variant="primary", scale=0, min_width=150)
        # Empty HTML element next to the button; presumably a spacer for the
        # rest of the row -- TODO confirm.
        gr.HTML("")

    label_output = gr.Label(num_top_classes=5, label="Prediction", show_label=False, elem_id="prediction")

    # Picking an example copies it into the message box.
    example_dropdown.change(fn=load_example, inputs=example_dropdown, outputs=text_input)
    # The button click and the textbox submit event both run the classifier
    # with the same inputs and output.
    for register_listener in (classify_btn.click, text_input.submit):
        register_listener(fn=predict, inputs=[text_input, top_k_dropdown], outputs=label_output)

demo.launch()