File size: 4,622 Bytes
f006f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3

import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

from mappingservice.utils import predict_language


@pytest.fixture
def classifier():
    """Build the text-classification pipeline used as the fallback predictor.

    Loads the ``papluca/xlm-roberta-base-language-detection`` model together
    with its tokenizer and wraps both in a PyTorch ``text-classification``
    pipeline. The pipeline runs on CUDA device 0 when CUDA is available and
    on the CPU (device ``-1``) otherwise.
    """
    checkpoint = "papluca/xlm-roberta-base-language-detection"

    # Device index 0 selects the first GPU; -1 keeps inference on the CPU.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "text-classification",
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        framework="pt",
        device=device_index,
    )


def test_model_predictions(classifier):
    """Check the predicted language for a set of room descriptions.

    Each description is first passed to ``predict_language``. When that
    returns a falsy value, the ``classifier`` pipeline is called instead and
    the ``label`` of its first result is used as the prediction.

    All cases are evaluated before asserting, so one run reports every
    wrong prediction instead of stopping at the first one.

    Args:
        classifier: Callable text-classification pipeline (the ``classifier``
            fixture); called with a description, it must return a sequence
            whose first item has a ``"label"`` key.
    """
    # (description, expected language code) pairs; each input appears once.
    test_data = [
        ('Habitacion estandar con bano', 'es'),
        ('apartamento de lujo con vistas al mar', 'es'),
        ('casa ejecutiva', 'es'),
        ('villa doble', 'es'),
        ('estudio de una habitacion de lujo', 'es'),
        ('chalet premier con dos habitaciones', 'es'),
        ('casa de la playa premium con bano compartido', 'es'),
        ('estudio familiar grande', 'es'),
        ('suite familiar junior', 'en'),
        ('bungalow tradicional sin bano', 'es'),
        ('superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'en'),  # noqa: E501
        ('habitacion matrimonial adaptada discapacitados', 'es'),
        ('privilege room twin for 2 adults 0 children and 0 infants', 'en'),
        ('deluxe room double for 2 adults 0 children and 0 infants', 'en'),
        ('premier palace double room', 'en'),
        ('double single use deluxe', 'en'),
        ('double room queen bed superior', 'en'),
        ('double guest room', 'en'),
        ('single room for 1 adults 0 children and 0 infants', 'en'),
        ('twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'en'),  # noqa: E501
        ('superior quadruple room', 'en'),
        ('superior one bedroom apartment x2013 2 adults', 'en'),
        ('comfort double', 'en'),
        ('1 king bed suite nonsmoking', 'en'),
        ('junior suite 1 king bed nonsmoking', 'en'),
        ('family room superior', 'en'),
    ]

    failures = []
    for description, expected_label in test_data:
        # Try predict_language first (presumably keyword-based, per the
        # original comment here -- its implementation is not in this file).
        predicted_label = predict_language(description)

        # A falsy result means no prediction was made: fall back to the model.
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
            )

    # Single assertion at the end so the report lists every mismatch.
    assert not failures, "\n".join(failures)