Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| import pytest | |
| import torch | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
| from mappingservice.utils import predict_language | |
@pytest.fixture(scope="module")
def classifier():
    """Provide a language-detection text-classification pipeline.

    Registered as a pytest fixture so that tests can request it by naming
    a ``classifier`` parameter. Module scope means the model and tokenizer
    are loaded once and shared by every test in this module rather than
    being reloaded per test.

    Returns:
        The ``transformers`` "text-classification" pipeline built from the
        ``papluca/xlm-roberta-base-language-detection`` checkpoint.
    """
    model_path = "papluca/xlm-roberta-base-language-detection"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Use the first CUDA device when one is available; -1 selects the CPU.
    device = 0 if torch.cuda.is_available() else -1

    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=device,
    )
def test_model_predictions(classifier):
    """Check the predicted language for a set of room descriptions.

    Each description is first passed to ``predict_language``. When that
    returns a falsy value, the ``classifier`` callable is invoked instead
    and the ``"label"`` of its first result is used as the prediction.

    Every mismatch is collected and reported in a single assertion at the
    end, so one failing description does not hide the others.

    Args:
        classifier: Callable taking a description string and returning a
            sequence whose first item is a mapping with a ``"label"`` key.
    """
    cases = [
        ("Habitacion estandar con bano", "es"),
        ("apartamento de lujo con vistas al mar", "es"),
        ("casa ejecutiva", "es"),
        ("villa doble", "es"),
        ("estudio de una habitacion de lujo", "es"),
        ("chalet premier con dos habitaciones", "es"),
        ("casa de la playa premium con bano compartido", "es"),
        ("estudio familiar grande", "es"),
        ("suite familiar junior", "en"),
        ("bungalow tradicional sin bano", "es"),
        ("superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq", "en"),  # noqa: E501
        ("habitacion matrimonial adaptada discapacitados", "es"),
        ("privilege room twin for 2 adults 0 children and 0 infants", "en"),
        ("deluxe room double for 2 adults 0 children and 0 infants", "en"),
        ("premier palace double room", "en"),
        ("double single use deluxe", "en"),
        ("double room queen bed superior", "en"),
        ("double guest room", "en"),
        ("single room for 1 adults 0 children and 0 infants", "en"),
        ("twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe", "en"),  # noqa: E501
        ("superior quadruple room", "en"),
        ("superior one bedroom apartment x2013 2 adults", "en"),
        ("comfort double", "en"),
        ("1 king bed suite nonsmoking", "en"),
        ("junior suite 1 king bed nonsmoking", "en"),
        ("family room superior", "en"),
    ]

    failures = []
    for description, expected_label in cases:
        # First, try to predict based on keywords.
        predicted_label = predict_language(description)

        # If no prediction was made, fall back to the model prediction.
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            predicted_label = classifier(description)[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': "
                f"expected '{expected_label}', obtained '{predicted_label}'"
            )

    assert not failures, "\n".join(failures)