File size: 2,711 Bytes
8026579
 
 
 
217cc74
457da48
217cc74
8026579
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217cc74
457da48
217cc74
 
 
457da48
 
217cc74
 
 
8026579
 
 
 
 
 
0a12911
8026579
0a12911
 
 
8026579
0a12911
 
 
 
 
8026579
 
 
0a12911
8026579
 
217cc74
0a12911
 
217cc74
8b71188
457da48
0a12911
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import torch
from transformers import DistilBertTokenizer, DistilBertModel
import torch.nn as nn
import pandas as pd
from datasets import load_dataset
from analisis import mostrar_analisis_aerolinea

# -------------------------------
# Modelo personalizado regresor
# -------------------------------
class DistilBertRegressor(nn.Module):
    """DistilBERT encoder followed by a single-output linear layer.

    The encoder is initialised from the ``distilbert-base-uncased``
    checkpoint. The linear head maps the hidden vector found at sequence
    position 0 to one value per input row.
    """

    def __init__(self):
        super().__init__()
        encoder = DistilBertModel.from_pretrained("distilbert-base-uncased")
        # The attribute names below become the prefixes of the state-dict
        # keys, so they must stay as ``bert`` and ``regressor``.
        self.bert = encoder
        self.regressor = nn.Linear(encoder.config.hidden_size, 1)

    def forward(self, input_ids=None, attention_mask=None):
        """Run the encoder and return the linear head's output."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = encoded.last_hidden_state
        # Index 0 on the second axis picks the first position of every row.
        first_position = hidden_states[:, 0]
        prediction = self.regressor(first_position)
        return prediction

# -------------------------------
# Cargar modelo y tokenizer con caché moderna
# -------------------------------
@st.cache_resource
def load_model():
    """Build the regressor with its trained weights and load the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has had ``eval()`` called
        on it before being returned.
    """
    regressor = DistilBertRegressor()

    # Fetch the trained weights from Hugging Face, mapped onto the CPU.
    weights_url = "https://huggingface.co/ManuelMC/Modelo_TFM/resolve/main/pytorch_model.bin"
    trained_weights = torch.hub.load_state_dict_from_url(weights_url, map_location="cpu")

    regressor.load_state_dict(trained_weights)
    regressor.eval()

    return regressor, DistilBertTokenizer.from_pretrained("ManuelMC/Modelo_TFM")

# -------------------------------
# Cargar dataset desde Hugging Face
# -------------------------------
@st.cache_data
def cargar_dataset():
    """Load the ``train`` split of ``ManuelMC/dataset_scraping`` as a pandas DataFrame."""
    return load_dataset("ManuelMC/dataset_scraping", split="train").to_pandas()

# Both loaders are wrapped in Streamlit cache decorators, so these module-level
# calls are meant to do the expensive work once rather than on every rerun.
# Review dataset as a DataFrame; passed to the airline analysis view below.
df_reviews = cargar_dataset()

# Regressor and tokenizer used by the "Predict Rating" button handler.
model, tokenizer = load_model()

# -------------------------------
# Interfaz de usuario
# -------------------------------

# Factor applied to the raw regression output before display ("scale
# adjustment" in the original code; its derivation is not shown in this file).
RATING_SCALE = 1.65
# Bounds of the displayed rating. The upper bound comes from the "/10" label;
# the lower bound of 0 only prevents a negative score from being shown.
# NOTE(review): confirm whether the real scale starts at 0 or at 1.
MIN_RATING = 0.0
MAX_RATING = 10

st.title("✈️ Airline Review Rating Prediction")

airlines = ["Iberia", "Vueling", "Ryanair", "Air Europa", "EasyJet", "Eurowings",
            "Grupo AirFrance-KLM", "Grupo IAG", "Iberia Express", "Jet2.com",
            "Lufthansa", "Norwegian", "Pegasus Airlines", "SAS", "Turkish Airlines", "Wizz Air"]

selected_airline = st.selectbox("Select an airline", airlines)

review = st.text_area("Write your flight review here:")

if st.button("Predict Rating"):
    if not review.strip():
        # Without this guard an empty review is still scored and the result
        # is presented as if it were a real prediction.
        st.warning("Please write a review before requesting a prediction.")
    else:
        inputs = tokenizer(review, return_tensors="pt", truncation=True, padding=True, max_length=256)
        # Inference only: no gradients are needed for a single prediction.
        with torch.no_grad():
            raw_score = model(**inputs).squeeze().item()

        rating = raw_score * RATING_SCALE
        # Clamp on both sides; the regression head is unbounded, so the raw
        # value can fall below zero as well as above the maximum.
        rating = max(MIN_RATING, min(rating, MAX_RATING))
        rating = round(rating, 2)

        with st.expander(f"Results for {selected_airline}"):
            st.success(f"🌟 Predicted Rating: **{rating}/10**")
            mostrar_analisis_aerolinea(df_reviews, selected_airline)