import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ──────────────────────────────────────────────
# 1. LOAD & ENRICH DATA
# ──────────────────────────────────────────────
df = pd.read_csv("hotel_reviews_enriched.csv")

# VADER sentiment scores
analyzer = SentimentIntensityAnalyzer()


def _compound_score(review):
    """Return VADER's compound polarity for one review (value is passed through str())."""
    return analyzer.polarity_scores(str(review))["compound"]


def _label_for_score(score):
    """Bucket a compound score: >= 0.05 positive, <= -0.05 negative, otherwise neutral."""
    if score >= 0.05:
        return "positive"
    if score <= -0.05:
        return "negative"
    return "neutral"


# One compound score per review, then a three-way label derived from that score.
df["vader_score"] = df["review_text"].apply(_compound_score)
df["vader_sentiment"] = df["vader_score"].apply(_label_for_score)

# Pricing recommendation (rule-based, same as Notebook 2)
def pricing_strategy(row):
    """Map one review row to a rule-based pricing recommendation.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing
            ``"sentiment"``, ``"price_eur"`` and ``"occupancy_rate"``.

    Returns:
        One of four recommendation strings. Rules are checked in order:
        negative sentiment above 200 EUR, positive sentiment above 80
        occupancy, neutral sentiment, then a catch-all.
    """
    mood = row["sentiment"]

    # Unhappy guests at a high price point: the price is hard to justify.
    if mood == "negative" and row["price_eur"] > 200:
        return "Decrease price / improve service"

    # Happy guests and strong demand leave room for a cautious increase.
    if mood == "positive" and row["occupancy_rate"] > 80:
        return "Increase price carefully"

    if mood == "neutral":
        return "Maintain price / monitor reviews"

    # Everything else (cheap-and-negative, positive-but-quiet, unknown labels).
    return "Maintain price"

# Attach the rule-based recommendation to every review row.
df["recommendation"] = df.apply(pricing_strategy, axis=1)

# Helper lists for filters
# Missing values are dropped before sorting: NaN is a float, and sorted()
# raises TypeError when asked to order it against the string names.
ALL_HOTELS = sorted(df["hotel_name"].dropna().unique().tolist())
ALL_CATEGORIES = sorted(df["hotel_category"].dropna().unique().tolist())
# Fixed orders (not derived from the data) so dropdowns and chart axes stay stable.
ALL_SEASONS = ["Winter", "Spring", "Summer", "Autumn"]
ALL_SENTIMENTS = ["positive", "neutral", "negative"]

# Colours
# Keyed by the values of the "sentiment" / "hotel_category" columns so plotly
# can map them through color_discrete_map.
SENT_COLORS = {"positive": "#2ecc71", "neutral": "#f39c12", "negative": "#e74c3c"}
CAT_COLORS = {
    "Luxury": "#8e44ad", "Business": "#2980b9", "Boutique": "#e67e22",
    "Resort": "#1abc9c", "Budget": "#95a5a6",
}


# ──────────────────────────────────────────────
# 2. CHART FUNCTIONS
# ──────────────────────────────────────────────
def filter_df(hotels, categories, seasons):
    """Return a copy of the module-level ``df`` narrowed by the given selections.

    Args:
        hotels: Values to keep in ``hotel_name``; falsy (None/empty) means no filter.
        categories: Values to keep in ``hotel_category``; falsy means no filter.
        seasons: Values to keep in ``season``; falsy means no filter.

    Returns:
        A DataFrame containing only rows that satisfy every active filter.
    """
    selections = (
        ("hotel_name", hotels),
        ("hotel_category", categories),
        ("season", seasons),
    )

    subset = df.copy()
    for column, wanted in selections:
        # An empty selection means "all", so only non-empty ones narrow the data.
        if wanted:
            subset = subset[subset[column].isin(wanted)]
    return subset


def build_dashboard(hotels, categories, seasons):
    """Build the five dashboard figures and the Markdown KPI table.

    Args:
        hotels: Hotel names to keep; an empty selection keeps all hotels.
        categories: Hotel categories to keep; an empty selection keeps all.
        seasons: Seasons to keep; an empty selection keeps all.

    Returns:
        A 6-tuple: sentiment bar chart, average-rating-by-category bar chart,
        price-by-sentiment box plot, average-revenue-by-hotel bar chart,
        recommendation pie chart, and a Markdown summary string. When the
        filters match no rows, the five figures are one shared placeholder
        figure and the summary is ``"No data."``.
    """
    data = filter_df(hotels, categories, seasons)
    if data.empty:
        placeholder = go.Figure().update_layout(title="No data for selected filters")
        return (placeholder,) * 5 + ("No data.",)

    # ── Chart 1: Sentiment distribution ──
    # Reindex so all three sentiment bars appear, in a fixed order, even at zero.
    sentiment_totals = data["sentiment"].value_counts().reindex(ALL_SENTIMENTS, fill_value=0)
    fig_sent = px.bar(
        x=sentiment_totals.index, y=sentiment_totals.values,
        color=sentiment_totals.index, color_discrete_map=SENT_COLORS,
        labels={"x": "Sentiment", "y": "Reviews"},
        title="Customer Sentiment Distribution",
    )
    fig_sent.update_layout(showlegend=False)

    # ── Chart 2: Avg rating by category ──
    rating_by_category = data.groupby("hotel_category")["rating"].mean()
    rating_by_category = rating_by_category.sort_values(ascending=False)
    fig_cat = px.bar(
        x=rating_by_category.index, y=rating_by_category.values,
        color=rating_by_category.index, color_discrete_map=CAT_COLORS,
        labels={"x": "Hotel Category", "y": "Avg Rating"},
        title="Average Rating by Hotel Category",
    )
    fig_cat.update_layout(showlegend=False)

    # ── Chart 3: Price by sentiment ──
    fig_price = px.box(
        data, x="sentiment", y="price_eur",
        color="sentiment", color_discrete_map=SENT_COLORS,
        category_orders={"sentiment": ALL_SENTIMENTS},
        labels={"price_eur": "Price (€)", "sentiment": "Sentiment"},
        title="Price Distribution by Sentiment",
    )
    fig_price.update_layout(showlegend=False)

    # ── Chart 4: Revenue by hotel ──
    revenue_by_hotel = data.groupby("hotel_name")["revenue_eur"].mean()
    revenue_by_hotel = revenue_by_hotel.sort_values(ascending=True)
    fig_rev = px.bar(
        x=revenue_by_hotel.values, y=revenue_by_hotel.index, orientation="h",
        labels={"x": "Avg Revenue (€)", "y": ""},
        title="Average Revenue by Hotel",
        color_discrete_sequence=["#3498db"],
    )

    # ── Chart 5: Recommendation distribution ──
    recommendation_totals = data["recommendation"].value_counts()
    fig_rec = px.pie(
        names=recommendation_totals.index, values=recommendation_totals.values,
        title="Pricing Recommendations",
        color_discrete_sequence=px.colors.qualitative.Set2,
    )

    # ── Summary text ──
    n_reviews = len(data)
    avg_rating = data["rating"].mean()
    avg_price = data["price_eur"].mean()
    avg_occupancy = data["occupancy_rate"].mean()
    avg_revenue = data["revenue_eur"].mean()
    # Mean of a boolean mask is the fraction of matching rows; scale to percent.
    positive_pct = (data["sentiment"] == "positive").mean() * 100
    negative_pct = (data["sentiment"] == "negative").mean() * 100

    summary = (
        f"**Filtered dataset:** {n_reviews:,} reviews\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Avg Rating | {avg_rating:.2f} |\n"
        f"| Avg Price | €{avg_price:.0f} |\n"
        f"| Avg Occupancy | {avg_occupancy:.1f}% |\n"
        f"| Avg Revenue | €{avg_revenue:,.0f} |\n"
        f"| Positive reviews | {positive_pct:.1f}% |\n"
        f"| Negative reviews | {negative_pct:.1f}% |\n"
    )

    return fig_sent, fig_cat, fig_price, fig_rev, fig_rec, summary


# ──────────────────────────────────────────────
# 3. SINGLE-REVIEW ANALYZER
# ──────────────────────────────────────────────
def analyze_review(text):
    """Score one free-text review with VADER and suggest a pricing action.

    Args:
        text: Review text from the UI textbox; may be ``None`` or blank.

    Returns:
        A ``(detail, advice)`` pair of Markdown strings. Both the blank-input
        path and the normal path return exactly two items, because the
        callback is wired to two output components.
    """
    if not text or not text.strip():
        # Previously returned three values here, which did not match the two
        # outputs of the click handler and made Gradio fail on empty input.
        return "Please enter a review.", ""

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Same +/-0.05 cut-offs that are used to label the dataset's vader_sentiment.
    if compound >= 0.05:
        label = "positive"
    elif compound <= -0.05:
        label = "negative"
    else:
        label = "neutral"

    detail = (
        f"**VADER Compound Score:** {compound:.3f}\n\n"
        f"**Sentiment:** {label.upper()}\n\n"
        f"Positive: {scores['pos']:.2f} · Neutral: {scores['neu']:.2f} · Negative: {scores['neg']:.2f}"
    )

    if label == "negative":
        advice = "⚠️ Consider reducing price or improving specific service areas mentioned in the review."
    elif label == "positive":
        advice = "✅ Positive feedback — consider a careful price increase if occupancy is high."
    else:
        advice = "📊 Neutral feedback — maintain current pricing and monitor upcoming reviews."

    return detail, advice


# ──────────────────────────────────────────────
# 4. HOTEL DEEP-DIVE
# ──────────────────────────────────────────────
def hotel_deep_dive(hotel_name):
    """Build the per-hotel charts and statistics table.

    Args:
        hotel_name: Value to match exactly against the ``hotel_name`` column.

    Returns:
        A 3-tuple: stacked-by-sentiment monthly review bar chart, histogram of
        VADER compound scores, and a Markdown stats table. When no row matches,
        both figures are one shared placeholder and the text is ``"No data."``.
    """
    is_selected = df["hotel_name"] == hotel_name
    h = df[is_selected]
    if h.empty:
        placeholder = go.Figure().update_layout(title="No data")
        return placeholder, placeholder, "No data."

    # Sentiment over time
    reviews_per_bucket = h.groupby(["month", "sentiment"]).size()
    monthly = reviews_per_bucket.reset_index(name="count")
    fig_time = px.bar(
        monthly, x="month", y="count", color="sentiment",
        color_discrete_map=SENT_COLORS,
        title=f"Sentiment Over Time — {hotel_name}",
        labels={"month": "Month", "count": "Reviews"},
    )
    # Ask plotly to order the month categories in ascending order on the x axis.
    fig_time.update_layout(xaxis={"categoryorder": "category ascending"})

    # VADER score distribution
    fig_vader = px.histogram(
        h, x="vader_score", nbins=30,
        color_discrete_sequence=["#3498db"],
        title=f"VADER Score Distribution — {hotel_name}",
        labels={"vader_score": "VADER Compound Score"},
    )

    # Stats
    # Category and location are read from the first matching row.
    category = h["hotel_category"].iloc[0]
    location = h["location"].iloc[0]
    # mode() can return several values on a tie; the first one is shown.
    top_recommendation = h["recommendation"].mode().iloc[0]

    stats = (
        f"**{hotel_name}** ({category} · {location})\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Total reviews | {len(h)} |\n"
        f"| Avg rating | {h['rating'].mean():.2f} |\n"
        f"| Avg price | €{h['price_eur'].mean():.0f} |\n"
        f"| Avg occupancy | {h['occupancy_rate'].mean():.1f}% |\n"
        f"| Avg revenue | €{h['revenue_eur'].mean():,.0f} |\n"
        f"| Avg VADER score | {h['vader_score'].mean():.3f} |\n"
        f"| Top recommendation | {top_recommendation} |\n"
    )

    return fig_time, fig_vader, stats


# ──────────────────────────────────────────────
# 5. GRADIO UI
# ──────────────────────────────────────────────
# Declarative UI: components are created in the order they should appear, so
# the statement order below defines the page layout. Four tabs are defined:
# dashboard, per-hotel deep-dive, single-review analyzer and a data table.
with gr.Blocks(
    title="Hotel Pricing & Sentiment Analyzer",
    theme=gr.themes.Soft(),
) as demo:

    # Page header shown above all tabs.
    gr.Markdown(
        "# 🏨 Hotel Pricing & Sentiment Analyzer\n"
        "*ESCP Business School — AI for Big Data Management Group Project*\n\n"
        "**Research question:** How can a hotel chain optimize pricing and service quality "
        "using customer review sentiment and booking data?"
    )

    # ── TAB 1: Dashboard ──
    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Filter by hotel, category, or season to explore the data.")
        # Three multi-select filters; filter_df treats an empty selection as "all".
        with gr.Row():
            dd_hotels = gr.Dropdown(ALL_HOTELS, multiselect=True, label="Hotels (leave empty = all)")
            dd_cats = gr.Dropdown(ALL_CATEGORIES, multiselect=True, label="Categories")
            dd_seasons = gr.Dropdown(ALL_SEASONS, multiselect=True, label="Seasons")
        btn_dash = gr.Button("Update Dashboard", variant="primary")
        md_summary = gr.Markdown()
        # Charts are laid out as two rows of two plots, then one plot below.
        with gr.Row():
            p_sent = gr.Plot(label="Sentiment")
            p_cat = gr.Plot(label="Rating by Category")
        with gr.Row():
            p_price = gr.Plot(label="Price by Sentiment")
            p_rev = gr.Plot(label="Revenue by Hotel")
        p_rec = gr.Plot(label="Recommendations")

        # The outputs list follows the order of build_dashboard's return tuple.
        btn_dash.click(
            build_dashboard,
            inputs=[dd_hotels, dd_cats, dd_seasons],
            outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
        )

    # ── TAB 2: Hotel Deep-Dive ──
    with gr.Tab("🏢 Hotel Deep-Dive"):
        gr.Markdown("Select a hotel to see its detailed performance.")
        # Single-select dropdown (no multiselect), unlike the dashboard filters.
        dd_hotel = gr.Dropdown(ALL_HOTELS, label="Hotel")
        btn_hotel = gr.Button("Analyze", variant="primary")
        md_hotel = gr.Markdown()
        with gr.Row():
            p_time = gr.Plot(label="Sentiment over time")
            p_vader = gr.Plot(label="VADER distribution")

        # hotel_deep_dive returns (time figure, VADER figure, stats markdown).
        btn_hotel.click(
            hotel_deep_dive,
            inputs=[dd_hotel],
            outputs=[p_time, p_vader, md_hotel],
        )

    # ── TAB 3: Review Analyzer ──
    with gr.Tab("💬 Review Analyzer"):
        gr.Markdown("Paste a hotel review to get instant sentiment analysis and a pricing recommendation.")
        txt_review = gr.Textbox(lines=5, label="Paste a review here")
        btn_review = gr.Button("Analyze Review", variant="primary")
        md_detail = gr.Markdown(label="Sentiment detail")
        md_advice = gr.Markdown(label="Recommendation")

        # Two output components: analyze_review must return exactly two values
        # on every code path.
        btn_review.click(
            analyze_review,
            inputs=[txt_review],
            outputs=[md_detail, md_advice],
        )

    # ── TAB 4: Raw Data ──
    with gr.Tab("📋 Data Table"):
        gr.Markdown("Browse the enriched dataset with all variables and recommendations.")
        # Read-only table of a fixed column subset, limited to the first 500 rows.
        gr.Dataframe(
            value=df[["hotel_name", "hotel_category", "location", "rating",
                       "sentiment", "vader_score", "price_eur", "occupancy_rate",
                       "revenue_eur", "season", "recommendation"]].head(500),
            interactive=False,
        )

    # Load dashboard on start
    # Runs build_dashboard with the dropdowns' initial values when the page loads.
    demo.load(
        build_dashboard,
        inputs=[dd_hotels, dd_cats, dd_seasons],
        outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
    )


# Called at module level, so the app is launched whenever this file is executed
# or imported.
demo.launch()