Spaces:

binaychandra
/

insightsreporter

Running

File size: 30,258 Bytes

from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
import smtplib
import plotly.express as px
import plotly.graph_objects as go
from plotly.graph_objs import *
from glob import glob
import os
from PIL import Image
from itertools import product
import random
#import pdfkit

us_state_abbrev = {
        'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas', 'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut',
        'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana',
        'IA': 'Iowa', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland', 'MA': 'Massachusetts',
        'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
        'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota',
        'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
        'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington',
        'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia', 'MP': 'Northern Mariana Islands',
        'PW': 'Palau', 'PR': 'Puerto Rico', 'VI': 'Virgin Islands', 'AA': 'Armed Forces Americas (Except Canada)',
        'AE': 'Armed Forces Africa/Canada/Europe/Middle East', 'AP': 'Armed Forces Pacific'
    }

def ptagwrap(observationlist : list) -> str:
    """[The function is used to wrap the observation list in the paragraph tag]

    Args:
        observationlist (list): [The list of observations in the data]

    Returns:
        [str]: [The observations wrapped in the paragraph tag]
    """    
    ptemplate = """<li><p style="margin: 0; font-size: 16px; line-height: 1.2; word-break: break-word; margin-top: 5px; margin-bottom: 5px;">
                            <span style="font-size: 16px;">{placeholder}</span></p></li>"""
    placeholder = [ptemplate.format(placeholder=x) for x in observationlist]
    return ''.join(placeholder)

def bookingcomparison_year2019(qmodel_df : pd.DataFrame()) -> str:
    """[The function used to perform booking comparison on the data with 2019 bookings]

    Args:
        qmodel_df ([pd.DataFrame]): [The dataframe containing the bookings data]

    Returns:
        [str]: [The observations in the data wrappedin a paragraph]
    """    
    latestmodel = qmodel_df[(qmodel_df.Model_creation_date == qmodel_df.Model_creation_date.max())][['Date','state_code', 'ActualBooking', 'forecast_avg_bqfb']]
    #enddate = np.datetime64(qmodel_df.Model_creation_date.max()) -2
    # Assuming qmodel_df.Model_creation_date.max() returns the date string '11-10-2023'
    date_string = qmodel_df.Model_creation_date.max()

    # Convert the date string to datetime object
    datetime_obj = datetime.strptime(date_string, "%Y-%m-%d")

    # Convert datetime object to np.datetime64
    enddate = np.datetime64(datetime_obj) - np.timedelta64(2, 'D')
    startdate = enddate - 6
    
    thisweekbookings = latestmodel[(latestmodel.Date >= startdate) & (latestmodel.Date <= enddate)][['state_code', 'ActualBooking']]
    thisweekbookings = thisweekbookings.groupby(['state_code']).sum().reset_index().rename(columns={'ActualBooking':'thisWeekActualBooking'})
    
    year2019bookings = latestmodel[(latestmodel.Date >= startdate - np.timedelta64(2 * 365 + 1,'D')) & (latestmodel.Date <= enddate - np.timedelta64(2 * 365 + 1, 'D' ))][['state_code', 'ActualBooking']]
    year2019bookings = year2019bookings.groupby(['state_code']).sum().reset_index().rename(columns={'ActualBooking':'year2019ActualBooking'})
    thisweekvsyear2019 = pd.merge(thisweekbookings, year2019bookings, how = 'left', left_on='state_code', right_on='state_code')
    thisweekvsyear2019['percentchangefrom2019'] = (thisweekvsyear2019.thisWeekActualBooking - thisweekvsyear2019.year2019ActualBooking) * 100/ thisweekvsyear2019.year2019ActualBooking
    thisweekvsyear2019 = thisweekvsyear2019.sort_values(by='percentchangefrom2019')
    thisweekvsyear2019['percentchangefrom2019'] = thisweekvsyear2019['percentchangefrom2019'] + 10
    thisweekvsyear2019 = np.round(thisweekvsyear2019, 1)
    thisweekvsyear2019['CalcRange'] = thisweekvsyear2019['percentchangefrom2019'].apply(lambda x: 'high' if x > 5 else ('medium' if (-5 <= x <= 5) else 'low'))

    fig = px.choropleth(thisweekvsyear2019, locations='state_code',
                    #color='CalcRange', color_discrete_map={'low':'red', 'medium':'Yellow','high':'Green'},
                    color='CalcRange', color_discrete_map={'low':'rgb(150, 15, 11)', 'medium':'rgb(235, 158, 35)','high':'rgb(10, 125, 33)'},
                    hover_name='state_code', locationmode='USA-states', scope='usa')
    fig.add_scattergeo(locations=thisweekvsyear2019['state_code'], locationmode='USA-states', text=thisweekvsyear2019['percentchangefrom2019'],
                        mode='text', textfont=dict(family="Arial",size=10))
    fig.update_layout({'plot_bgcolor': 'grey','paper_bgcolor': 'rgba(0, 0, 0, 0)'})
    fig.update_layout(coloraxis_showscale=True)
    fig.update_layout(geo=dict(showlakes=False, lakecolor = 'rgba(0,0,0,0)', bgcolor= 'rgba(0,0,0,0)'))
    fig.update_layout(geo=dict(scope='usa', showlakes=False))
    fig.update_traces(showlegend=True)
    #fig.write_image("/code/static/images/usmap_percentchangefrmyear2019.png", engine='kaleido',format='png')
    # resize the image to fit in the cell
    im = Image.open('/code/static/images/usmap_percentchangefrmyear2019.png')
    width, height = im.size
    im1 = im.crop((100, 100, 600, 390)) #set the crop coordinates (x,y, x+width, y+height)
    # im1.save('/code/static/images/usmap_percentchangefrmyear2019.png')

    # Gather observations ___________
    observations_thisweekvsyear2019 = ["The overall bookings this week {isincreased} compared to 2019 by {percentincordec}%".format(isincreased = 'increased' if thisweekvsyear2019.percentchangefrom2019.sum() > 0 else 'decreased', percentincordec = round(abs(thisweekvsyear2019.percentchangefrom2019.mean()), 2))]
    
    lowstatelist = thisweekvsyear2019[thisweekvsyear2019.percentchangefrom2019 < 0].sort_values('percentchangefrom2019', ascending=True).head().state_code.tolist()
    lowstatelist = ', '.join([us_state_abbrev[x] for x in lowstatelist])
    last_char_index = lowstatelist.rfind(",")
    lowstatestr = lowstatelist[:last_char_index] + " and" + lowstatelist[last_char_index+1:]
    
    strlowstates = "The States {liststates_low} record lowest percentage bookings as compared to year 2019.".format(liststates_low = lowstatestr)
    observations_thisweekvsyear2019.append(strlowstates)
    
    stateswithhighpercentchange = thisweekvsyear2019[thisweekvsyear2019.percentchangefrom2019 > 0].sort_values('percentchangefrom2019', ascending=False).head().state_code.tolist()
    stateswithhighpercentchange = ', '.join([us_state_abbrev[x] for x in stateswithhighpercentchange])
    last_char_index = stateswithhighpercentchange.rfind(",")
    stateswithhighstatestr = stateswithhighpercentchange[:last_char_index] + " and" + stateswithhighpercentchange[last_char_index+1:]
    strhighstates = "The States {liststates_high} record maximum percentage increase in booking as compared to 2019.".format(liststates_high = stateswithhighstatestr)
    observations_thisweekvsyear2019.append(strhighstates)
    
    wrappedinptag = ptagwrap(observations_thisweekvsyear2019)

    return observations_thisweekvsyear2019, wrappedinptag

def bookingcomparison_previousweek(qmodel_df: pd.DataFrame()) -> str:
    """[The function compares the bookings of the previous week to the bookings of the current week]

    Args:
        qmodel_df (pd.DataFrame): [The dataframe that contains the booking data]

    Returns:
        str: [The observations of the bookings wrapped in a paragraph tag]
    """    
    """ This function is used to compare the booking count of the previous week with the booking count of the current week and returns the percentage change."""
    latestmodel = qmodel_df[(qmodel_df.Model_creation_date == qmodel_df.Model_creation_date.max())][['Date','state_code', 'ActualBooking', 'forecast_avg_bqfb']]
    # Assuming qmodel_df.Model_creation_date.max() returns the date string '11-10-2023'
    date_string = qmodel_df.Model_creation_date.max()

    # Convert the date string to datetime object
    datetime_obj = datetime.strptime(date_string, "%Y-%m-%d")

    # Convert datetime object to np.datetime64
    enddate = np.datetime64(datetime_obj) - np.timedelta64(2, 'D')
    
    startdate = enddate - np.timedelta64(6, 'D')

    thisweekbookings = latestmodel[(latestmodel.Date >= startdate) & (latestmodel.Date <= enddate)][['state_code', 'ActualBooking']]
    thisweekbookings = thisweekbookings.groupby(['state_code']).sum().reset_index().rename(columns={'ActualBooking':'thisWeekActualBooking'})
    
    print("last week booking stats")
    print("startdate_prev", startdate - np.timedelta64(7, 'D'))
    print("enddate_prev", enddate - np.timedelta64(7, 'D'))
    lastweekbookings = latestmodel[(latestmodel.Date >= startdate - np.timedelta64(7, 'D')) & (latestmodel.Date <= enddate - np.timedelta64(7, 'D'))][['state_code', 'ActualBooking']]
    lastweekbookings = lastweekbookings.groupby(['state_code']).sum().reset_index().rename(columns={'ActualBooking':'lastWeekActualBooking'})
    
    thisweekvslastweek = pd.merge(thisweekbookings, lastweekbookings, how = 'left', left_on='state_code', right_on='state_code')
    thisweekvslastweek['percentchangefromlastweek'] = (thisweekvslastweek.thisWeekActualBooking - thisweekvslastweek.lastWeekActualBooking) * 100/ thisweekvslastweek.lastWeekActualBooking
    thisweekvslastweek['percentchangefromlastweek'] = thisweekvslastweek['percentchangefromlastweek'] + 10
    thisweekvslastweek = thisweekvslastweek.sort_values(by='percentchangefromlastweek')
    thisweekvslastweek = np.round(thisweekvslastweek, 1)
    #addition today
    thisweekvslastweek['CalcRange'] = thisweekvslastweek['percentchangefromlastweek'].apply(lambda x: 'high' if x > 5 else ('medium' if (-5 <= x <= 5) else 'low'))
    print("Printing thisweekvs lastweek")
    print(thisweekvslastweek)
    fig = px.choropleth(thisweekvslastweek, locations='state_code',
                    #color='CalcRange', color_discrete_map={'low':'red', 'medium':'Yellow','high':'Green'},
                    color='CalcRange', color_discrete_map={'low':'rgb(150, 15, 11)', 'medium':'rgb(235, 158, 35)','high':'rgb(10, 125, 33)'},
                    hover_name='state_code', locationmode='USA-states', scope='usa')
    fig.add_scattergeo(locations=thisweekvslastweek['state_code'], locationmode='USA-states', text=thisweekvslastweek['percentchangefromlastweek'],
                        mode='text', textfont=dict(family="Arial",size=9))
    fig.update_layout({'plot_bgcolor': 'grey','paper_bgcolor': 'rgba(0, 0, 0, 0)'})
    fig.update_layout(coloraxis_showscale=False)
    fig.update_layout(geo=dict(showlakes=False, lakecolor = 'rgba(0,0,0,0)', bgcolor= 'rgba(0,0,0,0)'))
    fig.update_layout(geo=dict(scope='usa', showlakes=False))
    #fig.write_image("/code/static/images/usmap_percentchangefrmlastweek.png", engine='kaleido',format='png')
    
    im = Image.open('/code/static/images/usmap_percentchangefrmlastweek.png')
    width, height = im.size
    im1 = im.crop((100, 100, 600, 390)) #set the crop coordinates (x,y, x+width, y+height)
    #im1.save('/code/static/images/usmap_percentchangefrmlastweek.png')

    # Gather observations ___________
    observations_thisweekvslastweek = ["The overall bookings this week {isincreased} from the previous week by {percentincordec}%".format(isincreased = 'increased' if thisweekvslastweek.percentchangefromlastweek.sum() > 0 else 'decreased', percentincordec = round(thisweekvslastweek.percentchangefromlastweek.mean(), 2))]
    
    lowstatelist = thisweekvslastweek[thisweekvslastweek.percentchangefromlastweek < 0].sort_values('percentchangefromlastweek', ascending=True).head().state_code.tolist()
    lowstatelist = ', '.join([us_state_abbrev[x] for x in lowstatelist])
    last_char_index = lowstatelist.rfind(",")
    lowstatestr = lowstatelist[:last_char_index] + " and" + lowstatelist[last_char_index+1:]

    strlowstates = "{liststates_low} record lowest percentage bookings as compared to last week.".format(liststates_low = lowstatestr)
    observations_thisweekvslastweek.append(strlowstates)
    
    stateswithhighpercentchange = thisweekvslastweek[thisweekvslastweek.percentchangefromlastweek > 0].sort_values('percentchangefromlastweek', ascending=False).head().state_code.tolist()
    stateswithhighpercentchange = ', '.join([us_state_abbrev[x] for x in stateswithhighpercentchange])
    last_char_index = stateswithhighpercentchange.rfind(",")
    highstatestr = stateswithhighpercentchange[:last_char_index] + " and" + stateswithhighpercentchange[last_char_index+1:]
    strhighstates = "{liststates_high} record highest percentage bookings as compared to last week.".format(liststates_high = highstatestr)
    #observations_thisweekvslastweek.append(strhighstates)strhighstates = "The States {liststates_high} record maximum percentage increase in booking as compared to last week.".format(liststates_high = ', '.join(stateswithhighpercentchange))
    observations_thisweekvslastweek.append(strhighstates)
    
    wrappedinptag = ptagwrap(observations_thisweekvslastweek)

    return observations_thisweekvslastweek, wrappedinptag

def actualbookingstats(qmodel_df : pd.DataFrame()) -> str:
    """[The function extracts the actual booking stats and returns the html wrapped in a paragraph tag]

    Args:
        qmodel_df (pd.DataFrame): [The dataframe containing the actual booking data]

    Returns:
        str: [The observation string wrapped in a paragraph tag]
    """
    qmodel_df.Date = pd.to_datetime(qmodel_df.Date, dayfirst=False)
    dlatest = qmodel_df[qmodel_df.Model_creation_date == qmodel_df.Model_creation_date.max()]
    dlatest = dlatest[['Date', 'ActualBooking']].dropna(how='any')
    dlatest = dlatest.groupby('Date').sum().reset_index()
    dlatest.Date = pd.to_datetime(dlatest.Date)
    dlatest = dlatest.sort_values('Date')
    dlatest['year'] = dlatest.Date.dt.year
    dlatest['Date'] = dlatest.Date.dt.strftime('%d-%b')
    dlatest19 = dlatest[dlatest.year == 2019]
    dlatest20 = dlatest[dlatest.year == 2020]
    dlatest21 = dlatest[dlatest.year == 2021]
    dlatest21copy = dlatest21.copy()
    #Anomaly Detection
    rangemin = int(dlatest21copy.ActualBooking.mean() - 1.96 * dlatest21copy.ActualBooking.std())
    rangemax = int(dlatest21copy.ActualBooking.mean() + 1.96 * dlatest21copy.ActualBooking.std())
    dlatest21copy['isAnomaly'] = dlatest21copy.ActualBooking.apply(lambda x: False if x in range(rangemin, rangemax) else True)
    dlatest21copy = dlatest21copy[dlatest21copy.isAnomaly]
    print(dlatest21copy.head())
    #print(dlatest21copy)
    fig = go.Figure()
    print("fig go created ")
    fig.add_trace(go.Scatter(x=dlatest20.Date, y=dlatest20.ActualBooking, mode='lines',
        line=dict(color='rgb(212, 141, 133)', width=2.5), connectgaps=True, name = "2020"))
    print("added traced one")
    fig.add_trace(go.Scatter(x=dlatest19.Date, y=dlatest19.ActualBooking, mode='lines',
            line=dict(color="rgb(232, 221, 74)", width=2), connectgaps=True, name = "2019"))
    fig.add_trace(go.Scatter(x=dlatest21.Date, y=dlatest21.ActualBooking, mode='lines',
            line=dict(color="rgb(75, 130, 128)", width=3), connectgaps=True, name='2021'))
    fig.add_trace(go.Scatter(x=dlatest21copy.Date, y=dlatest21copy.ActualBooking, mode='markers', marker_symbol='x',
                            marker_line_color="rgb(0, 0, 0)", marker_color="rgb(255,0,0)",
                            marker_line_width=1, marker_size=10,
            line=dict(color="rgb(168, 54, 50)", width=2.5), connectgaps=True, name='Anomaly'))
    fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)','paper_bgcolor': 'rgba(0, 0, 0, 0)'}, xaxis = dict(tickmode = 'linear', tickangle = -45, tick0 = dlatest.Date[0],
            dtick = 30), legend=dict(orientation="h", yanchor="bottom", y=0.85, xanchor="left", x=0.05, font = dict(size=10)))
    print("Layout updated ")
    #fig.write_image(r"/code/static/images/dailybooking_lineplot.png", engine='kaleido',format='png', height=400, width = 600)
    # resize the image to fit in the cell - plot_bgcolor = "rgba(211, 245, 244,0.5)", 
    print("image written")
    im = Image.open('/code/static/images/dailybooking_lineplot.png')
    width, height = im.size
    im1 = im.crop((50, 75, 570, 390)) #set the crop coordinates (x,y, x+width, y+height)
    #im1.save('/code/static/images/dailybooking_lineplot.png')
    bookingstats = []
    bookingstats.append("The average number of booking per day is {perdaybooking2021} which is {percentdiff20n21:.1f}% {highorlow20n21} than 2020 {andorbut} {percentdiff21n19:.1f}% {highorlow21n19} than year 2019."\
                        .format(perdaybooking2021= format(int(dlatest21.ActualBooking.mean()), ','),
                                percentdiff20n21 = (abs(dlatest21.ActualBooking.mean() - dlatest20.ActualBooking.mean())) * 100 / dlatest20.ActualBooking.mean(), 
                                highorlow20n21 = 'higher' if (dlatest21.ActualBooking.mean() - dlatest20.ActualBooking.mean()) > 0 else 'lower',
                                andorbut = 'and' if (dlatest21.ActualBooking.mean() - dlatest19.ActualBooking.mean()) > 0 else 'but',
                                percentdiff21n19 = (abs(dlatest21.ActualBooking.mean() - dlatest19.ActualBooking.mean())) * 100 / dlatest19.ActualBooking.mean(), 
                                highorlow21n19 = 'higher' if (dlatest21.ActualBooking.mean() - dlatest19.ActualBooking.mean()) > 0 else 'lower'))
    qmodel_df1 = qmodel_df[['Date', 'ActualBooking']].copy()
    qmodel_df1.Date = pd.to_datetime(qmodel_df1.Date)
    qmodel_df1['Weekday'] = qmodel_df1.Date.dt.day_name()
    qmodel_df1['weekdayorweekend'] = qmodel_df1.Weekday.apply(lambda x: 'Weekend' if x in ['Saturday', 'Sunday'] else 'Weekday')
    qmodel_df2 = qmodel_df1[['weekdayorweekend', 'ActualBooking']].copy()
    df = qmodel_df2.groupby('weekdayorweekend').mean().reset_index()
    ratiobooking = (df[df.weekdayorweekend == 'Weekday'].ActualBooking.values / df[df.weekdayorweekend == 'Weekend'].ActualBooking.values)[0]
    bookingstats.append("The average number of bookings on Weekdays are {ratiobooking:.2f} times higher than weekends for the date range Jan-2019 till date.".format(ratiobooking = ratiobooking))
    datestr = ','.join([date + '-2021' for date in dlatest21copy.Date.values])
    last_char_index = datestr.rfind(",")
    datestr = datestr[:last_char_index] + " and " + datestr[last_char_index+1:]
    bookingstats.append("There are {num} anomalies found which is marked in red in the graph.".format(num=len(dlatest21copy), datestr = datestr))
    
    wrappedinptag = ptagwrap(bookingstats)
    
    return bookingstats, wrappedinptag

def statewiseanalysis_ytd(qmodel_df: pd.DataFrame()) -> str:
    """[The function used to perform statewise analysis on the data]

    Args:
        qmodel_df (pd.DataFrame): [The dataframe containing the bookings data]

    Returns:
        str: [The observations in the data with respect to state]
    """
    qmodel_df.Date = pd.to_datetime(qmodel_df.Date)
    dlatest = qmodel_df[(qmodel_df.Model_creation_date == qmodel_df.Model_creation_date.max()) & (qmodel_df.Date >= '2021-01-01')]
    avgBookings_state = dlatest[['State', 'ActualBooking']].groupby('State').mean().reset_index().sort_values(by='ActualBooking', ascending = False)

    fig = px.pie(avgBookings_state, values='ActualBooking', names='State', hole = .5, hover_data=['ActualBooking'])
    fig.update_traces(textposition='inside', textinfo='percent+label', showlegend = False)
    fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)','paper_bgcolor': 'rgba(0, 0, 0, 0)'})
    # fig.update_traces(textposition='inside', textinfo='percent+label')
    # fig.update_layout(coloraxis_showscale=False)
    #fig.write_image("/code/static/images/pie_statesytd.png", engine='kaleido',format='png')

    highstatelist = ', '.join([item for item in avgBookings_state.State[:5].values])
    last_char_index = highstatelist.rfind(",")
    highstateliststr = highstatelist[:last_char_index] + " and" + highstatelist[last_char_index+1:]
    strstates = ["The top 5 states with highest booking YTD are {liststates}.".format(liststates = highstateliststr)]
    
    lowstatelist = ', '.join([item for item in avgBookings_state.State[-5:].values])
    last_char_index = lowstatelist.rfind(",")
    lowstateliststr = lowstatelist[:last_char_index] + " and" + lowstatelist[last_char_index+1:]
    strstates.append("The top 5 states with lowest booking YTD are {liststates}.".format(liststates = lowstateliststr))
    
    wrappedinptag = ptagwrap(strstates)
    return strstates, wrappedinptag

def divisionwiseanalysis(qmodel_df: pd.DataFrame) -> str:
    """[Function to perform divisionwise analysis on the data]

    Args:
        qmodel_df ([pd.DataFrame]): [ Input DataFrame containing raw data]

    Returns:
        [str]: [Observations based on division in the data]
    """
    dlatest = qmodel_df[(qmodel_df.Model_creation_date == qmodel_df.Model_creation_date.max())]
    dlatest.Date = pd.to_datetime(dlatest.Date)
    dlatest = dlatest[['Date', 'Division', 'ActualBooking']]#.groupby('Division').mean().reset_index()
    dlatest19 = dlatest[(dlatest.Date >= '2019-01-01') & (dlatest.Date < '2020-01-01')][['Division', 'ActualBooking']]
    dlatest20 = dlatest[(dlatest.Date >= '2020-01-01') & (dlatest.Date < '2021-01-01')][['Division', 'ActualBooking']]
    dlatest21 = dlatest[dlatest.Date >= '2021-01-01'][['Division', 'ActualBooking']]
    df19 = dlatest19.groupby('Division').mean().reset_index()
    df19.rename(columns={'ActualBooking':'2019'}, inplace=True)
    df20 = dlatest20.groupby('Division').mean().reset_index()
    df20.rename(columns={'ActualBooking':'2020'}, inplace=True)
    df1920 = pd.merge(df19, df20, how='inner', left_on='Division', right_on = 'Division')
    df21 = dlatest21.groupby('Division').mean().reset_index()
    df21.rename(columns={'ActualBooking':'2021'}, inplace=True)
    df192021 = pd.merge(df1920, df21, how = 'inner', left_on='Division', right_on='Division')
    #df192021 = np.round(df192021,0)
    fig = go.Figure()
    fig.add_trace(go.Bar(x=df192021.Division, y=df192021['2019'], name = '2019'))
    fig.add_trace(go.Bar(x=df192021.Division, y=df192021['2020'], name = '2020'))
    fig.add_trace(go.Bar(x=df192021.Division, y=df192021['2021'], name = '2021'))
    fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)','paper_bgcolor': 'rgba(0, 0, 0, 0)'}, 
                  xaxis = dict(tickmode = 'linear', tickangle = -45), 
                  legend=dict(orientation="h", yanchor="bottom", y=0.95, xanchor="right", x=1, font = dict(size=10)))
    #fig.write_image(r"/code/static/images/dailybookingdivision_barplot.png", engine='kaleido',format='png', height=400, width = 600)
    
    df192021['pChangein1921'] = (df192021['2021'] - df192021['2019']) * 100 / df192021['2019']
    df192021['pChangein2021'] = (df192021['2021'] - df192021['2020']) * 100 / df192021['2019']
    divisonstats = []
    divisonstats.append("The average number of bookings per division per day in 2021 is {avgbookings2021},  \
            which is {pamount1921:.1f}% {moreorless1921} from 2019 {andorbut} {pamount2021}% {moreless2021} from year 2020".\
            format(avgbookings2021 = format(int(df192021['2021'].mean()), ','), 
                pamount1921 = round(abs(df192021.pChangein1921.mean()),1), 
                moreorless1921 = 'less' if df192021.pChangein1921.mean() < 0 else 'more',
                andorbut = 'and' if df192021.pChangein2021.mean() > 0 else 'but',
                pamount2021 = round(abs(df192021.pChangein2021.mean()),1),
                moreless2021 = 'less' if df192021.pChangein2021.mean() < 0 else 'more'))
    divisonstats.append("The division {max_division} has recorded average highest number of per day booking of {maxbooking}.".\
            format(max_division = df192021.sort_values(by = '2021', ascending=False).head(1).Division.tolist()[0],
                maxbooking= format(int(df192021['2021'].max()), ',')))
    divisonstats.append("The division {min_division} has recorded lowest number of per day booking of {minbooking}.".\
            format(min_division = df192021.sort_values(by = '2021', ascending=True).head(1).Division.tolist()[0],
               minbooking = format(int(df192021['2021'].min()), ',')))
    
    wrappedinptag = ptagwrap(divisonstats)
    return divisonstats, wrappedinptag

def sendmail_html(htmlcontent: str) -> None:
    """[The sendmail function sends the html content to the email address]

    Args:
        htmlcontent (str): [The html content to be sent]
    """
    # Send an HTML email with an embedded image and a plain text message for
    # email clients that don't want to display the HTML.
    strFrom = 'abc@gmail.com'
    #strTo = 'binay.chandra@publicissapient.com, will.stokvis@publicismedia.com, john.keating@digitas.com, daniel.stroik@publicismedia.com'
    strTo = 'xyz123@gmail.com, abc456@gmail.com'
    # Create the root message and fill in the from, to, and subject headers
    msgRoot = MIMEMultipart('related')
    msgRoot['Subject'] = 'Marriott Weekly Report'
    msgRoot['From'] = strFrom
    msgRoot['To'] = strTo
    msgRoot.preamble = 'This is a multi-part message in MIME format.'
    # Encapsulate the plain and HTML versions of the message body in an
    # 'alternative' part, so message agents can decide which they want to display.
    msgAlternative = MIMEMultipart('alternative')
    msgRoot.attach(msgAlternative)
    msgText = MIMEText(htmlcontent, 'html')
    msgAlternative.attach(msgText)

    # We reference the image in the IMG SRC attribute by the ID we give it below
    pngfileslist = glob(r'/code/static/images/*.png')
    for pngfile in pngfileslist:
        with open(pngfile, 'rb') as fp:
            msgImage = MIMEImage(fp.read())
            _, filename = os.path.split(pngfile)
        msgImage.add_header('Content-ID', '<{}>'.format(filename))
        msgRoot.attach(msgImage)  
    
    # Send the email (this example assumes SMTP authentication is required)
    smtp = smtplib.SMTP('smtp.gmail.com:587')
    smtp.ehlo()
    smtp.starttls()
    smtp.login("abc123@gmail.com", "<<xxxxyyyyyzzzzz>>")
    receivers = strTo.split(",") #+ receiver_bcc.split(",")
    smtp.sendmail(strFrom, receivers, msgRoot.as_string())
    smtp.quit()

def performanalysisandupdate(htmlinputcontent : str, qmodel_df : pd.DataFrame) -> str:
    """[This function is used to perform different analysis and update the html file]

    Args:
        htmlinputcontent ([str]): [The input HTML template file]

    Returns:
        [str]: [The updated HTML template file with the analysis results]
    """
    results_actualbookingstats = actualbookingstats(qmodel_df)
    htmlcontent = htmlinputcontent.replace("[[actualbookingstats]]", results_actualbookingstats)
    
    thisweekvslastweekobsptags = bookingcomparison_previousweek(qmodel_df)
    htmlcontent = htmlcontent.replace('[[thisweekvslastweekobservartions]]', thisweekvslastweekobsptags)
    
    thisweekvsyear2019obsptags = bookingcomparison_year2019(qmodel_df)
    htmlcontent = htmlcontent.replace('[[thisweekvsyear2019observartions]]', thisweekvsyear2019obsptags)
    
    statewiseanalysis_ytdstr = statewiseanalysis_ytd(qmodel_df)
    htmlcontent = htmlcontent.replace('[[statewiseanalysis_ytdobservartions]]', statewiseanalysis_ytdstr)
    
    divisionwiseanalysis_ytdstr = divisionwiseanalysis(qmodel_df)
    htmlcontent = htmlcontent.replace('[[divisionwiseanalysis_ytdobservartions]]', divisionwiseanalysis_ytdstr)
    
    return htmlcontent

def getbookingdf():
    startdate, enddate = '2019-01-01', datetime.now().strftime("%Y-%m-%d")
    dates = pd.date_range(start=startdate, end=enddate, freq='D')
    states_df = pd.read_csv(r"states.csv")
    usa_states = states_df.State.to_list()
    date_state_combinations = list(product(dates, usa_states))
    tempdf = pd.DataFrame(date_state_combinations, columns=['Date', 'State'])
    bookingdf = pd.merge(tempdf, states_df, left_on='State', right_on='State', how='inner')
    bookingdf['ActualBooking'] = np.random.randint(50, 1500, size=len(bookingdf))
    bookingdf['forecasted_booking'] = bookingdf['ActualBooking'] + random.randint(-50, 450)
    bookingdf['Model_creation_date'] = datetime.now().strftime('%Y-%m-%d')
    bookingdf.rename(columns={'State Code':'state_code', 'forecasted_booking':'forecast_avg_bqfb' }, inplace = True)
    return bookingdf

if __name__ == '__main__':

    # Read the bigquery query table data for bookings
    qmodel_df = getbookingdf()
    print(qmodel_df.head())

    # Read the HTML Template
    htmltemplate = r'templates\maildraft_template.html'
    with open(htmltemplate) as f: 
        rawhtmlcontent = f.read()
    
    # Call the functions Modify the HTML content with analysis results
    htmlcontent = performanalysisandupdate(rawhtmlcontent, qmodel_df)

    # Below code generates the PDF from HTML
    # Options for PDF generation (optional)
    # options = {
    #     'page-size': 'A4',
    #     'margin-top': '0mm',
    #     'margin-right': '0mm',
    #     'margin-bottom': '0mm',
    #     'margin-left': '0mm'
    # }
    # # Convert HTML to PDF
    # path_wkhtml2pdf = r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe"
    # config = pdfkit.configuration(wkhtmltopdf=path_wkhtml2pdf)
    # pdfkit.from_string(htmlcontent, "Weekly_Analysis.pdf", options=options, configuration=config)
    # Send the email with updated Html content
    sendmail_html(htmlcontent)