IvanStudent committed on
Commit
b0b5df0
·
1 Parent(s): d54219d

Guardar mis cambios locales

Browse files
Files changed (1) hide show
  1. app.py +124 -71
app.py CHANGED
@@ -2,104 +2,157 @@ import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  from datetime import datetime
5
- import joblib
6
- import pmdarima as pm
7
- from pmdarima import auto_arima
8
  import plotly.graph_objects as go
 
9
  from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
 
 
10
 
11
- # Load the TAPAS Model
12
- def load_tapas_model():
13
- model_name = joblib.load('arima_sales_model.pkl')
14
- tokenizer = TapasTokenizer.from_pretrained(model_name)
15
- model = TapasForQuestionAnswering.from_pretrained(model_name)
16
- pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
17
- return pipe
18
 
19
- pipe = load_tapas_model()
 
 
20
 
21
- # Helper Functions
22
  def drop(dataframe):
23
- # Drop unnecessary columns and keep only 'Date' and 'Sales'
24
- columns_to_keep = [col for col in dataframe.columns if "date" in col.lower() or "sales" in col.lower()]
25
- dataframe = dataframe[columns_to_keep].dropna()
26
- return dataframe
27
 
28
  def date_format(dataframe):
29
- # Convert the 'Date' column to a proper datetime format
30
- dataframe['Date'] = pd.to_datetime(dataframe['Date'].str.strip(), format="%m/%d/%Y")
31
- return dataframe
32
 
33
  def group_to_three(dataframe):
34
- # Group the data into three-day intervals and calculate the mean sales
35
- dataframe = dataframe.groupby(pd.Grouper(key='Date', freq='3D')).Sales.mean().dropna().round(2)
36
- return dataframe
37
 
38
  def series_to_df_exogenous(series):
39
- # Convert the series to a DataFrame and create exogenous variables
40
- dataframe = series.to_frame().reset_index().set_index('Date')
41
- dataframe['Sales First Difference'] = dataframe['Sales'].diff().dropna()
42
- dataframe['Seasonal First Difference'] = dataframe['Sales'].diff(12).dropna()
43
- return dataframe.dropna()
 
 
 
 
 
44
 
45
  def train_test(dataframe, n):
46
- # Split the data into training and testing sets
47
- training_y = dataframe['Sales'][:-n]
48
- test_y = dataframe['Sales'][-n:]
49
- training_X = dataframe.iloc[:-n, 1:]
50
- test_X = dataframe.iloc[-n:, 1:]
51
- return training_y, test_y, training_X, test_X
52
-
53
- def test_fitting(train_X, train_y):
54
- # Fit a SARIMAX model using auto_arima
55
- model = auto_arima(y=train_y, X=train_X, seasonal=True, m=12, stepwise=True, suppress_warnings=True)
56
- return model
57
-
58
- def forecast_sales(df, period):
59
- # Prepare data and make predictions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  df = drop(df)
61
  df = date_format(df)
 
62
  series = group_to_three(df)
63
- exog_df = series_to_df_exogenous(series)
64
 
65
- n_periods = int(len(exog_df) * 0.2)
66
- train_y, test_y, train_X, test_X = train_test(exog_df, n_periods)
67
 
68
- model = test_fitting(train_X, train_y)
69
- future_fitted, _ = model.predict(n_periods=int(period / 3), X=exog_df.iloc[-int(period / 3):, 1:], return_conf_int=True)
 
 
 
70
 
71
- future_dates = pd.date_range(start=series.index[-1], periods=int(period / 3), freq='3D')
72
- forecast_df = pd.DataFrame({'Date': future_dates, 'Forecasted Sales': future_fitted})
73
-
74
- return forecast_df
75
 
76
- def answer_question(forecast_df, question):
77
- # Use TAPAS model to answer questions
78
- answer = pipe(table=forecast_df, query=question)
79
- return answer['answer']
 
80
 
81
- # Gradio Interface
82
- def main(uploaded_file, period, question):
83
- # Main function to process uploaded file, forecast sales, and answer the question
84
- df = pd.read_csv(uploaded_file)
85
- forecast_df = forecast_sales(df, period)
86
- answer = answer_question(forecast_df, question)
87
 
88
- return forecast_df, answer
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- interface = gr.Interface(
91
- fn=main,
 
92
  inputs=[
93
- gr.File(label="Upload CSV File"),
94
- gr.Slider(minimum=30, maximum=90, step=1, label="Forecast Days"),
95
- gr.Textbox(placeholder="Ask a question about the forecasted data", label="Question")
96
  ],
97
  outputs=[
98
- gr.Dataframe(label="Forecasted Sales Data"),
99
- gr.Textbox(label="Answer")
 
100
  ],
101
- title="Sales Forecasting Dashboard",
102
- description="Upload your sales data and get a forecast. You can also ask questions about the forecasted data."
 
103
  )
104
 
105
- interface.launch()
 
2
  import pandas as pd
3
  import numpy as np
4
  from datetime import datetime
 
 
 
5
  import plotly.graph_objects as go
6
+ import torch
7
  from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
8
+ import pmdarima as pm
9
+ from pmdarima import auto_arima
10
 
11
+ # Preprocessing functions (same as before)
12
def merge(B, C, A):
    """Placeholder; not implemented yet.

    Presumably the merge step used by ``merge_sort`` (combining ``B`` and
    ``C`` into ``A``) -- the file does not show this, TODO confirm.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement merge function here.
    return None
 
 
 
15
 
16
def merge_sort(dataframe):
    """Placeholder; not implemented yet.

    ``upload_and_forecast`` calls this and discards the return value, so an
    implementation presumably has to sort ``dataframe`` in place -- TODO
    confirm.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement merge_sort function here.
    return None
19
 
 
def drop(dataframe):
    """Keep only the date and sales columns and discard incomplete rows.

    A column is kept when its label contains "date" or "sales"
    (case-insensitive).

    Args:
        dataframe: Raw sales table as loaded from the uploaded CSV.

    Returns:
        A new DataFrame with the matching columns and no rows containing
        missing values. The input frame is left untouched.
    """
    # str() guards against non-string column labels (e.g. integer headers).
    columns_to_keep = [
        col for col in dataframe.columns
        if "date" in str(col).lower() or "sales" in str(col).lower()
    ]
    return dataframe[columns_to_keep].dropna()
 
 
23
 
def date_format(dataframe):
    """Parse the 'Date' column from ``MM/DD/YYYY`` text into datetimes.

    Args:
        dataframe: Table with a string 'Date' column; surrounding whitespace
            in the values is ignored.

    Returns:
        A copy of ``dataframe`` whose 'Date' column holds datetime values.

    Raises:
        ValueError: If a value does not match the ``%m/%d/%Y`` format.
    """
    # Work on a copy so the caller's frame (often a slice produced by
    # column selection) is not modified behind its back.
    dataframe = dataframe.copy()
    dataframe['Date'] = pd.to_datetime(dataframe['Date'].str.strip(), format="%m/%d/%Y")
    return dataframe
 
27
 
def group_to_three(dataframe):
    """Average sales over consecutive three-day windows.

    Args:
        dataframe: Table with a datetime 'Date' column and a numeric
            'Sales' column.

    Returns:
        A Series named 'Sales', indexed by the start date of each three-day
        window, holding the mean rounded to two decimals. Windows with no
        observations are dropped.
    """
    three_day_bins = dataframe.groupby(pd.Grouper(key='Date', freq='3D'))
    return three_day_bins['Sales'].mean().dropna().round(2)
 
31
 
def series_to_df_exogenous(series):
    """Turn the grouped sales series into a frame with difference features.

    Args:
        series: Series named 'Sales' whose index is named 'Date'.

    Returns:
        A DataFrame indexed by 'Date' with the columns 'Sales',
        'Sales First Difference' (lag-1 difference) and
        'Seasonal First Difference' (lag-12 difference). Leading rows for
        which a difference is undefined are dropped.
    """
    dataframe = series.to_frame().reset_index().set_index('Date')
    sales = dataframe['Sales']
    dataframe['Sales First Difference'] = sales.diff()
    dataframe['Seasonal First Difference'] = sales.diff(12)
    # A single dropna removes the first 12 rows, where the lagged values
    # needed for the differences do not exist.
    return dataframe.dropna()
35
+
36
def dates_df(dataframe):
    """Placeholder; not implemented yet.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement dates_df function here.
    return None
39
+
40
def get_forecast_period(period):
    """Convert a forecast horizon in days into a number of model steps.

    The sales data is aggregated into three-day windows, so one forecast
    step covers three days.

    Args:
        period: Forecast horizon in days (the slider value).

    Returns:
        The number of three-day steps as an int, truncated toward zero.
    """
    return int(period / 3)
43
 
def train_test(dataframe, n):
    """Split the feature frame into training and hold-out parts.

    The first column is treated as the target ('Sales') and every remaining
    column as an exogenous regressor.

    Args:
        dataframe: Frame with 'Sales' as its first column.
        n: Number of trailing rows to hold out for testing.

    Returns:
        A 6-tuple ``(training_y, test_y, test_y_series, training_X, test_X,
        future_X)`` in the order the caller unpacks it.
        ``test_y_series`` is the hold-out target with its date index.
        ``future_X`` is not used by the visible caller; returning all
        exogenous rows is an assumption -- TODO confirm.
    """
    # Split by absolute position so n == 0 yields an empty hold-out instead
    # of the empty training set that ``[:-0]`` would produce.
    split = max(len(dataframe) - n, 0)
    sales = dataframe['Sales']
    exogenous = dataframe.iloc[:, 1:]

    training_y = sales.iloc[:split]
    test_y = sales.iloc[split:]
    test_y_series = pd.Series(test_y, index=test_y.index)
    training_X = exogenous.iloc[:split]
    test_X = exogenous.iloc[split:]
    future_X = exogenous
    return training_y, test_y, test_y_series, training_X, test_X, future_X
47
+
48
def test_fitting(dataframe, Exo, trainY):
    """Fit a seasonal ARIMA model with exogenous regressors via auto_arima.

    Args:
        dataframe: Full feature frame. Unused by the fit; kept so the
            existing call signature keeps working.
        Exo: Exogenous regressors for the training window.
        trainY: Target values for the training window.

    Returns:
        The fitted model returned by ``auto_arima``; the caller calls
        ``.predict`` on it.
    """
    # m=12 matches the lag used for the seasonal difference feature.
    return auto_arima(
        y=trainY,
        X=Exo,
        seasonal=True,
        m=12,
        stepwise=True,
        suppress_warnings=True,
    )
51
+
52
def forecast_accuracy(forecast, actual):
    """Placeholder; not implemented yet.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement forecast_accuracy function here.
    return None
55
+
56
def sales_growth(dataframe, fittedValues):
    """Placeholder; not implemented yet.

    ``upload_and_forecast`` passes the feature frame and the future forecast
    series here and returns the result as its third output, which the
    interface maps to the "Sales Growth" table.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement sales_growth function here.
    return None
59
+
60
def merge_forecast_data(actual, predicted, future):
    """Placeholder; not implemented yet.

    ``upload_and_forecast`` plots the result using its first column as the
    x-axis and reads the columns 'Actual Sales', 'Predicted Sales' and
    'Forecasted Future Sales', so an implementation has to provide those.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement merge_forecast_data function here.
    return None
63
+
64
def interpret_mape(mape_score):
    """Placeholder; not implemented yet.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement interpret_mape function here.
    return None
67
+
68
def load_tapas_model():
    """Build a table-question-answering pipeline backed by a TAPAS checkpoint.

    Returns:
        A ``transformers`` pipeline for the "table-question-answering" task
        using the tokenizer and model of the checkpoint named below.
    """
    checkpoint = "google/tapas-large-finetuned-wtq"
    return pipeline(
        "table-question-answering",
        tokenizer=TapasTokenizer.from_pretrained(checkpoint),
        model=TapasForQuestionAnswering.from_pretrained(checkpoint, local_files_only=False),
    )


# Built once at import time; get_answer() reads this module-level name.
pipe = load_tapas_model()
76
+
77
def get_answer(table, query):
    """Run the module-level TAPAS pipeline on a table and a question.

    Args:
        table: Table to query, passed to the pipeline as ``table``.
        query: Question text, passed to the pipeline as ``query``.

    Returns:
        Whatever the pipeline call returns, unmodified.
    """
    return pipe(table=table, query=query)
80
+
81
def convert_answer(answer):
    """Placeholder; not implemented yet.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement convert_answer function here.
    return None
84
+
85
def get_converted_answer(table, query):
    """Placeholder; not implemented yet.

    Returns:
        None, always, until an implementation is supplied.
    """
    # TODO: implement get_converted_answer function here.
    return None
88
+
89
+ # Gradio Interface
90
def upload_and_forecast(uploaded_file, period):
    """Gradio callback: forecast sales from an uploaded CSV and chart them.

    Args:
        uploaded_file: Value of the file input; ``None`` until the user has
            uploaded something.
        period: Forecast horizon in days, taken from the slider.

    Returns:
        A 3-tuple ``(fig_compare, fig_forecast, future_sales_growth)``, one
        value per interface output. ``(None, None, None)`` when no file has
        been uploaded yet.
    """
    if uploaded_file is None:
        # The interface declares three outputs, so a single message string
        # cannot be mapped onto them; return one empty value per output.
        return None, None, None

    # Load the data. Gradio may pass a temp-file wrapper (with ``.name``)
    # or a plain path, depending on version and configuration.
    csv_source = getattr(uploaded_file, "name", uploaded_file)
    df = pd.read_csv(csv_source)
    df = drop(df)
    df = date_format(df)
    merge_sort(df)  # return value unused; presumably sorts in place -- TODO confirm
    series = group_to_three(df)

    forecast_period = get_forecast_period(period)
    df = series_to_df_exogenous(series)

    # Train the model; 20% of the rows is passed to train_test as n.
    n_periods = round(len(df) * 0.2)
    training_y, _test_y, test_y_series, training_X, test_X, _future_X = train_test(df, n_periods)
    train_test_model = test_fitting(df, training_X, training_y)

    # Predictions for the hold-out window, re-indexed onto its dates.
    fitted, _ = train_test_model.predict(X=test_X, n_periods=n_periods, return_conf_int=True)
    fitted_series = pd.Series(fitted)
    fitted_series.index = test_y_series.index

    # NOTE(review): the model is fitted on the training window only and the
    # future index starts at the last observed date -- confirm both are
    # intended before relying on the future forecast.
    future_fitted, _ = train_test_model.predict(
        X=df.iloc[-forecast_period:, 1:],
        n_periods=forecast_period,
        return_conf_int=True,
        freq='3D',
    )
    future_fitted_series = pd.Series(future_fitted)
    future_fitted_series.index = pd.date_range(df['Sales'].index[-1], periods=forecast_period, freq='3D')

    # Calculate sales growth
    future_sales_growth = sales_growth(df, future_fitted_series)

    # Prepare merged data for chart plotting; its first column is the x-axis.
    merged_data = merge_forecast_data(df['Sales'], fitted_series, future_fitted_series)
    x_values = merged_data[merged_data.columns[0]]

    # Plot the charts
    fig_compare = go.Figure()
    fig_compare.add_trace(go.Scatter(x=x_values, y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
    fig_compare.add_trace(go.Scatter(x=x_values, y=merged_data['Predicted Sales'], mode='lines', name='Predicted Sales', line=dict(color='#006400')))
    fig_compare.update_layout(title='Historical Sales Data', xaxis_title='Date', yaxis_title='Sales')

    fig_forecast = go.Figure()
    fig_forecast.add_trace(go.Scatter(x=x_values, y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
    fig_forecast.add_trace(go.Scatter(x=x_values, y=merged_data['Forecasted Future Sales'], mode='lines', name='Future Forecasted Sales'))
    fig_forecast.update_layout(title='Forecasted Sales Data', xaxis_title='Date', yaxis_title='Sales')

    # Return the figures and growth data
    return fig_compare, fig_forecast, future_sales_growth
140
 
141
+ # Gradio Interface setup
142
# Inputs: the sales CSV and the forecast horizon in days.
_forecast_inputs = [
    gr.File(label="Upload your sales data (CSV)"),
    gr.Slider(minimum=30, maximum=90, step=1, label="Forecast Period (Days)"),
]

# Outputs, in the order upload_and_forecast returns them.
_forecast_outputs = [
    gr.Plot(label="Historical vs Predicted Sales"),
    gr.Plot(label="Forecasted Sales Data"),
    gr.DataFrame(label="Sales Growth"),
]

iface = gr.Interface(
    fn=upload_and_forecast,
    inputs=_forecast_inputs,
    outputs=_forecast_outputs,
    live=True,
    title="Sales Forecasting System",
    description="Upload your sales data to start forecasting.",
)

iface.launch()