Spaces:
Running
Running
Commit ·
1898dec
1
Parent(s): 3e41dea
Updated app
Browse files
app.py
CHANGED
|
@@ -160,6 +160,7 @@ model_name = "google/tapas-large-finetuned-wtq"
|
|
| 160 |
tokenizer = TapasTokenizer.from_pretrained(model_name)
|
| 161 |
model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
|
| 162 |
|
|
|
|
| 163 |
def load_tapas_model(model, tokenizer):
|
| 164 |
pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
|
| 165 |
return pipe
|
|
@@ -168,25 +169,21 @@ pipe = load_tapas_model(model, tokenizer)
|
|
| 168 |
|
| 169 |
def get_answer(table, query):
|
| 170 |
answers = pipe(table=table, query=query)
|
| 171 |
-
print(answers['coordinates']) # FOR DEBUGGING PURPOSES
|
| 172 |
return answers
|
| 173 |
|
| 174 |
def convert_answer(answer):
|
| 175 |
if answer['aggregator'] == 'SUM':
|
| 176 |
-
print(answer['answer']) # FOR DEBUGGING
|
| 177 |
cells = answer['cells']
|
| 178 |
converted = sum(float(value.replace(',', '')) for value in cells)
|
| 179 |
return converted
|
| 180 |
|
| 181 |
if answer['aggregator'] == 'AVERAGE':
|
| 182 |
-
print(answer['answer']) # FOR DEBUGGING
|
| 183 |
cells = answer['cells']
|
| 184 |
values = [float(value.replace(',', '')) for value in cells]
|
| 185 |
converted = sum(values) / len(values)
|
| 186 |
return converted
|
| 187 |
|
| 188 |
if answer['aggregator'] == 'COUNT':
|
| 189 |
-
print(answer['answer']) # FOR DEBUGGING
|
| 190 |
cells = answer['cells']
|
| 191 |
converted = sum(int(value.replace(',', '')) for value in cells)
|
| 192 |
return converted
|
|
@@ -198,11 +195,14 @@ def get_converted_answer(table, query):
|
|
| 198 |
converted_answer = convert_answer(get_answer(table, query))
|
| 199 |
return converted_answer
|
| 200 |
|
|
|
|
|
|
|
|
|
|
| 201 |
st.title("Sales Forecasting Dashboard")
|
| 202 |
-
st.write("📈 Welcome User, start using the application by uploading your file in the
|
| 203 |
|
| 204 |
if 'uploaded' not in st.session_state:
|
| 205 |
-
st.session_state.uploaded =
|
| 206 |
|
| 207 |
# Sidebar Menu
|
| 208 |
with st.sidebar:
|
|
@@ -231,13 +231,52 @@ with st.sidebar:
|
|
| 231 |
if (st.session_state.uploaded):
|
| 232 |
st.line_chart(df)
|
| 233 |
|
| 234 |
-
|
| 235 |
'Start Forecasting',
|
| 236 |
key='forecast_button',
|
| 237 |
type="primary",
|
| 238 |
-
disabled=st.session_state.uploaded
|
| 239 |
)
|
| 240 |
|
| 241 |
-
if (
|
| 242 |
-
#
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
# Load the tokenizer and the question-answering weights named by `model_name`.
tokenizer = TapasTokenizer.from_pretrained(model_name)
model = TapasForQuestionAnswering.from_pretrained(
    model_name,
    local_files_only=False,
)
|
| 162 |
|
| 163 |
+
# NOTE(review): newer Streamlit releases deprecate `st.cache` in favour of
# `st.cache_resource` -- confirm against the Streamlit version this app pins.
@st.cache
def load_tapas_model(model, tokenizer):
    """Build a table-question-answering pipeline.

    Args:
        model: Model object handed to ``pipeline`` as ``model``.
        tokenizer: Tokenizer object handed to ``pipeline`` as ``tokenizer``.

    Returns:
        The object returned by ``pipeline("table-question-answering", ...)``.
    """
    return pipeline(
        "table-question-answering",
        model=model,
        tokenizer=tokenizer,
    )
|
|
|
|
| 169 |
|
| 170 |
def get_answer(table, query):
    """Ask the module-level ``pipe`` a question about ``table``.

    Args:
        table: Table passed to ``pipe`` as its ``table`` keyword argument.
        query: Question passed to ``pipe`` as its ``query`` keyword argument.

    Returns:
        Whatever ``pipe`` returns, unchanged.
    """
    return pipe(table=table, query=query)
|
| 173 |
|
| 174 |
def convert_answer(answer):
    """Reduce an aggregated pipeline answer to a single number.

    Thousands separators (``,``) are stripped from each cell's text before
    it is parsed.

    Args:
        answer: Mapping with an ``'aggregator'`` entry and, for the
            aggregators handled here, a ``'cells'`` entry holding the
            selected cell texts.

    Returns:
        * ``'SUM'``: the sum of the cells parsed as floats.
        * ``'AVERAGE'``: the arithmetic mean of the cells parsed as floats.
        * ``'COUNT'``: the sum of the cells parsed as integers; when any
          cell is not an integer (for example a name or a decimal), the
          number of selected cells instead.

        No branch below runs for any other aggregator value.

    Raises:
        ValueError: If a ``'SUM'``/``'AVERAGE'`` cell is not numeric.
        ZeroDivisionError: If ``'AVERAGE'`` is requested with no cells.
    """
    aggregator = answer['aggregator']

    if aggregator in ('SUM', 'AVERAGE'):
        values = [float(cell.replace(',', '')) for cell in answer['cells']]
        total = sum(values)
        return total if aggregator == 'SUM' else total / len(values)

    if aggregator == 'COUNT':
        cells = answer['cells']
        try:
            return sum(int(cell.replace(',', '')) for cell in cells)
        except ValueError:
            # Non-integer cell text cannot be summed; report how many cells
            # were selected rather than failing the whole request.
            return len(cells)
|
|
|
|
| 195 |
converted_answer = convert_answer(get_answer(table, query))
|
| 196 |
return converted_answer
|
| 197 |
|
| 198 |
+
|
| 199 |
+
# Web Application

st.title("Sales Forecasting Dashboard")
st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")

# Seed the upload flag only when the session does not have one yet, so an
# existing value is never overwritten.
if 'uploaded' not in st.session_state:
    st.session_state['uploaded'] = False
|
| 206 |
|
| 207 |
# Sidebar Menu
|
| 208 |
with st.sidebar:
|
|
|
|
| 231 |
# Chart `df` only once the `uploaded` flag is set.
if st.session_state.uploaded:
    st.line_chart(df)

# The forecast below reads `df`, so the button must stay disabled until the
# `uploaded` flag is set. Passing the flag straight through (as before)
# disabled the button exactly when data was available.
forecast_button = st.button(
    'Start Forecasting',
    key='forecast_button',
    type="primary",
    disabled=not st.session_state.uploaded,
)
|
| 240 |
|
| 241 |
+
if forecast_button:
    # Create the eXogenous values: the change versus the previous row and
    # versus the row 12 positions earlier.
    df['Sales First Difference'] = df['Sales'] - df['Sales'].shift(1)
    df['Seasonal First Difference'] = df['Sales'] - df['Sales'].shift(12)

    auto_train_test = train_test(df, 20)
    (
        training_y,
        test_y,
        test_y_series,
        training_X,
        test_X,
        future_X,
    ) = auto_train_test

    # Auto_arima to fit the model to forecast future sales
    future_model = model_fitting(df, future_X)
    # Auto_arima to check the accuracy of the train test split
    train_test_model = test_fitting(df, training_X, training_y)

    # Forecast (testing)
    # NOTE(review): 20 presumably mirrors the second argument given to
    # `train_test` above -- confirm the two are meant to stay in sync.
    n_periods = 20
    fitted, confint = train_test_model.predict(
        X=test_X,
        n_periods=n_periods,
        return_conf_int=True,
    )
    index_of_fc = test_y_series.index

    # make series for plotting purpose
    # The index is assigned after construction rather than passed to the
    # constructor, so the values are relabelled positionally.
    fitted_series = pd.Series(fitted)
    fitted_series.index = index_of_fc
    lower_series = pd.Series(confint[:, 0], index=index_of_fc)
    upper_series = pd.Series(confint[:, 1], index=index_of_fc)

    predictions = np.array(fitted)
    test_y = np.array(test_y)
    forecast_accuracy(predictions, test_y)

    # Forecast (actual)
    n_periods = 36
    freq = '3D'
    future_fitted, confint = future_model.predict(
        X=df.iloc[-n_periods:, 1:],
        n_periods=n_periods,
        return_conf_int=True,
        freq=freq,
    )
    # NOTE(review): the range starts at the last index label of
    # `df['Sales']`, so its first timestamp equals that label -- confirm
    # this is the intended start of the forecast horizon.
    future_index_of_fc = pd.date_range(
        df['Sales'].index[-1],
        periods=n_periods,
        freq=freq,
    )

    # make series for plotting purpose
    future_fitted_series = pd.Series(future_fitted)
    future_fitted_series.index = future_index_of_fc
    future_lower_series = pd.Series(confint[:, 0], index=future_index_of_fc)
    future_upper_series = pd.Series(confint[:, 1], index=future_index_of_fc)

    auto_sales_growth = sales_growth(df, future_fitted_series)
    st.write("Forecasted sales in the next 3 months")
    st.write(auto_sales_growth)
|