muhalwan committed on
Commit
dbac8dd
·
1 Parent(s): 68c5852
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +103 -67
  3. evaluator.py +1 -5
.gitignore CHANGED
@@ -11,3 +11,4 @@ optimize_data.py
11
  WORKFLOW.md
12
  data/
13
  hf_cache/
 
 
11
  WORKFLOW.md
12
  data/
13
  hf_cache/
14
+ MODEL_WORKFLOW.md
app.py CHANGED
@@ -1,11 +1,9 @@
1
  import logging
2
  import os
3
- import sys
4
- from pathlib import Path
5
 
6
  import gradio as gr
7
  import pandas as pd
8
- from typing import Optional, Tuple
9
 
10
  from config import Config
11
  from data_processor import DataProcessor
@@ -26,7 +24,13 @@ _backtest_metrics: Optional[dict] = None
26
 
27
  def initialize_system():
28
  """Initialize the prediction system (called once at startup)."""
29
- global _processor, _predictor, _config, _df_enrollment, _elective_codes, _backtest_metrics
 
 
 
 
 
 
30
 
31
  try:
32
  logger.info("Initializing prediction system...")
@@ -47,7 +51,9 @@ def initialize_system():
47
  return False
48
 
49
 
50
- def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.DataFrame], Optional[pd.DataFrame]]:
 
 
51
  """
52
  Generate enrollment predictions for a given year and semester.
53
 
@@ -58,7 +64,13 @@ def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.Dat
58
  Returns:
59
  Tuple of (summary_text, recommendations_df, all_predictions_df)
60
  """
61
- global _processor, _predictor, _config, _df_enrollment, _elective_codes, _backtest_metrics
 
 
 
 
 
 
62
 
63
  try:
64
  if semester not in [1, 2]:
@@ -67,8 +79,18 @@ def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.Dat
67
  if year < 2020 or year > 2030:
68
  return "❌ Error: Year must be between 2020 and 2030", None, None
69
 
70
- if _config is None or _predictor is None or _processor is None or _df_enrollment is None or _elective_codes is None:
71
- return "❌ Error: System not initialized. Please restart the app.", None, None
 
 
 
 
 
 
 
 
 
 
72
 
73
  logger.info(f"Generating predictions for {year} Semester {semester}...")
74
 
@@ -82,12 +104,12 @@ def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.Dat
82
 
83
  if backtest_results is None or len(backtest_results) == 0:
84
  logger.warning("Backtest returned no results, using defaults")
85
- _backtest_metrics = {'mae': 0, 'rmse': 0}
86
  else:
87
  _backtest_metrics = evaluator.generate_metrics(backtest_results)
88
  if _backtest_metrics is None:
89
  logger.warning("Metrics calculation failed, using defaults")
90
- _backtest_metrics = {'mae': 0, 'rmse': 0}
91
  else:
92
  logger.info("Using cached backtest metrics")
93
 
@@ -108,13 +130,13 @@ def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.Dat
108
  ## 📊 Prediction Summary for {year} Semester {semester_name}
109
 
110
  ### Model Performance (Backtest)
111
- - **Mean Absolute Error (MAE)**: {metrics['mae']:.2f} students
112
- - **Root Mean Squared Error (RMSE)**: {metrics['rmse']:.2f} students
113
 
114
  ### Recommendations
115
  - **Courses to Open**: {len(recommended)}
116
- - **Total Seats Needed**: {int(recommended['recommended_quota'].sum()) if not recommended.empty else 0}
117
- - **Estimated Students**: {int(recommended['predicted_enrollment'].sum()) if not recommended.empty else 0}
118
 
119
  ### Top Course
120
  """
@@ -122,40 +144,74 @@ def generate_predictions(year: int, semester: int) -> Tuple[str, Optional[pd.Dat
122
  if not recommended.empty:
123
  top_course = recommended.iloc[0]
124
  summary += f"- **{top_course['nama_mk']}** ({top_course['kode_mk']})\n"
125
- summary += f" - Predicted: {top_course['predicted_enrollment']:.0f} students\n"
126
- summary += f" - Recommended Quota: {top_course['recommended_quota']:.0f} seats"
 
 
 
 
127
  else:
128
  summary += "- No courses recommended to open"
129
 
130
  if not recommended.empty:
131
- recommended_display = recommended[[
132
- 'kode_mk', 'nama_mk', 'predicted_enrollment',
133
- 'recommended_quota', 'strategy'
134
- ]].copy()
 
 
 
 
 
135
  recommended_display.columns = [
136
- 'Course Code', 'Course Name', 'Predicted Students',
137
- 'Recommended Quota', 'Prediction Strategy'
 
 
 
138
  ]
139
- recommended_display['Predicted Students'] = recommended_display['Predicted Students'].round(1)
140
- recommended_display['Recommended Quota'] = recommended_display['Recommended Quota'].astype(int)
141
- recommended_display = recommended_display.sort_values('Predicted Students', ascending=False)
 
 
 
 
 
 
142
  else:
143
  recommended_display = pd.DataFrame()
144
 
145
  # All predictions
146
- all_predictions_display = predictions[[
147
- 'kode_mk', 'nama_mk', 'predicted_enrollment',
148
- 'recommended_quota', 'recommendation', 'strategy'
149
- ]].copy()
 
 
 
 
 
 
150
  all_predictions_display.columns = [
151
- 'Course Code', 'Course Name', 'Predicted Students',
152
- 'Recommended Quota', 'Recommendation', 'Strategy'
 
 
 
 
153
  ]
154
- all_predictions_display['Predicted Students'] = all_predictions_display['Predicted Students'].round(1)
155
- all_predictions_display['Recommended Quota'] = all_predictions_display['Recommended Quota'].astype(int)
156
- all_predictions_display = all_predictions_display.sort_values('Predicted Students', ascending=False)
 
 
 
 
 
 
157
 
158
- logger.info(f"✓ Predictions generated successfully")
159
  return summary, recommended_display, all_predictions_display
160
 
161
  except Exception as e:
@@ -190,7 +246,7 @@ def get_data_info() -> str:
190
  - **Mandatory Courses**: {len(courses) - len(elective_courses)}
191
 
192
  ### Student Population
193
- - **Years Available**: {students['thn'].min()} - {students['thn'].max()}
194
  - **Total Records**: {len(students)}
195
 
196
  ### Data Source
@@ -212,7 +268,6 @@ if not init_success:
212
 
213
  # Create Gradio Interface
214
  with gr.Blocks(title="SKS Enrollment Predictor") as demo:
215
-
216
  # Show disclaimer banner if using demo data
217
  if os.getenv("DEMO_MODE", "false").lower() == "true":
218
  gr.Markdown(
@@ -233,12 +288,11 @@ with gr.Blocks(title="SKS Enrollment Predictor") as demo:
233
  </details>
234
  </div>
235
  """,
236
- sanitize_html=False
237
  )
238
 
239
  with gr.Tabs():
240
  with gr.Tab("Generate Predictions"):
241
-
242
  with gr.Row():
243
  with gr.Column(scale=1):
244
  year_input = gr.Number(
@@ -247,62 +301,49 @@ with gr.Blocks(title="SKS Enrollment Predictor") as demo:
247
  precision=0,
248
  minimum=2020,
249
  maximum=2030,
250
- info="Masukkan tahun yang ingin diprediksi"
251
  )
252
 
253
  semester_input = gr.Radio(
254
  choices=[1, 2],
255
  label="Semester",
256
  value=2,
257
- info="1 = Ganjil, 2 = Genap"
258
  )
259
 
260
  predict_btn = gr.Button(
261
- "Generate Predictions",
262
- variant="primary",
263
- size="lg"
264
  )
265
 
266
  with gr.Column(scale=2):
267
  summary_output = gr.Markdown(
268
- label="Summary",
269
- value="Click 'Generate Predictions' to start"
270
  )
271
 
272
  gr.Markdown("### Recommended Courses to Open")
273
  recommended_output = gr.Dataframe(
274
- label="Courses Recommended to Open",
275
- wrap=True,
276
- interactive=False
277
  )
278
 
279
  with gr.Accordion("View All Predictions", open=False):
280
  all_predictions_output = gr.Dataframe(
281
- label="All Elective Courses",
282
- wrap=True,
283
- interactive=False
284
  )
285
 
286
  with gr.Tab("Data Information"):
287
- gr.Markdown(
288
- )
289
 
290
  data_info_btn = gr.Button("Refresh Data Info", variant="secondary")
291
  data_info_output = gr.Markdown()
292
 
293
- data_info_btn.click(
294
- fn=get_data_info,
295
- inputs=[],
296
- outputs=data_info_output
297
- )
298
 
299
  demo.load(fn=get_data_info, inputs=[], outputs=data_info_output)
300
 
301
-
302
  predict_btn.click(
303
  fn=generate_predictions,
304
  inputs=[year_input, semester_input],
305
- outputs=[summary_output, recommended_output, all_predictions_output]
306
  )
307
 
308
  # Footer
@@ -327,9 +368,4 @@ with gr.Blocks(title="SKS Enrollment Predictor") as demo:
327
 
328
  # Launch the app
329
  if __name__ == "__main__":
330
- demo.launch(
331
- server_name="0.0.0.0",
332
- server_port=7860,
333
- share=False,
334
- show_error=True
335
- )
 
1
  import logging
2
  import os
3
+ from typing import Optional, Tuple
 
4
 
5
  import gradio as gr
6
  import pandas as pd
 
7
 
8
  from config import Config
9
  from data_processor import DataProcessor
 
24
 
25
  def initialize_system():
26
  """Initialize the prediction system (called once at startup)."""
27
+ global \
28
+ _processor, \
29
+ _predictor, \
30
+ _config, \
31
+ _df_enrollment, \
32
+ _elective_codes, \
33
+ _backtest_metrics
34
 
35
  try:
36
  logger.info("Initializing prediction system...")
 
51
  return False
52
 
53
 
54
+ def generate_predictions(
55
+ year: int, semester: int
56
+ ) -> Tuple[str, Optional[pd.DataFrame], Optional[pd.DataFrame]]:
57
  """
58
  Generate enrollment predictions for a given year and semester.
59
 
 
64
  Returns:
65
  Tuple of (summary_text, recommendations_df, all_predictions_df)
66
  """
67
+ global \
68
+ _processor, \
69
+ _predictor, \
70
+ _config, \
71
+ _df_enrollment, \
72
+ _elective_codes, \
73
+ _backtest_metrics
74
 
75
  try:
76
  if semester not in [1, 2]:
 
79
  if year < 2020 or year > 2030:
80
  return "❌ Error: Year must be between 2020 and 2030", None, None
81
 
82
+ if (
83
+ _config is None
84
+ or _predictor is None
85
+ or _processor is None
86
+ or _df_enrollment is None
87
+ or _elective_codes is None
88
+ ):
89
+ return (
90
+ "❌ Error: System not initialized. Please restart the app.",
91
+ None,
92
+ None,
93
+ )
94
 
95
  logger.info(f"Generating predictions for {year} Semester {semester}...")
96
 
 
104
 
105
  if backtest_results is None or len(backtest_results) == 0:
106
  logger.warning("Backtest returned no results, using defaults")
107
+ _backtest_metrics = {"mae": 0, "rmse": 0}
108
  else:
109
  _backtest_metrics = evaluator.generate_metrics(backtest_results)
110
  if _backtest_metrics is None:
111
  logger.warning("Metrics calculation failed, using defaults")
112
+ _backtest_metrics = {"mae": 0, "rmse": 0}
113
  else:
114
  logger.info("Using cached backtest metrics")
115
 
 
130
  ## 📊 Prediction Summary for {year} Semester {semester_name}
131
 
132
  ### Model Performance (Backtest)
133
+ - **Mean Absolute Error (MAE)**: {metrics["mae"]:.2f} students
134
+ - **Root Mean Squared Error (RMSE)**: {metrics["rmse"]:.2f} students
135
 
136
  ### Recommendations
137
  - **Courses to Open**: {len(recommended)}
138
+ - **Total Seats Needed**: {int(recommended["recommended_quota"].sum()) if not recommended.empty else 0}
139
+ - **Estimated Students**: {int(recommended["predicted_enrollment"].sum()) if not recommended.empty else 0}
140
 
141
  ### Top Course
142
  """
 
144
  if not recommended.empty:
145
  top_course = recommended.iloc[0]
146
  summary += f"- **{top_course['nama_mk']}** ({top_course['kode_mk']})\n"
147
+ summary += (
148
+ f" - Predicted: {top_course['predicted_enrollment']:.0f} students\n"
149
+ )
150
+ summary += (
151
+ f" - Recommended Quota: {top_course['recommended_quota']:.0f} seats"
152
+ )
153
  else:
154
  summary += "- No courses recommended to open"
155
 
156
  if not recommended.empty:
157
+ recommended_display = recommended[
158
+ [
159
+ "kode_mk",
160
+ "nama_mk",
161
+ "predicted_enrollment",
162
+ "recommended_quota",
163
+ "strategy",
164
+ ]
165
+ ].copy()
166
  recommended_display.columns = [
167
+ "Course Code",
168
+ "Course Name",
169
+ "Predicted Students",
170
+ "Recommended Quota",
171
+ "Prediction Strategy",
172
  ]
173
+ recommended_display["Predicted Students"] = recommended_display[
174
+ "Predicted Students"
175
+ ].round(1)
176
+ recommended_display["Recommended Quota"] = recommended_display[
177
+ "Recommended Quota"
178
+ ].astype(int)
179
+ recommended_display = recommended_display.sort_values(
180
+ "Predicted Students", ascending=False
181
+ )
182
  else:
183
  recommended_display = pd.DataFrame()
184
 
185
  # All predictions
186
+ all_predictions_display = predictions[
187
+ [
188
+ "kode_mk",
189
+ "nama_mk",
190
+ "predicted_enrollment",
191
+ "recommended_quota",
192
+ "recommendation",
193
+ "strategy",
194
+ ]
195
+ ].copy()
196
  all_predictions_display.columns = [
197
+ "Course Code",
198
+ "Course Name",
199
+ "Predicted Students",
200
+ "Recommended Quota",
201
+ "Recommendation",
202
+ "Strategy",
203
  ]
204
+ all_predictions_display["Predicted Students"] = all_predictions_display[
205
+ "Predicted Students"
206
+ ].round(1)
207
+ all_predictions_display["Recommended Quota"] = all_predictions_display[
208
+ "Recommended Quota"
209
+ ].astype(int)
210
+ all_predictions_display = all_predictions_display.sort_values(
211
+ "Predicted Students", ascending=False
212
+ )
213
 
214
+ logger.info("✓ Predictions generated successfully")
215
  return summary, recommended_display, all_predictions_display
216
 
217
  except Exception as e:
 
246
  - **Mandatory Courses**: {len(courses) - len(elective_courses)}
247
 
248
  ### Student Population
249
+ - **Years Available**: {students["thn"].min()} - {students["thn"].max()}
250
  - **Total Records**: {len(students)}
251
 
252
  ### Data Source
 
268
 
269
  # Create Gradio Interface
270
  with gr.Blocks(title="SKS Enrollment Predictor") as demo:
 
271
  # Show disclaimer banner if using demo data
272
  if os.getenv("DEMO_MODE", "false").lower() == "true":
273
  gr.Markdown(
 
288
  </details>
289
  </div>
290
  """,
291
+ sanitize_html=False,
292
  )
293
 
294
  with gr.Tabs():
295
  with gr.Tab("Generate Predictions"):
 
296
  with gr.Row():
297
  with gr.Column(scale=1):
298
  year_input = gr.Number(
 
301
  precision=0,
302
  minimum=2020,
303
  maximum=2030,
304
+ info="Masukkan tahun yang ingin diprediksi",
305
  )
306
 
307
  semester_input = gr.Radio(
308
  choices=[1, 2],
309
  label="Semester",
310
  value=2,
311
+ info="1 = Ganjil, 2 = Genap",
312
  )
313
 
314
  predict_btn = gr.Button(
315
+ "Generate Predictions", variant="primary", size="lg"
 
 
316
  )
317
 
318
  with gr.Column(scale=2):
319
  summary_output = gr.Markdown(
320
+ label="Summary", value="Click 'Generate Predictions' to start"
 
321
  )
322
 
323
  gr.Markdown("### Recommended Courses to Open")
324
  recommended_output = gr.Dataframe(
325
+ label="Courses Recommended to Open", wrap=True, interactive=False
 
 
326
  )
327
 
328
  with gr.Accordion("View All Predictions", open=False):
329
  all_predictions_output = gr.Dataframe(
330
+ label="All Elective Courses", wrap=True, interactive=False
 
 
331
  )
332
 
333
  with gr.Tab("Data Information"):
334
+ gr.Markdown()
 
335
 
336
  data_info_btn = gr.Button("Refresh Data Info", variant="secondary")
337
  data_info_output = gr.Markdown()
338
 
339
+ data_info_btn.click(fn=get_data_info, inputs=[], outputs=data_info_output)
 
 
 
 
340
 
341
  demo.load(fn=get_data_info, inputs=[], outputs=data_info_output)
342
 
 
343
  predict_btn.click(
344
  fn=generate_predictions,
345
  inputs=[year_input, semester_input],
346
+ outputs=[summary_output, recommended_output, all_predictions_output],
347
  )
348
 
349
  # Footer
 
368
 
369
  # Launch the app
370
  if __name__ == "__main__":
371
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)
 
 
 
 
 
evaluator.py CHANGED
@@ -1,5 +1,4 @@
1
  import logging
2
- import os
3
  from pathlib import Path
4
 
5
  import matplotlib.pyplot as plt
@@ -81,10 +80,7 @@ class Evaluator:
81
 
82
  self._plot_results(results)
83
 
84
- return {
85
- 'mae': mae,
86
- 'rmse': rmse
87
- }
88
 
89
  def _plot_results(self, df):
90
  """Generate simple Actual vs Predicted scatter plot."""
 
1
  import logging
 
2
  from pathlib import Path
3
 
4
  import matplotlib.pyplot as plt
 
80
 
81
  self._plot_results(results)
82
 
83
+ return {"mae": mae, "rmse": rmse}
 
 
 
84
 
85
  def _plot_results(self, df):
86
  """Generate simple Actual vs Predicted scatter plot."""