ShinyaJ committed on
Commit
47a507a
·
verified ·
1 Parent(s): e66437e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +255 -106
app.py CHANGED
@@ -5,7 +5,6 @@ import numpy as np
5
  import re
6
  import os
7
  import uuid
8
- from io import BytesIO
9
  from typing import List, Dict, Tuple, Optional
10
  try:
11
  from rapidfuzz import process as rf_process
@@ -16,15 +15,13 @@ except Exception:
16
  APP_TITLE = "Ward Ranking Random Assigner"
17
  DESCRIPTION = """
18
  **Flow**
19
- 1) อัปโหลดไฟล์ .csv/.xlsx
20
- 2) เลือกวอร์ดที่ใช้ + ใส่ capacity
21
- 3) ตรวจหัวคอลัมน์ที่อ่านได้ (Available columns)
22
- 4) **เลือกวิธี mapping**:
23
- - Auto-detect (คำไทย/อังกฤษ + fuzzy) ระบบเติมให้อัตโนมัติ
24
- - หรือกรอก **หมายเลขคอลัมน์** ตามรายการ Available columns (เลขเริ่ม 1)
25
- 5) Clean เหลือเฉพาะ NAME, ID, และคอลัมน์วอร์ดที่เลือก (ค่าจัดอันดับถูกแปลงเป็นตัวเลข)
26
- 6) Assign → สุ่มตามลำดับอันดับ โดยเคารพ capacity
27
- - **จะตรวจว่าจำนวนนักศึกษา <= ผลรวม capacity** (ขาดได้แต่ห้ามเกิน)
28
  """
29
 
30
  WARD_CHOICES = [
@@ -38,14 +35,30 @@ WARD_CHOICES = [
38
  ("Obstetrics", "สูติศาสตร์"),
39
  ]
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # Keyword dictionary for auto mapping
42
  AUTO_MAP = {
43
  "NAME": ["ชื่อ-สกุล", "ชื่อ - สกุล", "fullname", "full name", "name", "student name"],
44
  "ID": ["รหัสนักศึกษา", "รหัส", "student id", "id", "studentid"],
45
  "Medical": ["อายุรศาสตร์", "medical"],
46
- "Medical_1": ["อายุรศาสตร์_1", "medical_1", "med_1","med1"],
47
  "Medical_2": ["อายุรศาสตร์_2", "medical_2", "med_2", "med2"],
48
- "Surgical": ["ศัลยศาสตร์", "surgical", "surgery","surg"],
49
  "Pediatric": ["เด็ก", "pediatric", "pediatrics"],
50
  "Community": ["ชุมชน", "community"],
51
  "Psychiatric": ["จิตเวช", "psychiatric"],
@@ -54,7 +67,7 @@ AUTO_MAP = {
54
 
55
  def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
56
  if file is None:
57
- return None, "กรุณาอัปโหลดไฟล์ก่อน (.csv หรือ .xlsx)"
58
  name = file.name.lower() if hasattr(file, "name") else ""
59
  try:
60
  if name.endswith(".csv"):
@@ -62,13 +75,12 @@ def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
62
  elif name.endswith(".xlsx"):
63
  df = pd.read_excel(file.name if hasattr(file, "name") else file)
64
  else:
65
- # ลองเดาว่าเป็น csv
66
  try:
67
  df = pd.read_csv(file)
68
  except Exception:
69
- return None, "รองรับเฉพาะ .csv หรือ .xlsx เท่านั้น"
70
  except Exception as e:
71
- return None, f"อ่านไฟล์ไม่สำเร็จ: {e}"
72
  df.columns = [str(c).strip() for c in df.columns]
73
  return df, ""
74
 
@@ -91,8 +103,6 @@ def parse_rank(value) -> Optional[int]:
91
  return None
92
 
93
  def auto_map_columns(df: pd.DataFrame, selected_wards: List[str]) -> Dict[str, int]:
94
- """Return mapping as index (1-based) for NAME, ID, and selected ward columns.
95
- Use keyword dictionary and fuzzy fallback (if available)."""
96
  cols = list(df.columns)
97
  col_lower = [c.lower() for c in cols]
98
  result: Dict[str, int] = {}
@@ -100,11 +110,9 @@ def auto_map_columns(df: pd.DataFrame, selected_wards: List[str]) -> Dict[str, i
100
  def find_by_keywords(keywords: List[str]) -> Optional[int]:
101
  for kw in keywords:
102
  kw_low = kw.lower()
103
- # contains search
104
  for idx, c_low in enumerate(col_lower):
105
  if kw_low in c_low:
106
- return idx + 1 # 1-based
107
- # fuzzy fallback
108
  if HAS_FUZZ:
109
  best_idx = None
110
  best_score = -1
@@ -120,28 +128,19 @@ def auto_map_columns(df: pd.DataFrame, selected_wards: List[str]) -> Dict[str, i
120
  return best_idx
121
  return None
122
 
123
- # NAME / ID
124
  n_idx = find_by_keywords(AUTO_MAP["NAME"])
125
  if n_idx: result["NAME"] = n_idx
126
  i_idx = find_by_keywords(AUTO_MAP["ID"])
127
  if i_idx: result["ID"] = i_idx
128
-
129
- # wards
130
  for w in selected_wards:
131
  kws = AUTO_MAP.get(w, [w])
132
  w_idx = find_by_keywords(kws)
133
  if w_idx:
134
  result[w] = w_idx
135
-
136
  return result
137
 
138
  def build_cleaned_from_indices(df: pd.DataFrame,
139
  mapping_indices: Dict[str, int]) -> pd.DataFrame:
140
- """
141
- mapping_indices: {Field -> 1-based column index in df}
142
- Keep only NAME, ID, and ward columns. Convert ward values to Int (ranks).
143
- """
144
- # Resolve names
145
  def idx_to_name(k: str) -> str:
146
  idx = mapping_indices.get(k, None)
147
  if idx is None: return ""
@@ -154,11 +153,9 @@ def build_cleaned_from_indices(df: pd.DataFrame,
154
  missing = []
155
  if not name_col: missing.append("NAME")
156
  if not id_col: missing.append("ID")
157
- raise ValueError(f"หาไม่พบคอลัมน์บังคับ: {', '.join(missing)}")
158
 
159
- # collect ward columns
160
- ward_cols_src = []
161
- ward_cols_dst = []
162
  for w, _th in WARD_CHOICES:
163
  if w in mapping_indices:
164
  c = idx_to_name(w)
@@ -172,31 +169,28 @@ def build_cleaned_from_indices(df: pd.DataFrame,
172
  rename_map.update({src: dst for src, dst in zip(ward_cols_src, ward_cols_dst)})
173
  cleaned = cleaned.rename(columns=rename_map)
174
 
175
- # parse ranks
176
  for c in cleaned.columns:
177
  if c not in ("NAME", "ID"):
178
  cleaned[c] = cleaned[c].apply(parse_rank).astype("Int64")
179
- # order
180
  ordered = ["NAME", "ID"] + [c for c in cleaned.columns if c not in ("NAME", "ID")]
181
  cleaned = cleaned[ordered]
182
  return cleaned
183
 
184
  def random_assign(cleaned: pd.DataFrame,
185
  capacities: Dict[str, int]) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, int]]:
186
- """Assign by rank rounds; tie-break with numpy's global RNG (np.random.choice)."""
187
  wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
188
  cap = {w: int(capacities.get(w, 0)) for w in wards}
189
 
190
  assigned = pd.Series(index=cleaned.index, data=pd.NA, dtype="object")
191
  choice_no = pd.Series(index=cleaned.index, data=pd.NA, dtype="Int64")
192
 
193
- max_rank = 0
194
  for w in wards:
195
  m = cleaned[w].max(skipna=True)
196
  if pd.notna(m):
197
- max_rank = max(max_rank, int(m))
198
 
199
- for r in range(1, max_rank + 1):
200
  if all(c <= 0 for c in cap.values()):
201
  break
202
  for w in wards:
@@ -221,13 +215,114 @@ def random_assign(cleaned: pd.DataFrame,
221
  not_assigned = result[result["AssignedWard"].isna()].copy()
222
  return result.fillna(""), not_assigned.fillna(""), cap
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  # ===== Helpers for temp file paths =====
225
  def _tmp(name: str) -> str:
226
  os.makedirs("/tmp", exist_ok=True)
227
  return f"/tmp/{uuid.uuid4().hex}-{name}"
228
 
229
  # ===== Gradio callbacks =====
230
-
231
  def update_capacity_table(selected_wards: List[str]) -> pd.DataFrame:
232
  rows = []
233
  for w, th in WARD_CHOICES:
@@ -238,13 +333,9 @@ def update_capacity_table(selected_wards: List[str]) -> pd.DataFrame:
238
  def on_upload(file, selected_wards):
239
  df, msg = read_table(file)
240
  if df is None:
241
- # return flat outputs for all mapping fields
242
  return gr.update(value=msg, visible=True), "", None, None, None, None, None, None, None, None, None, None
243
- # Show available columns
244
  avail = available_columns_text(df)
245
- # Auto-detect mapping (indices)
246
  auto_idx = auto_map_columns(df, selected_wards or [])
247
- # Prepare number inputs defaults
248
  def idx_or_none(key):
249
  return int(auto_idx[key]) if key in auto_idx else None
250
  name_num = idx_or_none("NAME")
@@ -257,24 +348,23 @@ def on_upload(file, selected_wards):
257
  comm_num = idx_or_none("Community")
258
  psy_num = idx_or_none("Psychiatric")
259
  obs_num = idx_or_none("Obstetrics")
260
- return (gr.update(value="✓ อ่านไฟล์สำเร็จ", visible=True), avail, name_num, id_num,
261
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num)
262
 
263
  def collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols):
264
- """Validate numeric mapping and build mapping dict {Field: index}"""
265
  errors = []
266
  mapping = {}
267
  def valid(num, label):
268
  if num is None:
269
- errors.append(f"- กรุณาใส่หมายเลขของ {label}")
270
  return None
271
  try:
272
  num = int(num)
273
  except Exception:
274
- errors.append(f"- {label} ต้องเป็นตัวเลข")
275
  return None
276
  if not (1 <= num <= n_cols):
277
- errors.append(f"- {label} ต้องอยู่ระหว่าง 1–{n_cols}")
278
  return None
279
  return num
280
 
@@ -293,7 +383,7 @@ def collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
293
  def on_clean(file, selected_wards, capacity_df, name_num, id_num,
294
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
295
  if not selected_wards:
296
- return gr.update(value="กรุณาเลือกวอร์ดอย่างน้อย 1", visible=True), None, None, None
297
 
298
  df, msg = read_table(file)
299
  if df is None:
@@ -307,29 +397,82 @@ def on_clean(file, selected_wards, capacity_df, name_num, id_num,
307
  }
308
  errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
309
  if errors:
310
- return gr.update(value="❌ Mapping ไม่ครบ/ไม่ถูกต้อง:\n" + "\n".join(errors), visible=True), None, None, None
311
 
312
  try:
313
  cleaned = build_cleaned_from_indices(df, mapping_idx)
314
  except Exception as e:
315
- return gr.update(value=f"❌ เกิดข้อผิดพลาด: {e}", visible=True), None, None, None
316
 
317
- # Write to a unique temp file path
318
  cleaned_path = _tmp("cleaned.csv")
319
  cleaned.to_csv(cleaned_path, index=False, encoding="utf-8-sig")
320
 
321
- info = "✓ Cleaning สำเร็จ"
322
  return gr.update(value=info, visible=True), cleaned.head(30), cleaned_path, len(cleaned)
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  def on_assign(file, selected_wards, capacity_df, name_num, id_num,
325
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
326
- # Clean first to get the cleaned df and student count
327
  status, cleaned_preview, cleaned_file, n_students = on_clean(file, selected_wards, capacity_df, name_num, id_num,
328
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num)
329
  if cleaned_preview is None:
330
- return status, None, None, None, None
331
 
332
- # Recreate full cleaned df (not just head) for assignment
333
  df, _ = read_table(file)
334
  n_cols = len(df.columns)
335
  ward_nums = {
@@ -340,56 +483,43 @@ def on_assign(file, selected_wards, capacity_df, name_num, id_num,
340
  _errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
341
  cleaned = build_cleaned_from_indices(df, mapping_idx)
342
 
343
- # Build capacity map
344
- cap_df = capacity_df.copy()
345
- if cap_df is None or cap_df.empty:
346
- return gr.update(value="กรุณากรอก capacity ก่อน", visible=True), None, None, None, None
347
- cap_df.columns = ["Ward", "Thai Name", "Capacity"]
348
- cap_df = cap_df[cap_df["Ward"].isin([c for c in cleaned.columns if c not in ("NAME", "ID")])]
349
- cap_map = {}
350
- for _, row in cap_df.iterrows():
351
- try:
352
- cap_map[str(row["Ward"])] = int(row["Capacity"])
353
- except Exception:
354
- cap_map[str(row["Ward"])] = 0
355
 
356
- total_capacity = sum(cap_map.values())
357
- # Pre-check: students must be <= total capacity (ขาดได้แต่ห้ามเกิน)
358
  if n_students is None:
359
  n_students = len(cleaned)
360
  if n_students > total_capacity:
361
- msg = f"❌ จำนวนผู้สมัคร {n_students} คน มากกว่า capacity รวม {total_capacity} ที่กำหนด (ขาดได้แต่ห้ามเกิน)"
362
- return gr.update(value=msg, visible=True), None, None, None, None
363
 
364
- # Assign without seed; use np.random.choice
365
- assigned, not_assigned, leftover = random_assign(cleaned, cap_map)
366
 
367
- # Write files to unique temp paths
368
  assigned_path = _tmp("assigned.csv")
369
  not_assigned_path = _tmp("not_assigned.csv")
370
  assigned.to_csv(assigned_path, index=False, encoding="utf-8-sig")
371
  not_assigned.to_csv(not_assigned_path, index=False, encoding="utf-8-sig")
372
 
373
- leftover_text = "ความจุคงเหลือ:\n" + "\n".join([f"- {k}: {v}" for k, v in leftover.items()])
 
374
 
375
- return status, assigned.head(30), assigned_path, not_assigned_path, leftover_text
376
 
377
  with gr.Blocks(title=APP_TITLE) as demo:
378
  gr.Markdown(f"# {APP_TITLE}")
379
  gr.Markdown(DESCRIPTION)
380
 
381
  with gr.Row():
382
- file = gr.File(file_count="single", file_types=[".csv", ".xlsx"], label="อัปโหลดข้อมูล (.csv/.xlsx)")
383
 
384
- with gr.Accordion("1) เลือกวอร์ดที่ต้องใช้", open=True):
385
  selected_wards = gr.CheckboxGroup(
386
  choices=[w for w, _ in WARD_CHOICES],
387
- label="เลือกวอร์ด (เลือกได้หลายข้อ)",
388
  value=["Medical", "Surgical", "Pediatric", "Community", "Psychiatric", "Obstetrics"]
389
  )
390
- gr.Markdown("คำแปล: " + ", ".join([f"**{w}** = {th}" for w, th in WARD_CHOICES]))
391
 
392
- with gr.Accordion("2) กำหนด Capacity ต่อวอร์ด", open=True):
393
  capacity_df = gr.Dataframe(
394
  headers=["Ward", "Thai Name", "Capacity"],
395
  value=[],
@@ -397,38 +527,56 @@ with gr.Blocks(title=APP_TITLE) as demo:
397
  col_count=3,
398
  interactive=True,
399
  wrap=True,
400
- label="กรอกเฉพาะแถวของวอร์ดที่เลือก"
401
  )
402
  selected_wards.change(fn=update_capacity_table, inputs=selected_wards, outputs=capacity_df)
403
 
404
- with gr.Accordion("3) ตรวจหัวคอลัมน์ & เลือก mapping (Auto/ตัวเลข)", open=True):
405
  status = gr.Markdown(visible=False)
406
- available = gr.Code(label="Available columns (เลขเริ่มที่ 1)", language="markdown", interactive=False)
407
- auto_btn = gr.Button("อ่านไฟล์ & Auto-detect mapping")
408
- # numeric mapping inputs
409
- name_num = gr.Number(label="หมายเลขคอลัมน์สำหรับ NAME", precision=0)
410
- id_num = gr.Number(label="หมายเลขคอลัมน์สำหรับ ID", precision=0)
411
  with gr.Row():
412
- med_num = gr.Number(label="หมายเลขคอลัมน์ Medical", precision=0)
413
- med1_num = gr.Number(label="หมายเลขคอลัมน์ Medical_1", precision=0)
414
- med2_num = gr.Number(label="หมายเลขคอลัมน์ Medical_2", precision=0)
415
  with gr.Row():
416
- surg_num = gr.Number(label="หมายเลขคอลัมน์ Surgical", precision=0)
417
- ped_num = gr.Number(label="หมายเลขคอลัมน์ Pediatric", precision=0)
418
- comm_num = gr.Number(label="หมายเลขคอลัมน์ Community", precision=0)
419
  with gr.Row():
420
- psy_num = gr.Number(label="หมายเลขคอลัมน์ Psychiatric", precision=0)
421
- obs_num = gr.Number(label="หมายเลขคอลัมน์ Obstetrics", precision=0)
422
 
423
  auto_btn.click(fn=on_upload, inputs=[file, selected_wards],
424
  outputs=[status, available, name_num, id_num,
425
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num])
426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  with gr.Row():
428
  clean_btn = gr.Button("Clean data (ดูพรีวิว)", variant="primary")
429
 
430
- preview = gr.Dataframe(label="พรีวิวข้อมูลที่ผ่านการ clean (หัว 30 แถว)", visible=True)
431
- cleaned_file = gr.File(label="ดาวน์โหลดไฟล์ cleaned.csv")
432
 
433
  clean_btn.click(
434
  fn=on_clean,
@@ -438,16 +586,17 @@ with gr.Blocks(title=APP_TITLE) as demo:
438
  )
439
 
440
  assign_btn = gr.Button("Assign (สุ่มตามลำดับอันดับ)")
441
- assigned_preview = gr.Dataframe(label="ตัวอย่างผลการจัดสรร (หัว 30 แถว)")
442
- assigned_file = gr.File(label="ดาวน์โหลดไฟล์ assigned.csv")
443
- not_assigned_file = gr.File(label="ดาวน์โหลดไฟล์ not_assigned.csv")
444
- leftover_text = gr.Textbox(label="สรุปความจุคงเหลือ", interactive=False)
 
445
 
446
  assign_btn.click(
447
  fn=on_assign,
448
  inputs=[file, selected_wards, capacity_df, name_num, id_num,
449
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num],
450
- outputs=[status, assigned_preview, assigned_file, not_assigned_file, leftover_text]
451
  )
452
 
453
  if __name__ == "__main__":
 
5
  import re
6
  import os
7
  import uuid
 
8
  from typing import List, Dict, Tuple, Optional
9
  try:
10
  from rapidfuzz import process as rf_process
 
15
  APP_TITLE = "Ward Ranking Random Assigner"
16
  DESCRIPTION = """
17
  **Flow**
18
+ 1) Upload .csv/.xlsx
19
+ 2) Choose wards + set capacity
20
+ 3) Check Available columns
21
+ 4) Map by Auto-detect (Thai/English + fuzzy) or by numbers (1-based)
22
+ 5) Clean keep NAME/ID + selected wards; convert ranks to integers
23
+ 6) Assign round-by-rank with random tie-breaking; respect capacity
24
+ - Check: #students <= total capacity (shortage allowed, not exceed)
 
 
25
  """
26
 
27
  WARD_CHOICES = [
 
35
  ("Obstetrics", "สูติศาสตร์"),
36
  ]
37
 
38
# ===== Display labels (English-first with Thai in parentheses) =====
# Each internal ward key maps to an (English label, Thai label) pair.
WARD_LABELS = dict(
    Medical=("Internal Medicine", "อายุรศาสตร์"),
    Medical_1=("Internal Medicine 1", "อายุรศาสตร์_1"),
    Medical_2=("Internal Medicine 2", "อายุรศาสตร์_2"),
    Surgical=("Surgery", "ศัลยศาสตร์"),
    Pediatric=("Pediatrics", "เด็ก"),
    Community=("Community Health", "ชุมชน"),
    Psychiatric=("Psychiatry", "จิตเวช"),
    Obstetrics=("Obstetrics", "สูติศาสตร์"),
)
49
+
50
def ward_display(ward_key: str) -> str:
    """Return the bilingual label ``"English (Thai)"`` for a ward key.

    A key that is missing from WARD_LABELS is used as-is for both parts.
    """
    fallback = (ward_key, ward_key)
    english, thai = WARD_LABELS.get(ward_key, fallback)
    return f"{english} ({thai})"
53
+
54
  # Keyword dictionary for auto mapping
55
  AUTO_MAP = {
56
  "NAME": ["ชื่อ-สกุล", "ชื่อ - สกุล", "fullname", "full name", "name", "student name"],
57
  "ID": ["รหัสนักศึกษา", "รหัส", "student id", "id", "studentid"],
58
  "Medical": ["อายุรศาสตร์", "medical"],
59
+ "Medical_1": ["อายุรศาสตร์_1", "medical_1", "med_1", "med1"],
60
  "Medical_2": ["อายุรศาสตร์_2", "medical_2", "med_2", "med2"],
61
+ "Surgical": ["ศัลยศาสตร์", "surgical", "surgery", "surg"],
62
  "Pediatric": ["เด็ก", "pediatric", "pediatrics"],
63
  "Community": ["ชุมชน", "community"],
64
  "Psychiatric": ["จิตเวช", "psychiatric"],
 
67
 
68
  def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
69
  if file is None:
70
+ return None, "Please upload a file (.csv or .xlsx)"
71
  name = file.name.lower() if hasattr(file, "name") else ""
72
  try:
73
  if name.endswith(".csv"):
 
75
  elif name.endswith(".xlsx"):
76
  df = pd.read_excel(file.name if hasattr(file, "name") else file)
77
  else:
 
78
  try:
79
  df = pd.read_csv(file)
80
  except Exception:
81
+ return None, "Only .csv or .xlsx are supported"
82
  except Exception as e:
83
+ return None, f"Failed to read file: {e}"
84
  df.columns = [str(c).strip() for c in df.columns]
85
  return df, ""
86
 
 
103
  return None
104
 
105
  def auto_map_columns(df: pd.DataFrame, selected_wards: List[str]) -> Dict[str, int]:
 
 
106
  cols = list(df.columns)
107
  col_lower = [c.lower() for c in cols]
108
  result: Dict[str, int] = {}
 
110
  def find_by_keywords(keywords: List[str]) -> Optional[int]:
111
  for kw in keywords:
112
  kw_low = kw.lower()
 
113
  for idx, c_low in enumerate(col_lower):
114
  if kw_low in c_low:
115
+ return idx + 1
 
116
  if HAS_FUZZ:
117
  best_idx = None
118
  best_score = -1
 
128
  return best_idx
129
  return None
130
 
 
131
  n_idx = find_by_keywords(AUTO_MAP["NAME"])
132
  if n_idx: result["NAME"] = n_idx
133
  i_idx = find_by_keywords(AUTO_MAP["ID"])
134
  if i_idx: result["ID"] = i_idx
 
 
135
  for w in selected_wards:
136
  kws = AUTO_MAP.get(w, [w])
137
  w_idx = find_by_keywords(kws)
138
  if w_idx:
139
  result[w] = w_idx
 
140
  return result
141
 
142
  def build_cleaned_from_indices(df: pd.DataFrame,
143
  mapping_indices: Dict[str, int]) -> pd.DataFrame:
 
 
 
 
 
144
  def idx_to_name(k: str) -> str:
145
  idx = mapping_indices.get(k, None)
146
  if idx is None: return ""
 
153
  missing = []
154
  if not name_col: missing.append("NAME")
155
  if not id_col: missing.append("ID")
156
+ raise ValueError(f"Missing required columns: {', '.join(missing)}")
157
 
158
+ ward_cols_src, ward_cols_dst = [], []
 
 
159
  for w, _th in WARD_CHOICES:
160
  if w in mapping_indices:
161
  c = idx_to_name(w)
 
169
  rename_map.update({src: dst for src, dst in zip(ward_cols_src, ward_cols_dst)})
170
  cleaned = cleaned.rename(columns=rename_map)
171
 
 
172
  for c in cleaned.columns:
173
  if c not in ("NAME", "ID"):
174
  cleaned[c] = cleaned[c].apply(parse_rank).astype("Int64")
 
175
  ordered = ["NAME", "ID"] + [c for c in cleaned.columns if c not in ("NAME", "ID")]
176
  cleaned = cleaned[ordered]
177
  return cleaned
178
 
179
  def random_assign(cleaned: pd.DataFrame,
180
  capacities: Dict[str, int]) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, int]]:
 
181
  wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
182
  cap = {w: int(capacities.get(w, 0)) for w in wards}
183
 
184
  assigned = pd.Series(index=cleaned.index, data=pd.NA, dtype="object")
185
  choice_no = pd.Series(index=cleaned.index, data=pd.NA, dtype="Int64")
186
 
187
+ mr = 0
188
  for w in wards:
189
  m = cleaned[w].max(skipna=True)
190
  if pd.notna(m):
191
+ mr = max(mr, int(m))
192
 
193
+ for r in range(1, mr + 1):
194
  if all(c <= 0 for c in cap.values()):
195
  break
196
  for w in wards:
 
215
  not_assigned = result[result["AssignedWard"].isna()].copy()
216
  return result.fillna(""), not_assigned.fillna(""), cap
217
 
218
+ # ===== Reporting helpers =====
219
# NOTE(review): an identical ward_display is also defined directly after
# WARD_LABELS earlier in this file; one of the two definitions can be dropped.
def ward_display(ward_key: str) -> str:
    """Return the bilingual label ``"English (Thai)"`` for a ward key.

    A key that is missing from WARD_LABELS is used as-is for both parts.
    """
    fallback = (ward_key, ward_key)
    english, thai = WARD_LABELS.get(ward_key, fallback)
    return f"{english} ({thai})"
222
+
223
def max_rank_in(cleaned: pd.DataFrame) -> int:
    """Return the largest rank number found in any ward column.

    Every column other than ``NAME`` and ``ID`` is treated as a ward column.

    Args:
        cleaned: Table with ``NAME``/``ID`` columns plus one column per ward.

    Returns:
        The highest rank as an ``int``, or ``0`` when there are no ward
        columns or no numeric rank values at all.
    """
    ward_cols = [c for c in cleaned.columns if c not in ("NAME", "ID")]
    highest = 0
    for col in ward_cols:
        # Coerce so a stray non-numeric cell is ignored instead of making
        # int(...) raise on a string maximum.
        numeric = pd.to_numeric(cleaned[col], errors="coerce")
        top = numeric.max(skipna=True)
        if pd.notna(top):
            highest = max(highest, int(top))
    return highest
231
+
232
def make_rank1_report(cleaned: pd.DataFrame, capacities: Dict[str, int]) -> str:
    """Build a Markdown report of first-choice (rank 1) demand per ward.

    Every column of ``cleaned`` other than ``NAME`` and ``ID`` is treated as a
    ward column holding rank numbers.

    Args:
        cleaned: Table with ``NAME``/``ID`` columns plus one column per ward.
        capacities: Ward key -> capacity; wards missing here count as 0.

    Returns:
        Markdown text: totals, a per-ward table (capacity, rank-1 count and up
        to three sample names), then lists of wards whose rank-1 count is
        above or below capacity.
    """

    def cell(value) -> str:
        # A raw "|" or newline inside a name would split or terminate the
        # Markdown table row, so neutralise both.
        return str(value).replace("|", r"\|").replace("\r", " ").replace("\n", " ")

    wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
    total_students = len(cleaned)
    total_capacity = sum(int(capacities.get(w, 0)) for w in wards)

    lines = [
        "## Rank 1 Results (การแสดงผลอันดับที่ 1)",
        "",
        f"- **Total Students (จำนวนนักศึกษาทั้งหมด):** {total_students} students (คน)",
        f"- **Total Capacity (ความจุรวม):** {total_capacity} people (คน)",
        "",
        "| Ward (วอร์ด) | Capacity (ความจุ) | Rank 1 Count (จำนวนเลือกอันดับ 1) | Students (รายชื่อนักศึกษา) |",
        "|---|---:|---:|---|",
    ]

    over = []
    under = []
    for w in wards:
        ward_cap = int(capacities.get(w, 0))
        # Missing ranks compare as NA; treat them explicitly as "not rank 1".
        is_rank1 = (cleaned[w] == 1).fillna(False)
        rank1_students = cleaned.loc[is_rank1, "NAME"].astype(str).tolist()
        r1_count = len(rank1_students)
        shown = ", ".join(cell(n) for n in rank1_students[:3])
        if r1_count > 3:
            shown += "..."
        lines.append(f"| {ward_display(w)} | {ward_cap} | {r1_count} | {shown} |")
        if r1_count > ward_cap:
            over.append((w, r1_count, ward_cap))
        elif r1_count < ward_cap:
            under.append((w, r1_count, ward_cap))

    lines.append("")
    lines.append("### Additional Statistics (สถิติเพิ่มเติม)")
    lines.append("")
    if over:
        lines.append("**Wards where Rank 1 count exceeds capacity (วอร์ดที่มีคนเลือกอันดับ 1 เกินความจุ):**")
        for w, count, ward_cap in over:
            lines.append(f"- {ward_display(w)}: {count} selected (capacity {ward_cap})")
    else:
        lines.append("- No wards exceed capacity at Rank 1. (ไม่มีวอร์ดใดเกินความจุในอันดับ 1)")
    if under:
        lines.append("")
        lines.append("**Wards where Rank 1 count below capacity (วอร์ดที่มีคนเลือกอันดับ 1 น้อยกว่าความจุ):**")
        for w, count, ward_cap in under:
            lines.append(f"- {ward_display(w)}: {count} selected (capacity {ward_cap})")
    return "\n".join(lines)
272
+
273
def make_rank_report(cleaned: pd.DataFrame, capacities: Dict[str, int], rank: int) -> str:
    """Build a Markdown report of how many students gave each ward ``rank``.

    Every column of ``cleaned`` other than ``NAME`` and ``ID`` is treated as a
    ward column holding rank numbers.

    Args:
        cleaned: Table with ``NAME``/``ID`` columns plus one column per ward.
        capacities: Ward key -> capacity; wards missing here count as 0.
        rank: The rank value to report on.

    Returns:
        Markdown text: totals, a per-ward table (capacity, count at this rank
        and up to three sample names), then lists of wards whose count is
        above or below capacity.
    """

    def cell(value) -> str:
        # A raw "|" or newline inside a name would split or terminate the
        # Markdown table row, so neutralise both.
        return str(value).replace("|", r"\|").replace("\r", " ").replace("\n", " ")

    wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
    total_students = len(cleaned)
    total_capacity = sum(int(capacities.get(w, 0)) for w in wards)

    lines = [
        f"## Rank {rank} Results (การแสดงผลอันดับที่ {rank})",
        f"- **Total Students (จำนวนนักศึกษาทั้งหมด):** {total_students} students (คน)",
        f"- **Total Capacity (ความจุรวม):** {total_capacity} people (คน)",
        "",
        f"| Ward (วอร์ด) | Capacity (ความจุ) | Rank {rank} Count (จำนวนเลือกอันดับ {rank}) | Students (รายชื่อนักศึกษา) |",
        "|---|---:|---:|---|",
    ]

    over, under = [], []
    for w in wards:
        ward_cap = int(capacities.get(w, 0))
        # Missing ranks compare as NA; treat them explicitly as "no match".
        at_rank = (cleaned[w] == rank).fillna(False)
        names = cleaned.loc[at_rank, "NAME"].astype(str).tolist()
        cnt = len(names)
        sample = ", ".join(cell(n) for n in names[:3])
        if cnt > 3:
            sample += "..."
        lines.append(f"| {ward_display(w)} | {ward_cap} | {cnt} | {sample} |")
        if cnt > ward_cap:
            over.append((w, cnt, ward_cap))
        elif cnt < ward_cap:
            under.append((w, cnt, ward_cap))

    lines.append("")
    lines.append("**Additional Statistics (สถิติเพิ่มเติม):**")
    # Sub-items are indented two spaces so Markdown nests them under the
    # parent bullet instead of rendering them as sibling bullets.
    if over:
        lines.append("- Wards where count exceeds capacity (เกินความจุ):")
        for w, count, ward_cap in over:
            lines.append(f"  - {ward_display(w)}: {count} selected (capacity {ward_cap})")
    else:
        lines.append("- No wards exceed capacity at this rank. (ไม่มีวอร์ดเกินความจุในอันดับนี้)")
    if under:
        lines.append("- Wards where count below capacity (ต่ำกว่าความจุ):")
        for w, count, ward_cap in under:
            lines.append(f"  - {ward_display(w)}: {count} selected (capacity {ward_cap})")
    return "\n".join(lines)
309
+
310
def make_all_ranks_report(cleaned: pd.DataFrame, capacities: Dict[str, int]) -> str:
    """Concatenate the per-rank reports for every rank from 1 to the maximum.

    Args:
        cleaned: Table with ``NAME``/``ID`` columns plus one column per ward.
        capacities: Ward key -> capacity, passed through to each rank report.

    Returns:
        The rank reports separated by Markdown horizontal rules, or a short
        notice when no numeric rank is present.
    """
    mr = max_rank_in(cleaned)
    if mr == 0:
        return "No ranking numbers found. (ไม่พบข้อมูลอันดับเป็นตัวเลข)"
    reports = [make_rank_report(cleaned, capacities, r) for r in range(1, mr + 1)]
    # Put the rule only BETWEEN reports: a rule after the last one dangles at
    # the end of the output and doubles up when a caller appends its own.
    return "\n\n---\n\n".join(reports)
319
+
320
  # ===== Helpers for temp file paths =====
321
  def _tmp(name: str) -> str:
322
  os.makedirs("/tmp", exist_ok=True)
323
  return f"/tmp/{uuid.uuid4().hex}-{name}"
324
 
325
  # ===== Gradio callbacks =====
 
326
  def update_capacity_table(selected_wards: List[str]) -> pd.DataFrame:
327
  rows = []
328
  for w, th in WARD_CHOICES:
 
333
  def on_upload(file, selected_wards):
334
  df, msg = read_table(file)
335
  if df is None:
 
336
  return gr.update(value=msg, visible=True), "", None, None, None, None, None, None, None, None, None, None
 
337
  avail = available_columns_text(df)
 
338
  auto_idx = auto_map_columns(df, selected_wards or [])
 
339
  def idx_or_none(key):
340
  return int(auto_idx[key]) if key in auto_idx else None
341
  name_num = idx_or_none("NAME")
 
348
  comm_num = idx_or_none("Community")
349
  psy_num = idx_or_none("Psychiatric")
350
  obs_num = idx_or_none("Obstetrics")
351
+ return (gr.update(value="✓ File loaded", visible=True), avail, name_num, id_num,
352
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num)
353
 
354
  def collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols):
 
355
  errors = []
356
  mapping = {}
357
  def valid(num, label):
358
  if num is None:
359
+ errors.append(f"- Please enter column number for {label}")
360
  return None
361
  try:
362
  num = int(num)
363
  except Exception:
364
+ errors.append(f"- {label} must be a number")
365
  return None
366
  if not (1 <= num <= n_cols):
367
+ errors.append(f"- {label} must be within 1–{n_cols}")
368
  return None
369
  return num
370
 
 
383
  def on_clean(file, selected_wards, capacity_df, name_num, id_num,
384
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
385
  if not selected_wards:
386
+ return gr.update(value="Please select at least one ward.", visible=True), None, None, None
387
 
388
  df, msg = read_table(file)
389
  if df is None:
 
397
  }
398
  errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
399
  if errors:
400
+ return gr.update(value="❌ Mapping invalid:\n" + "\n".join(errors), visible=True), None, None, None
401
 
402
  try:
403
  cleaned = build_cleaned_from_indices(df, mapping_idx)
404
  except Exception as e:
405
+ return gr.update(value=f"❌ Error: {e}", visible=True), None, None, None
406
 
 
407
  cleaned_path = _tmp("cleaned.csv")
408
  cleaned.to_csv(cleaned_path, index=False, encoding="utf-8-sig")
409
 
410
+ info = "✓ Cleaning completed"
411
  return gr.update(value=info, visible=True), cleaned.head(30), cleaned_path, len(cleaned)
412
 
413
+ def _capacities_from_df(cleaned: pd.DataFrame, capacity_df: Optional[pd.DataFrame]) -> Dict[str, int]:
414
+ if capacity_df is None or capacity_df.empty:
415
+ return {w: 0 for w in cleaned.columns if w not in ("NAME", "ID")}
416
+ cap_df = capacity_df.copy()
417
+ cap_df.columns = ["Ward", "Thai Name", "Capacity"]
418
+ cap_df = cap_df[cap_df["Ward"].isin([c for c in cleaned.columns if c not in ("NAME", "ID")])]
419
+ capacities = {}
420
+ for _, row in cap_df.iterrows():
421
+ try:
422
+ capacities[str(row["Ward"])] = int(row["Capacity"])
423
+ except Exception:
424
+ capacities[str(row["Ward"])] = 0
425
+ return capacities
426
+
427
def on_rank1_report(file, selected_wards, capacity_df, name_num, id_num,
                    med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
    """Callback: build the rank-1 Markdown report from the current inputs.

    Reads the uploaded file, validates the numeric column mapping, cleans the
    data and renders the report with ``make_rank1_report``.

    Args:
        file: Uploaded file object passed to ``read_table``.
        selected_wards: Ward keys currently selected.
        capacity_df: Capacity table (ward, Thai name, capacity).
        name_num, id_num: 1-based column numbers for NAME and ID.
        med_num ... obs_num: 1-based column numbers for each ward.

    Returns:
        The report as a Markdown string, or an error message string.
    """
    # Same guard as on_clean: without a ward selection there is nothing to
    # report on.
    if not selected_wards:
        return "Please select at least one ward."

    df, msg = read_table(file)
    if df is None:
        # Prefer the specific reason from read_table over the generic text.
        return msg or "Please upload a valid file."

    n_cols = len(df.columns)
    ward_nums = {
        "Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
        "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
        "Psychiatric": psy_num, "Obstetrics": obs_num,
    }
    errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
    if errors:
        return "❌ Mapping invalid:\n" + "\n".join(errors)
    try:
        cleaned = build_cleaned_from_indices(df, mapping_idx)
    except Exception as e:  # callback boundary: report instead of crashing
        return f"❌ Error building cleaned data: {e}"
    capacities = _capacities_from_df(cleaned, capacity_df)
    return make_rank1_report(cleaned, capacities)
447
+
448
def on_all_ranks_report(file, selected_wards, capacity_df, name_num, id_num,
                        med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
    """Callback: build the Markdown report covering every rank.

    Reads the uploaded file, validates the numeric column mapping, cleans the
    data and renders the report with ``make_all_ranks_report``.

    Args:
        file: Uploaded file object passed to ``read_table``.
        selected_wards: Ward keys currently selected.
        capacity_df: Capacity table (ward, Thai name, capacity).
        name_num, id_num: 1-based column numbers for NAME and ID.
        med_num ... obs_num: 1-based column numbers for each ward.

    Returns:
        The report as a Markdown string, or an error message string.
    """
    # Same guard as on_clean: without a ward selection there is nothing to
    # report on.
    if not selected_wards:
        return "Please select at least one ward."

    df, msg = read_table(file)
    if df is None:
        # Prefer the specific reason from read_table over the generic text.
        return msg or "Please upload a valid file."

    n_cols = len(df.columns)
    ward_nums = {
        "Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
        "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
        "Psychiatric": psy_num, "Obstetrics": obs_num,
    }
    errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
    if errors:
        return "❌ Mapping invalid:\n" + "\n".join(errors)
    try:
        cleaned = build_cleaned_from_indices(df, mapping_idx)
    except Exception as e:  # callback boundary: report instead of crashing
        return f"❌ Error building cleaned data: {e}"
    capacities = _capacities_from_df(cleaned, capacity_df)
    return make_all_ranks_report(cleaned, capacities)
468
+
469
def on_assign(file, selected_wards, capacity_df, name_num, id_num,
              med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
    """Validate the inputs through ``on_clean`` and run the random assignment.

    Args:
        file: Uploaded file object, passed to ``on_clean`` and ``read_table``.
        selected_wards: Wards currently ticked in the UI.
        capacity_df: Capacity table contents, passed to ``_capacities_from_df``.
        name_num, id_num: Column numbers entered for NAME and ID.
        med_num ... obs_num: Column numbers entered for each ward column.

    Returns:
        A 6-tuple in the order expected by the ``assign_btn`` outputs:
        ``(status, assigned_preview, assigned_csv_path, not_assigned_csv_path,
        leftover_text, allocation_report)``. When cleaning fails or the number
        of students exceeds the total capacity, only the first element carries
        a message and the remaining five are ``None``.
    """
    status, cleaned_preview, _cleaned_file, n_students = on_clean(
        file, selected_wards, capacity_df, name_num, id_num,
        med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num)
    # on_clean reports failure by returning no preview; forward its status.
    if cleaned_preview is None:
        return status, None, None, None, None, None

    # Rebuild the cleaned frame here; the mapping errors are ignored because
    # on_clean has already accepted the same inputs.
    df, _ = read_table(file)
    n_cols = len(df.columns)
    # NOTE(review): these entries were collapsed in the diff view this edit was
    # written from; reconstructed to mirror on_all_ranks_report - confirm.
    ward_nums = {
        "Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
        "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
        "Psychiatric": psy_num, "Obstetrics": obs_num
    }
    _errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
    cleaned = build_cleaned_from_indices(df, mapping_idx)

    capacities = _capacities_from_df(cleaned, capacity_df)
    total_capacity = sum(capacities.values())
    if n_students is None:
        n_students = len(cleaned)
    # Fewer students than seats is fine; more students than seats is refused.
    if n_students > total_capacity:
        msg = f"❌ Students {n_students} > total capacity {total_capacity} (shortage allowed, not exceed)"
        return gr.update(value=msg, visible=True), None, None, None, None, None

    assigned, not_assigned, leftover = random_assign(cleaned, capacities)

    assigned_path = _tmp("assigned.csv")
    not_assigned_path = _tmp("not_assigned.csv")
    # utf-8-sig writes a BOM, matching the original export encoding.
    assigned.to_csv(assigned_path, index=False, encoding="utf-8-sig")
    not_assigned.to_csv(not_assigned_path, index=False, encoding="utf-8-sig")

    remaining_lines = [f"- {ward_display(k)}: {v}" for k, v in leftover.items()]
    leftover_text = "Remaining capacity (จำนวนรับที่เหลือ):\n" + "\n".join(remaining_lines)

    # The summary heading used to be emitted with nothing beneath it; fill it
    # with the figures already computed above.
    summary_lines = [
        f"- Students: {n_students}",
        f"- Total capacity: {total_capacity}",
        f"- Assigned rows: {len(assigned)}",
        f"- Not assigned rows: {len(not_assigned)}",
    ]
    summary_lines.extend(
        f"- Remaining capacity, {ward_display(k)}: {v}" for k, v in leftover.items()
    )
    allocation = (
        make_all_ranks_report(cleaned, capacities)  # keep the all-ranks context too
        + "\n\n---\n\n"
        + "## Allocation Summary (สรุปการจัดสรร)\n"
        + "\n".join(summary_lines)
    )

    return status, assigned.head(30), assigned_path, not_assigned_path, leftover_text, allocation
506
 
507
# Gradio UI definition: builds the page layout and wires each button to its
# handler function. Components are created in the order they appear on screen.
  with gr.Blocks(title=APP_TITLE) as demo:
508
  gr.Markdown(f"# {APP_TITLE}")
509
  gr.Markdown(DESCRIPTION)
510
 
511
  with gr.Row():
512
# Single-file upload restricted to .csv and .xlsx.
+ file = gr.File(file_count="single", file_types=[".csv", ".xlsx"], label="Upload data (.csv/.xlsx)")
513
 
514
+ with gr.Accordion("1) Select wards (เลือกวอร์ด)", open=True):
515
# Choices are the ward keys from WARD_CHOICES; `value` is the default selection.
  selected_wards = gr.CheckboxGroup(
516
  choices=[w for w, _ in WARD_CHOICES],
517
+ label="Select wards (เลือกได้หลายข้อ)",
518
  value=["Medical", "Surgical", "Pediatric", "Community", "Psychiatric", "Obstetrics"]
519
  )
520
# Legend pairing each ward key with the text returned by ward_display.
+ gr.Markdown("Legend: " + ", ".join([f"**{w}** = {ward_display(w)}" for w, _ in WARD_CHOICES]))
521
 
522
+ with gr.Accordion("2) Set capacity per ward (กำหนดความจุต่อวอร์ด)", open=True):
523
# Editable three-column capacity table; it starts empty.
  capacity_df = gr.Dataframe(
524
  headers=["Ward", "Thai Name", "Capacity"],
525
  value=[],
 
527
  col_count=3,
528
  interactive=True,
529
  wrap=True,
530
+ label="Fill only selected wards"
531
  )
532
# Refresh the capacity table through update_capacity_table whenever the ward selection changes.
  selected_wards.change(fn=update_capacity_table, inputs=selected_wards, outputs=capacity_df)
533
 
534
+ with gr.Accordion("3) Column headers & mapping (หัวคอลัมน์และการจับคู่)", open=True):
535
# Status area is hidden initially; handlers write to it through their first output.
  status = gr.Markdown(visible=False)
536
+ available = gr.Code(label="Available columns (index starts at 1)", language="markdown", interactive=False)
537
+ auto_btn = gr.Button("Read & Auto-detect mapping (อ่านไฟล์ & เดา mapping)")
538
# One numeric input per mapped column: NAME, ID, then the eight ward columns.
+ name_num = gr.Number(label="Column number for NAME", precision=0)
539
+ id_num = gr.Number(label="Column number for ID", precision=0)
 
540
  with gr.Row():
541
+ med_num = gr.Number(label="Column number Medical", precision=0)
542
+ med1_num = gr.Number(label="Column number Medical_1", precision=0)
543
+ med2_num = gr.Number(label="Column number Medical_2", precision=0)
544
  with gr.Row():
545
+ surg_num = gr.Number(label="Column number Surgical", precision=0)
546
+ ped_num = gr.Number(label="Column number Pediatric", precision=0)
547
+ comm_num = gr.Number(label="Column number Community", precision=0)
548
  with gr.Row():
549
+ psy_num = gr.Number(label="Column number Psychiatric", precision=0)
550
+ obs_num = gr.Number(label="Column number Obstetrics", precision=0)
551
 
552
# on_upload writes the status, the available-columns listing and all ten column-number inputs.
  auto_btn.click(fn=on_upload, inputs=[file, selected_wards],
553
  outputs=[status, available, name_num, id_num,
554
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num])
555
 
556
+ # Reports (pre-assignment)
557
+ rank1_btn = gr.Button("Show Rank 1 Report (ดูสรุปอันดับ 1)")
558
+ rank1_report = gr.Markdown(label="Rank 1 Results (การแสดงผลอันดับที่ 1)")
559
# Both report buttons pass the same thirteen inputs and render into one Markdown component each.
+ rank1_btn.click(
560
+ fn=on_rank1_report,
561
+ inputs=[file, selected_wards, capacity_df, name_num, id_num,
562
+ med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num],
563
+ outputs=rank1_report
564
+ )
565
+
566
+ all_ranks_btn = gr.Button("Show All Ranks Report (ดูสรุปทุกอันดับ)")
567
+ all_ranks_report = gr.Markdown(label="All Ranks Report (การแสดงผลทุกอันดับ)")
568
+ all_ranks_btn.click(
569
+ fn=on_all_ranks_report,
570
+ inputs=[file, selected_wards, capacity_df, name_num, id_num,
571
+ med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num],
572
+ outputs=all_ranks_report
573
+ )
574
+
575
  with gr.Row():
576
  clean_btn = gr.Button("Clean data (ดูพรีวิว)", variant="primary")
577
 
578
+ preview = gr.Dataframe(label="Cleaned preview (first 30 rows)", visible=True)
579
+ cleaned_file = gr.File(label="Download cleaned.csv")
580
 
581
# Clean step is handled by on_clean.
  clean_btn.click(
582
  fn=on_clean,
 
586
  )
587
 
588
  assign_btn = gr.Button("Assign (สุ่มตามลำดับอันดับ)")
589
+ assigned_preview = gr.Dataframe(label="Assigned preview (first 30 rows)")
590
+ assigned_file = gr.File(label="Download assigned.csv")
591
+ not_assigned_file = gr.File(label="Download not_assigned.csv")
592
+ leftover_text = gr.Textbox(label="Remaining capacity summary", interactive=False)
593
+ allocation_report = gr.Markdown(label="All Ranks & Allocation Report")
594
 
595
# The six outputs below must stay in the same order as the tuple returned by on_assign.
  assign_btn.click(
596
  fn=on_assign,
597
  inputs=[file, selected_wards, capacity_df, name_num, id_num,
598
  med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num],
599
+ outputs=[status, assigned_preview, assigned_file, not_assigned_file, leftover_text, allocation_report]
600
  )
601
 
602
  if __name__ == "__main__":