Spaces:

ShinyaJ
/

Ward_Assignment_System_Nursing_CMU

Sleeping

App Files Files Community

ShinyaJ committed on Sep 27, 2025

Commit

1b707be

verified ·

1 Parent(s): 17c0dad

Upload 3 files

Browse files

Files changed (3) hide show

README.md +7 -7
app.py +263 -202
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -4,16 +4,16 @@ emoji: 🎲
 colorFrom: pink
 colorTo: blue
 sdk: gradio
-sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
 ---
 # Ward Ranking Cleaner & Random Assigner (Gradio)
-## วิธีใช้งาน
-1. อัปโหลดไฟล์ .csv หรือ .xlsx ที่มีข้อมูลนักศึกษา/ผู้เรียน
-2. เลือกวอร์ดที่ต้องใช้ แล้วกรอก capacity
-3. ใส่ชื่อคอลัมน์จริงของ NAME, ID และคอลัมน์วอร์ดที่เลือก
-4. กด **Clean data** → ดูพรีวิว → ดาวน์โหลด cleaned.csv
-5. กด **Assign** → สุ่มจัดสรรตามอันดับ → ดาวน์โหลด assigned.csv / not_assigned.csv

 colorFrom: pink
 colorTo: blue
 sdk: gradio
+sdk_version: "4.44.1"
 app_file: app.py
 pinned: false
 ---
 # Ward Ranking Cleaner & Random Assigner (Gradio)
+- Auto-detect column mapping (Thai/English keywords + fuzzy)
+- Or map by **column numbers** based on the "Available columns" list
+- Clean to keep only `NAME`, `ID`, and selected ward ranking columns (parse ranks → ints)
+- Assign students by rank round (1→2→3…) with random tie-breaking, respecting **capacity**
+- Pre-check: `#students <= total capacity` (having fewer students than seats is allowed; **exceeding total capacity is not**)

app.py CHANGED Viewed

@@ -5,17 +5,24 @@ import numpy as np
 import re
 from io import BytesIO
 from typing import List, Dict, Tuple, Optional
-APP_TITLE = "Ward Ranking Cleaner & Random Assigner (Flexible Columns)"
 DESCRIPTION = """
-1) เลือก **วอร์ด** ที่จะใช้ (จากรายการ 8 วอร์ดด้านล่าง) และใส่ **capacity** แต่ละวอร์ด
-2) ระบุ **หัวคอลัมน์ในไฟล์** ของคุณสำหรับ: NAME, ID และคอลัมน์คะแนน/อันดับของแต่ละวอร์ด (ชื่อคอลัมน์จริงในไฟล์)
-3) อัปโหลดไฟล์ .csv หรือ .xlsx แล้วกด **Clean data** เพื่อดูตารางที่เหลือเฉพาะ NAME, ID และคอลัมน์วอร์ดที่เลือก (คอลัมน์อื่นจะถูก drop)
-4) กด **Assign (สุ่มตามลำดับอันดับ)** เพื่อสุ่มจัดสรรทีละอันดับ 1 → 2 → 3 ... ตาม capacity ของแต่ละวอร์ด
-5) ดาวน์โหลด CSV ผลลัพธ์ได้
-- การอ่าน "อันดับ" จะดึง **ตัวเลข** จากสตริง (เช่น `1st`, `อันดับ 3`, `4th`) — ถ้าหาเลขไม่เจอจะถือว่าเป็นค่าว่าง
-- ถ้าคุณมีคอลัมน์ชื่อไม่แน่นอน สามารถใส่ชื่อที่แน่ใจลงไป หรือใช้ชื่อบางส่วนแล้วยกให้ **โหมดจับคู่ยืดหยุ่น** (regex) ช่วยค้นหา
 """
 WARD_CHOICES = [
@@ -29,6 +36,20 @@ WARD_CHOICES = [
     ("Obstetrics", "สูติศาสตร์"),
 ]
 def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
     if file is None:
         return None, "กรุณาอัปโหลดไฟล์ก่อน (.csv หรือ .xlsx)"
@@ -46,40 +67,16 @@ def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
                 return None, "รองรับเฉพาะ .csv หรือ .xlsx เท่านั้น"
     except Exception as e:
         return None, f"อ่านไฟล์ไม่สำเร็จ: {e}"
-    # ปรับชื่อคอลัมน์ (trim)
     df.columns = [str(c).strip() for c in df.columns]
     return df, ""
-def find_column(df: pd.DataFrame, key: str, flexible: bool) -> Optional[str]:
-    """
-    ค้นหาคอลัมน์ตามชื่อที่ผู้ใช้กรอก:
-    - ถ้า flexible=False → ค้นหาแบบตรงตัว (case-sensitive แบบเดิม แต่เราทำ trim แล้��)
-    - ถ้า flexible=True → จับคู่แบบยืดหยุ่น: ถ้า key มีอักขระพิเศษ ถือเป็น regex; ถ้าไม่ ก็มองเป็นสตริงย่อยที่ต้องพบในชื่อคอลัมน์
-    คืนชื่อคอลัมน์จริงถ้าพบ (ตัวแรกที่พบ), ไม่งั้นคืน None
-    """
-    cols = list(df.columns)
-    if not flexible:
-        return key if key in cols else None
-    # โหมดยืดหยุ่น
-    # ถ้า key เป็นสตริงธรรมดา ให้ค้นหาแบบ "มี key เป็นส่วนหนึ่งของชื่อคอลัมน์" (case-insensitive)
-    try:
-        pattern = re.compile(key, flags=re.IGNORECASE)
-        for c in cols:
-            if re.search(pattern, c):
-                return c
-    except re.error:
-        # ถ้า regex ไม่ valid ให้ fallback เป็น contains (case-insensitive)
-        low = key.lower()
-        for c in cols:
-            if low in c.lower():
-                return c
-    return None
 def parse_rank(value) -> Optional[int]:
-    """
-    รับค่าจากคอลัมน์อันดับ เช่น '1st', 'อันดับ 3', '2', 'third' (จะไม่รองรับคำภาษาอังกฤษเต็ม)
-    คืนเป็น int ถ้าพบเลข, ถ้าไม่พบคืน None
-    """
     if pd.isna(value):
         return None
     s = str(value)
@@ -91,102 +88,127 @@ def parse_rank(value) -> Optional[int]:
             return None
     return None
-def build_cleaned(df: pd.DataFrame,
-                  name_key: str,
-                  id_key: str,
-                  ward_to_key: Dict[str, str],
-                  flexible_match: bool) -> Tuple[pd.DataFrame, List[str]]:
     """
-    สร้างตาราง cleaned: เก็บเฉพาะ NAME, ID, และคอลัมน์วอร์ดที่เลือก
-    แปลงค่าคอลัมน์วอร์ดเป็น int (ตัวเลขอันดับ) ถ้าทำไม่ได้จะเป็น NaN
     """
-    messages = []
-    # หา NAME / ID
-    name_col = find_column(df, name_key.strip(), flexible_match)
-    id_col = find_column(df, id_key.strip(), flexible_match)
-    if name_col is None or id_col is None:
         missing = []
-        if name_col is None: missing.append("NAME")
-        if id_col is None: missing.append("ID")
         raise ValueError(f"หาไม่พบคอลัมน์บังคับ: {', '.join(missing)}")
-    keep_cols = [name_col, id_col]
-    renamed = {name_col: "NAME", id_col: "ID"}
-    # หาและแปลงคอลัมน์วอร์ด
-    for ward, key in ward_to_key.items():
-        key = key.strip()
-        if not key:
-            continue
-        col = find_column(df, key, flexible_match)
-        if col is None:
-            messages.append(f"⚠️ ไม่พบคอลัมน์ของวอร์ด '{ward}' จากคีย์ '{key}' (ข้ามวอร์ดนี้)")
-            continue
-        keep_cols.append(col)
-        renamed[col] = ward  # เปลี่ยนชื่อคอลัมน์เป็นชื่อวอร์ดมาตรฐาน
-    # unique และรักษาลำดับ
-    seen = set()
-    keep_unique = []
-    for c in keep_cols:
-        if c not in seen:
-            seen.add(c)
-            keep_unique.append(c)
-    cleaned = df[keep_unique].rename(columns=renamed).copy()
-    # แปลงอันดับเป็น int
-    ward_cols = [c for c in cleaned.columns if c not in ("NAME", "ID")]
-    for c in ward_cols:
-        cleaned[c] = cleaned[c].apply(parse_rank).astype("Int64")
-    # จัดเรียงคอลัมน์
-    cleaned = cleaned[["NAME", "ID"] + ward_cols]
-    return cleaned, messages
 def random_assign(cleaned: pd.DataFrame,
                   capacities: Dict[str, int],
                   seed: Optional[int] = None) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, int]]:
-    """
-    สุ่มจัดสรรแบบรอบเลือกอันดับ: เริ่มจากอันดับ 1 → 2 → 3 → ...
-    - ในแต่ละอันดับและแต่ละวอร์ด: ถ้าเกิน capacity ที่เหลือ ให้สุ่มเลือก
-    - คืนผลลัพธ์: assignments, not_assigned, leftover_capacities
-    """
     rng = np.random.default_rng(seed)
     wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
-    # กำหนด capacity ที่ใช้จริง เฉพาะวอร์ดที่อยู่ในตาราง
     cap = {w: int(capacities.get(w, 0)) for w in wards}
-    # เตรียมข้อมูลทำงาน
-    assigned = pd.Series(index=cleaned.index, data=pd.NA, dtype="object")   # ชื่อวอร์ดที่ได้
-    choice_no = pd.Series(index=cleaned.index, data=pd.NA, dtype="Int64")   # อันดับที่ได้
-    # หาค่า max rank ที่ปรากฏ (เช่น 1..6)
     max_rank = 0
     for w in wards:
-        max_w = cleaned[w].max(skipna=True)
-        if pd.notna(max_w):
-            max_rank = max(max_rank, int(max_w))
-    # วนทีละอันดับ
     for r in range(1, max_rank + 1):
-        # ข้ามถ้าทุกวอร์ดเต็มแล้ว
         if all(c <= 0 for c in cap.values()):
             break
-        # สำหรับแต่ละวอร์ด
         for w in wards:
             if cap[w] <= 0:
                 continue
-            # ผู้สมัครที่ยังไม่ได้รับการจัดสรร และเลือกวอร์ดนี้ที่อันดับ r
             mask = (assigned.isna()) & (cleaned[w] == r)
             candidates = cleaned.index[mask].tolist()
-            if len(candidates) == 0:
                 continue
             if len(candidates) <= cap[w]:
                 pick = candidates
             else:
                 pick = list(rng.choice(candidates, size=cap[w], replace=False))
-            # ทำการจ��ดสรร
             assigned.loc[pick] = w
             choice_no.loc[pick] = r
             cap[w] -= len(pick)
@@ -196,90 +218,116 @@ def random_assign(cleaned: pd.DataFrame,
     result["ChoiceNumber"] = choice_no
     not_assigned = result[result["AssignedWard"].isna()].copy()
-    # แปลง NA ให้ดูง่ายขึ้นใน preview
-    result_preview = result.copy()
-    result_preview = result_preview.fillna("")
-    return result_preview, not_assigned.fillna(""), cap
 def update_capacity_table(selected_wards: List[str]) -> pd.DataFrame:
     rows = []
     for w, th in WARD_CHOICES:
         if selected_wards and w in selected_wards:
             rows.append([w, th, 0])
-    if not rows:
-        return pd.DataFrame(columns=["Ward", "Thai Name", "Capacity"])
     return pd.DataFrame(rows, columns=["Ward", "Thai Name", "Capacity"])
-def update_mapping_table(selected_wards: List[str]) -> pd.DataFrame:
-    rows = [["NAME", ""], ["ID", ""]]
-    for w, th in WARD_CHOICES:
-        if selected_wards and w in selected_wards:
-            rows.append([w, ""])
-    return pd.DataFrame(rows, columns=["Field", "Your Column Header (exact or regex)"])
-def on_clean(file, selected_wards, capacity_df, mapping_df, flexible):
-    if not selected_wards:
-        return gr.update(value="กรุณาเลือกวอร์ดอย่างน้อย 1", visible=True), None, None
-    # อ่านไฟล์
     df, msg = read_table(file)
     if df is None:
-        return gr.update(value=msg, visible=True), None, None
-    # ดึงชื่อคอลัมน์ที่ผู้ใช้ระบุ
-    mapping_df = mapping_df.copy()
-    mapping_df.columns = ["Field", "Key"]
-    mapping = {row["Field"]: str(row["Key"]).strip() for _, row in mapping_df.iterrows() if str(row["Field"]).strip()}
-    name_key = mapping.get("NAME", "")
-    id_key = mapping.get("ID", "")
-    if not name_key or not id_key:
-        return gr.update(value="กรุณาใส่หัวคอลัมน์ของ NAME และ ID", visible=True), None, None
-    ward_to_key = {}
-    for w in selected_wards:
-        ward_to_key[w] = mapping.get(w, "")
     try:
-        cleaned, messages = build_cleaned(df, name_key, id_key, ward_to_key, bool(flexible))
     except Exception as e:
-        return gr.update(value=f"❌ เกิดข้อผิดพลาด: {e}", visible=True), None, None
-    info = "✓ Cleaning สำเร็จ"
-    if messages:
-        info += "\n" + "\n".join(messages)
-    # เตรียมไฟล์ดาวน์โหลด
     buf = BytesIO()
     cleaned.to_csv(buf, index=False, encoding="utf-8-sig")
     buf.seek(0)
-    return gr.update(value=info, visible=True), cleaned.head(30), ("cleaned.csv", buf)
-def on_assign(file, selected_wards, capacity_df, mapping_df, flexible, seed):
-    # ต้อง clean ก่อน (เราอ่านไฟล์เดิมแล้ว clean ในฟังก์ชันนี้อีกครั้งเพื่อความแน่นอน)
-    status, cleaned_preview, cleaned_file = on_clean(file, selected_wards, capacity_df, mapping_df, flexible)
     if cleaned_preview is None:
         return status, None, None, None, None
-    # โหลด cleaned จากไฟล์ใน memory อีกครั้งเพื่อความแม่นยำ
-    # แต่เรามีเฉพาะ preview; จึง clean ซ้ำเพื่อได้ dataframe เต็ม
     df, _ = read_table(file)
-    mapping_df = mapping_df.copy()
-    mapping_df.columns = ["Field", "Key"]
-    mapping = {row["Field"]: str(row["Key"]).strip() for _, row in mapping_df.iterrows() if str(row["Field"]).strip()}
-    name_key = mapping.get("NAME", "")
-    id_key = mapping.get("ID", "")
-    ward_to_key = {w: mapping.get(w, "") for w in selected_wards}
-    cleaned, _ = build_cleaned(df, name_key, id_key, ward_to_key, bool(flexible))
-    # capacities
-    if capacity_df is None or len(capacity_df) == 0:
-        return gr.update(value="กรุณากรอก capacity ก่อน", visible=True), None, None, None, None
-    # ทำให้แน่ใจว่ามีคอลัมน์ตามชื่อที่เราคาด
     cap_df = capacity_df.copy()
     cap_df.columns = ["Ward", "Thai Name", "Capacity"]
     cap_df = cap_df[cap_df["Ward"].isin([c for c in cleaned.columns if c not in ("NAME", "ID")])]
     cap_map = {}
@@ -289,39 +337,38 @@ def on_assign(file, selected_wards, capacity_df, mapping_df, flexible, seed):
         except Exception:
             cap_map[str(row["Ward"])] = 0
-    assigned, not_assigned, leftover = random_assign(cleaned, cap_map, seed=seed if seed not in (None, "") else None)
-    # สร้างไฟล์ดาวน์โหลด
-    out_all = BytesIO()
-    assigned.to_csv(out_all, index=False, encoding="utf-8-sig")
-    out_all.seek(0)
     out_un = BytesIO()
-    not_assigned.to_csv(out_un, index=False, encoding="utf-8-sig")
-    out_un.seek(0)
     leftover_text = "ความจุคงเหลือ:\n" + "\n".join([f"- {k}: {v}" for k, v in leftover.items()])
     return status, assigned.head(30), ("assigned.csv", out_all), ("not_assigned.csv", out_un), leftover_text
 with gr.Blocks(title=APP_TITLE) as demo:
     gr.Markdown(f"# {APP_TITLE}")
     gr.Markdown(DESCRIPTION)
     with gr.Row():
-        file = gr.File(file_count="single", file_types=[".csv", ".xlsx"], label="อัปโหลดข้อมูลนักศึกษา/ผู้เรียน (.csv / .xlsx)")
     with gr.Accordion("1) เลือกวอร์ดที่ต้องใช้", open=True):
         selected_wards = gr.CheckboxGroup(
             choices=[w for w, _ in WARD_CHOICES],
             label="เลือกวอร์ด (เลือกได้หลายข้อ)",
-            value=["Medical", "Surgical"]  # ค่าเริ่มต้นเล็กน้อย
-        )
-        gr.Markdown(
-            "คำแปล (อ้างอิง): " +
-            ", ".join([f"**{w}** = {th}" for w, th in WARD_CHOICES])
         )
     with gr.Accordion("2) กำหนด Capacity ต่อวอร์ด", open=True):
         capacity_df = gr.Dataframe(
@@ -331,46 +378,60 @@ with gr.Blocks(title=APP_TITLE) as demo:
             col_count=3,
             interactive=True,
             wrap=True,
-            label="กรอกแค่แถวของวอร์ดที่เลือก"
         )
         selected_wards.change(fn=update_capacity_table, inputs=selected_wards, outputs=capacity_df)
-    with gr.Accordion("3) ระบุหัวคอลัมน์จริงในไฟล์ของคุณ", open=True):
-        gr.Markdown("ใส่ชื่อคอลัมน์ **จริง** ที่อยู่ในไฟล์ของคุณ (จะใช้แมตช์ตรงตัว หรือเปิดโหมดยืดหยุ่นก็ได้)")
-        mapping_df = gr.Dataframe(
-            headers=["Field", "Your Column Header (exact or regex)"],
-            value=[["NAME",""],["ID",""]],
-            row_count=(2, "dynamic"),
-            col_count=2,
-            interactive=True,
-            wrap=True
-        )
-        selected_wards.change(fn=update_mapping_table, inputs=selected_wards, outputs=mapping_df)
-        flexible = gr.Checkbox(label="เปิดโหมดจับคู่คอลัมน์แบบยืดหยุ่น (regex / contains)", value=True)
     with gr.Row():
-        clean_btn = gr.Button("Clean data (ดูพรีวิว)")
-        assign_btn = gr.Button("Assign (สุ่มตามลำดับอันดับ)")
-    info = gr.Markdown(visible=False)
-    preview = gr.Dataframe(label="พรีวิวข้อมูลที่ผ่านการ clean (แสดงหัว 30 แถว)", visible=True)
     cleaned_file = gr.File(label="ดาวน์โหลดไฟล์ cleaned.csv")
-    assigned_preview = gr.Dataframe(label="ตัวอย่างผลการจัดสรร (หัว 30 แถว)", visible=True)
-    assigned_file = gr.File(label="ดาวน์โหลดไฟล์ assigned.csv")
-    not_assigned_file = gr.File(label="ดาวน์โหลดไฟล์ not_assigned.csv")
-    leftover_text = gr.Textbox(label="สรุปความจุคงเหลือ", interactive=False)
-    seed = gr.Textbox(label="Random seed (เว้นว่างเพื่อให้สุ่มใหม่ทุกครั้ง)", value="")
     clean_btn.click(
         fn=on_clean,
-        inputs=[file, selected_wards, capacity_df, mapping_df, flexible],
-        outputs=[info, preview, cleaned_file]
     )
     assign_btn.click(
         fn=on_assign,
-        inputs=[file, selected_wards, capacity_df, mapping_df, flexible, seed],
-        outputs=[info, assigned_preview, assigned_file, not_assigned_file, leftover_text]
     )
 if __name__ == "__main__":

 import re
 from io import BytesIO
 from typing import List, Dict, Tuple, Optional
+try:
+    from rapidfuzz import process as rf_process
+    HAS_FUZZ = True
+except Exception:
+    HAS_FUZZ = False
+APP_TITLE = "Ward Ranking Cleaner & Random Assigner (Auto-map + Number Mapping)"
 DESCRIPTION = """
+**Flow**
+1) อัปโหลดไฟล์ .csv/.xlsx
+2) เลือกวอร์ดที่ใช้ + ใส่ capacity
+3) ตรวจหัวคอลัมน์ที่อ่านได้ (Available columns)
+4) **เลือกวิธี mapping**:
+   - Auto-detect (คำไทย/อังกฤษ + fuzzy) → ระบบเติมให้อัตโนมัติ
+   - หรือกรอก **หมายเลขคอลัมน์** ตามรายการ Available columns (เลขเริ่ม 1)
+5) Clean → เหลือเฉพาะ NAME, ID, และคอลัมน์วอร์ดที่เลือก (ค่าจัดอันดับถูกแปลงเป็นตัวเลข)
+6) Assign → สุ่มตามลำดับอันดับ โดยเคารพ capacity
+   - **จะตรวจว่าจำนวนนักศึกษา <= ผลรวม capacity** (ขาดได้ แต่ห้ามเกิน)
 """
 WARD_CHOICES = [
     ("Obstetrics", "สูติศาสตร์"),
 ]
+# Keyword dictionary for auto mapping
+AUTO_MAP = {
+    "NAME": ["ชื่อ-สกุล", "ชื่อ - สกุล", "fullname", "full name", "name", "student name"],
+    "ID": ["รหัสนักศึกษา", "รหัส", "student id", "id", "studentid"],
+    "Medical": ["อายุรศาสตร์", "medical"],
+    "Medical_1": ["อายุรศาสตร์_1", "medical_1", "med_1"],
+    "Medical_2": ["อายุรศาสตร์_2", "medical_2", "med_2"],
+    "Surgical": ["ศัลยศาสตร์", "surgical", "surgery"],
+    "Pediatric": ["เด็ก", "pediatric", "pediatrics"],
+    "Community": ["ชุมชน", "community"],
+    "Psychiatric": ["จิตเวช", "psychiatric"],
+    "Obstetrics": ["สูติศาสตร์", "obstetrics", "obgyn", "ob/gyn"],
+}
 def read_table(file) -> Tuple[Optional[pd.DataFrame], str]:
     if file is None:
         return None, "กรุณาอัปโหลดไฟล์ก่อน (.csv หรือ .xlsx)"
                 return None, "รองรับเฉพาะ .csv หรือ .xlsx เท่านั้น"
     except Exception as e:
         return None, f"อ่านไฟล์ไม่สำเร็จ: {e}"
     df.columns = [str(c).strip() for c in df.columns]
     return df, ""
def available_columns_text(df: pd.DataFrame) -> str:
    """Render the column labels of *df* as a 1-based numbered list.

    The first line is the fixed header ``Available columns:``; every following
    line has the form ``<number>. <label>`` in the DataFrame's column order.
    A DataFrame without columns yields just the header line.

    Args:
        df: Table whose column labels should be listed.

    Returns:
        The header and numbered labels joined with newlines.
    """
    numbered = (f"{pos}. {label}" for pos, label in enumerate(df.columns, start=1))
    return "\n".join(["Available columns:", *numbered])
 def parse_rank(value) -> Optional[int]:
     if pd.isna(value):
         return None
     s = str(value)
             return None
     return None
def auto_map_columns(df: pd.DataFrame, selected_wards: List[str]) -> Dict[str, int]:
    """Guess which column of *df* holds NAME, ID and each selected ward.

    Fields are resolved in the order NAME, ID, then the wards as given. For
    each field the keyword list from ``AUTO_MAP`` (or the field name itself
    when it has no entry) is tried in three passes, all case-insensitive:

    1. a header that equals a keyword exactly,
    2. a header that contains a keyword,
    3. a fuzzy match (score >= 85) when RapidFuzz is available.

    Within a pass, earlier keywords take priority. A column already assigned
    to one field is never offered to another, so the returned numbers are
    distinct.

    Args:
        df: Table whose column labels are inspected.
        selected_wards: Ward names to look for; ``None`` is treated as empty.

    Returns:
        ``{field: 1-based column number}`` for every field that was matched.
        Fields without a match are simply absent.
    """
    # Labels may be non-strings (e.g. numeric headers), so normalise once.
    col_lower = [str(c).strip().lower() for c in df.columns]
    claimed: set = set()  # 0-based positions already given to a field
    result: Dict[str, int] = {}

    def find_by_keywords(keywords: List[str]) -> Optional[int]:
        """Return the 0-based position of the best unclaimed column, or None."""
        kws = [str(kw).lower() for kw in keywords]
        free = [(pos, name) for pos, name in enumerate(col_lower) if pos not in claimed]

        # Exact header match first, so that "medical" prefers a column named
        # "Medical" over one named "Medical_1".
        for kw in kws:
            for pos, name in free:
                if name == kw:
                    return pos
        # Then substring match, keyword order still being the priority.
        for kw in kws:
            for pos, name in free:
                if kw in name:
                    return pos
        # Fuzzy fallback: a mapping of choices makes extractOne return the
        # mapping key (our column position) as the third tuple element.
        if HAS_FUZZ and free:
            choices = dict(free)
            best_pos: Optional[int] = None
            best_score = -1.0
            for kw in kws:
                match = rf_process.extractOne(kw, choices, score_cutoff=85)
                if match is not None and match[1] > best_score:
                    best_score = match[1]
                    best_pos = match[2]
            return best_pos
        return None

    for field in ["NAME", "ID", *(selected_wards or [])]:
        if field in result:
            continue  # the same field listed twice keeps its first match
        pos = find_by_keywords(AUTO_MAP.get(field, [field]))
        if pos is not None:
            claimed.add(pos)
            result[field] = pos + 1  # callers work with 1-based numbers
    return result
def build_cleaned_from_indices(df: pd.DataFrame,
                               mapping_indices: Dict[str, int]) -> pd.DataFrame:
    """Project *df* onto NAME, ID and the mapped ward columns.

    Columns are selected by position, so the source headers may be empty or
    repeated without affecting the result. Ward values are passed through
    ``parse_rank`` and stored with the nullable ``Int64`` dtype.

    Args:
        df: Source table.
        mapping_indices: ``{field: 1-based column number in df}``. Only the
            keys ``NAME``, ``ID`` and ward names listed in ``WARD_CHOICES``
            are used. A ward whose number is missing, non-numeric or out of
            range is skipped.

    Returns:
        A new DataFrame with columns ``NAME``, ``ID`` and then the mapped
        wards in ``WARD_CHOICES`` order.

    Raises:
        ValueError: If NAME or ID cannot be resolved to a valid column, or if
            two fields point at the same column.
    """
    n_cols = len(df.columns)

    def resolve(field: str) -> Optional[int]:
        """Return the 0-based position mapped to *field*, or None if unusable."""
        raw = mapping_indices.get(field)
        if raw is None:
            return None
        try:
            idx = int(raw)
        except (TypeError, ValueError, OverflowError):
            return None
        return idx - 1 if 1 <= idx <= n_cols else None

    name_pos = resolve("NAME")
    id_pos = resolve("ID")
    missing = [label for label, pos in (("NAME", name_pos), ("ID", id_pos)) if pos is None]
    if missing:
        raise ValueError(f"หาไม่พบคอลัมน์บังคับ: {', '.join(missing)}")

    fields = ["NAME", "ID"]
    positions = [name_pos, id_pos]
    for w, _th in WARD_CHOICES:
        if w in mapping_indices:
            pos = resolve(w)
            if pos is not None:
                fields.append(w)
                positions.append(pos)

    # One source column feeding two fields is a mapping mistake; fail with a
    # readable message instead of producing duplicate column labels.
    owner: Dict[int, str] = {}
    for field, pos in zip(fields, positions):
        if pos in owner:
            raise ValueError(
                f"คอลัมน์หมายเลข {pos + 1} ถูกใช้ซ้ำสำหรับ {owner[pos]} และ {field}"
            )
        owner[pos] = field

    cleaned = df.iloc[:, positions].copy()
    cleaned.columns = fields

    # Everything after NAME and ID is a ward ranking column.
    for w in fields[2:]:
        cleaned[w] = cleaned[w].apply(parse_rank).astype("Int64")
    return cleaned
 def random_assign(cleaned: pd.DataFrame,
                   capacities: Dict[str, int],
                   seed: Optional[int] = None) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, int]]:
     rng = np.random.default_rng(seed)
     wards = [w for w in cleaned.columns if w not in ("NAME", "ID")]
     cap = {w: int(capacities.get(w, 0)) for w in wards}
+    assigned = pd.Series(index=cleaned.index, data=pd.NA, dtype="object")
+    choice_no = pd.Series(index=cleaned.index, data=pd.NA, dtype="Int64")
     max_rank = 0
     for w in wards:
+        m = cleaned[w].max(skipna=True)
+        if pd.notna(m):
+            max_rank = max(max_rank, int(m))
     for r in range(1, max_rank + 1):
         if all(c <= 0 for c in cap.values()):
             break
         for w in wards:
             if cap[w] <= 0:
                 continue
             mask = (assigned.isna()) & (cleaned[w] == r)
             candidates = cleaned.index[mask].tolist()
+            if not candidates:
                 continue
             if len(candidates) <= cap[w]:
                 pick = candidates
             else:
                 pick = list(rng.choice(candidates, size=cap[w], replace=False))
             assigned.loc[pick] = w
             choice_no.loc[pick] = r
             cap[w] -= len(pick)
     result["ChoiceNumber"] = choice_no
     not_assigned = result[result["AssignedWard"].isna()].copy()
+    return result.fillna(""), not_assigned.fillna(""), cap
+# ===== Gradio callbacks =====
 def update_capacity_table(selected_wards: List[str]) -> pd.DataFrame:
     rows = []
     for w, th in WARD_CHOICES:
         if selected_wards and w in selected_wards:
             rows.append([w, th, 0])
     return pd.DataFrame(rows, columns=["Ward", "Thai Name", "Capacity"])
def on_upload(file, selected_wards):
    """Read the uploaded table and propose a column-number mapping.

    Returns a 5-tuple:
        1. a ``gr.update`` carrying the status message,
        2. the numbered list of available columns ("" on failure),
        3. the detected NAME column number or None,
        4. the detected ID column number or None,
        5. a dict ``{ward: detected column number or None}`` covering every
           entry of ``WARD_CHOICES`` (None on failure).
    """
    df, msg = read_table(file)
    if df is None:
        return gr.update(value=msg, visible=True), "", None, None, None

    columns_listing = available_columns_text(df)
    detected = auto_map_columns(df, selected_wards or [])

    def number_for(field):
        # Undetected fields stay None so the corresponding input is left blank.
        if field not in detected:
            return None
        return int(detected[field])

    ward_numbers = {}
    for ward, _thai in WARD_CHOICES:
        ward_numbers[ward] = number_for(ward)

    # NOTE(review): the fifth value is a plain dict keyed by ward name —
    # confirm the event wiring that consumes this callback can accept it.
    return (
        gr.update(value="✓ อ่านไฟล์สำเร็จ", visible=True),
        columns_listing,
        number_for("NAME"),
        number_for("ID"),
        ward_numbers,
    )
def collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols):
    """Validate the user-entered column numbers and build the field mapping.

    NAME and ID are always required; a number is also required for every ward
    in *selected_wards*. A value is accepted when it converts to an integer in
    ``1..n_cols`` and that column has not already been taken by an earlier
    field (checked in the order NAME, ID, wards).

    Args:
        name_num: Column number entered for NAME (may be None).
        id_num: Column number entered for ID (may be None).
        ward_nums: ``{ward: number or None}``; ``None`` is treated as empty.
        selected_wards: Wards that must be mapped; ``None`` is treated as empty.
        n_cols: Number of columns in the uploaded table.

    Returns:
        ``(errors, mapping)`` where *errors* is a list of user-facing message
        lines and *mapping* is ``{field: 1-based column number}`` holding only
        the fields that passed validation.
    """
    errors = []
    mapping = {}
    taken = {}  # column number -> first field that claimed it
    ward_nums = ward_nums or {}

    def register(num, field):
        """Validate *num* for *field*; record it in mapping or log an error."""
        if num is None:
            errors.append(f"- กรุณาใส่หมายเลขของ {field}")
            return
        try:
            num = int(num)
        except (TypeError, ValueError, OverflowError):
            errors.append(f"- {field} ต้องเป็นตัวเลข")
            return
        if not (1 <= num <= n_cols):
            errors.append(f"- {field} ต้องอยู่ระหว่าง 1–{n_cols}")
            return
        if num in taken:
            # Two fields reading the same column would break the cleaning step.
            errors.append(f"- {field} ใช้หมายเลขคอลัมน์ {num} ซ้ำกับ {taken[num]}")
            return
        taken[num] = field
        mapping[field] = num

    register(name_num, "NAME")
    register(id_num, "ID")
    # dict.fromkeys drops repeated ward names while keeping their order.
    for w in dict.fromkeys(selected_wards or []):
        register(ward_nums.get(w, None), w)
    return errors, mapping
def on_clean(file, selected_wards, capacity_df, name_num, id_num,
             med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num):
    """Validate the mapping, build the cleaned table and export it as CSV.

    Args:
        file: Uploaded file handed to ``read_table``.
        selected_wards: Ward names the user ticked; at least one is required.
        capacity_df: Not used by this callback; kept so the parameter list
            stays aligned with the inputs wired to it.
        name_num, id_num: Column numbers entered for NAME and ID.
        med_num ... obs_num: Column numbers entered for each ward.

    Returns:
        A 4-tuple ``(status_update, preview, csv_path, n_rows)``. On any
        failure the last three items are ``None`` and the status update
        carries the error text. On success *preview* is the first 30 cleaned
        rows, *csv_path* is the path of a freshly written ``cleaned.csv`` and
        *n_rows* is the number of cleaned rows.
    """
    import os
    import tempfile

    def fail(message):
        # Every error path has the same shape: message plus three empty outputs.
        return gr.update(value=message, visible=True), None, None, None

    if not selected_wards:
        return fail("กรุณาเลือกวอร์ดอย่างน้อย 1")

    df, msg = read_table(file)
    if df is None:
        return fail(msg)

    ward_nums = {
        "Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
        "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
        "Psychiatric": psy_num, "Obstetrics": obs_num,
    }
    errors, mapping_idx = collect_mapping_numbers(
        name_num, id_num, ward_nums, selected_wards, len(df.columns)
    )
    if errors:
        return fail("❌ Mapping ไม่ครบ/ไม่ถูกต้อง:\n" + "\n".join(errors))

    try:
        cleaned = build_cleaned_from_indices(df, mapping_idx)
    except Exception as e:
        return fail(f"❌ เกิดข้อผิดพลาด: {e}")

    # A gr.File output is populated from a file path, not an in-memory buffer,
    # so write the CSV to its own temp directory and hand back the path.
    # utf-8-sig keeps Thai text readable when the file is opened in Excel.
    out_path = os.path.join(tempfile.mkdtemp(prefix="ward_clean_"), "cleaned.csv")
    cleaned.to_csv(out_path, index=False, encoding="utf-8-sig")

    info = "✓ Cleaning สำเร็จ"
    return gr.update(value=info, visible=True), cleaned.head(30), out_path, len(cleaned)
+def on_assign(file, selected_wards, capacity_df, name_num, id_num,
+              med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num, seed):
+    # Clean first to get the cleaned df and student count
+    status, cleaned_preview, cleaned_file, n_students = on_clean(file, selected_wards, capacity_df, name_num, id_num,
+                                                                 med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num)
     if cleaned_preview is None:
         return status, None, None, None, None
+    # Recreate full cleaned df (not just head) for assignment
     df, _ = read_table(file)
+    n_cols = len(df.columns)
+    ward_nums = {
+        "Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
+        "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
+        "Psychiatric": psy_num, "Obstetrics": obs_num
+    }
+    _errors, mapping_idx = collect_mapping_numbers(name_num, id_num, ward_nums, selected_wards, n_cols)
+    cleaned = build_cleaned_from_indices(df, mapping_idx)
+    # Build capacity map
     cap_df = capacity_df.copy()
+    if cap_df is None or cap_df.empty:
+        return gr.update(value="กรุณากรอก capacity ก่อน", visible=True), None, None, None, None
     cap_df.columns = ["Ward", "Thai Name", "Capacity"]
     cap_df = cap_df[cap_df["Ward"].isin([c for c in cleaned.columns if c not in ("NAME", "ID")])]
     cap_map = {}
         except Exception:
             cap_map[str(row["Ward"])] = 0
+    total_capacity = sum(cap_map.values())
+    # Pre-check: students must be <= total capacity (ขาดได้แต่ห้ามเกิน)
+    if n_students is None:
+        n_students = len(cleaned)
+    if n_students > total_capacity:
+        msg = f"❌ จำนวนผู้สมัคร {n_students} คน มากกว่า capacity รวม {total_capacity} ที่กำหนด (ขาดได้แต่ห้ามเกิน)"
+        return gr.update(value=msg, visible=True), None, None, None, None
+    assigned, not_assigned, leftover = random_assign(cleaned, cap_map, seed=int(seed) if str(seed).strip().isdigit() else None)
+    out_all = BytesIO()
+    assigned.to_csv(out_all, index=False, encoding="utf-8-sig"); out_all.seek(0)
     out_un = BytesIO()
+    not_assigned.to_csv(out_un, index=False, encoding="utf-8-sig"); out_un.seek(0)
     leftover_text = "ความจุคงเหลือ:\n" + "\n".join([f"- {k}: {v}" for k, v in leftover.items()])
     return status, assigned.head(30), ("assigned.csv", out_all), ("not_assigned.csv", out_un), leftover_text
 with gr.Blocks(title=APP_TITLE) as demo:
     gr.Markdown(f"# {APP_TITLE}")
     gr.Markdown(DESCRIPTION)
     with gr.Row():
+        file = gr.File(file_count="single", file_types=[".csv", ".xlsx"], label="อัปโหลดข้อมูล (.csv/.xlsx)")
     with gr.Accordion("1) เลือกวอร์ดที่ต้องใช้", open=True):
         selected_wards = gr.CheckboxGroup(
             choices=[w for w, _ in WARD_CHOICES],
             label="เลือกวอร์ด (เลือกได้หลายข้อ)",
+            value=["Medical", "Surgical", "Pediatric", "Community", "Psychiatric", "Obstetrics"]
         )
+        gr.Markdown("คำแปล: " + ", ".join([f"**{w}** = {th}" for w, th in WARD_CHOICES]))
     with gr.Accordion("2) กำหนด Capacity ต่อวอร์ด", open=True):
         capacity_df = gr.Dataframe(
             col_count=3,
             interactive=True,
             wrap=True,
+            label="กรอกเฉพาะแถวของวอร์ดที่เลือก"
         )
         selected_wards.change(fn=update_capacity_table, inputs=selected_wards, outputs=capacity_df)
+    with gr.Accordion("3) ตรวจหัวคอลัมน์ & เลือก mapping (Auto/ตัวเลข)", open=True):
+        status = gr.Markdown(visible=False)
+        available = gr.Code(label="Available columns (เลขเริ่มที่ 1)", language="markdown", interactive=False)
+        auto_btn = gr.Button("อ่านไฟล์ & Auto-detect mapping")
+        # numeric mapping inputs
+        name_num = gr.Number(label="หมายเลขคอลัมน์สำหรับ NAME", precision=0)
+        id_num = gr.Number(label="หมายเลขคอลัมน์สำหรับ ID", precision=0)
+        with gr.Row():
+            med_num = gr.Number(label="หมายเลขคอลัมน์ Medical", precision=0)
+            med1_num = gr.Number(label="หมายเลขคอลัมน์ Medical_1", precision=0)
+            med2_num = gr.Number(label="หมายเลขคอลัมน์ Medical_2", precision=0)
+        with gr.Row():
+            surg_num = gr.Number(label="หมายเลขคอลัมน์ Surgical", precision=0)
+            ped_num = gr.Number(label="หมายเลขคอลัมน์ Pediatric", precision=0)
+            comm_num = gr.Number(label="หมายเลขคอลัมน์ Community", precision=0)
+        with gr.Row():
+            psy_num = gr.Number(label="หมายเลขคอลัมน์ Psychiatric", precision=0)
+            obs_num = gr.Number(label="หมายเลขคอลัมน์ Obstetrics", precision=0)
+        auto_btn.click(fn=on_upload, inputs=[file, selected_wards],
+                       outputs=[status, available, name_num, id_num,
+                                {"Medical": med_num, "Medical_1": med1_num, "Medical_2": med2_num,
+                                 "Surgical": surg_num, "Pediatric": ped_num, "Community": comm_num,
+                                 "Psychiatric": psy_num, "Obstetrics": obs_num}])
     with gr.Row():
+        clean_btn = gr.Button("Clean data (ดูพรีวิว)", variant="primary")
+        seed = gr.Textbox(label="Random seed (เว้นว่างเพื่อสุ่มใหม่)", value="")
+    preview = gr.Dataframe(label="พรีวิวข้อมูลที่ผ่านการ clean (หัว 30 แถว)", visible=True)
     cleaned_file = gr.File(label="ดาวน์โหลดไฟล์ cleaned.csv")
     clean_btn.click(
         fn=on_clean,
+        inputs=[file, selected_wards, capacity_df, name_num, id_num,
+                med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num],
+        outputs=[status, preview, cleaned_file, gr.State()]
     )
+    assign_btn = gr.Button("Assign (สุ่มตามลำดับอันดับ)")
+    assigned_preview = gr.Dataframe(label="ตัวอย่างผลการจัดสรร (หัว 30 แถว)")
+    assigned_file = gr.File(label="ดาวน์โหลดไฟล์ assigned.csv")
+    not_assigned_file = gr.File(label="ดาวน์โหลดไฟล์ not_assigned.csv")
+    leftover_text = gr.Textbox(label="สรุปความจุคงเหลือ", interactive=False)
     assign_btn.click(
         fn=on_assign,
+        inputs=[file, selected_wards, capacity_df, name_num, id_num,
+                med_num, med1_num, med2_num, surg_num, ped_num, comm_num, psy_num, obs_num, seed],
+        outputs=[status, assigned_preview, assigned_file, not_assigned_file, leftover_text]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
-gradio==4.44.0
 pandas==2.2.2
 openpyxl==3.1.5
 numpy==2.0.2

+gradio==4.44.1
 pandas==2.2.2
 openpyxl==3.1.5
 numpy==2.0.2
+rapidfuzz==3.9.7