Spaces:

internationalscholarsprogram
/

docx-json-sync

Sleeping

App Files Files Community

internationalscholarsprogram committed on Dec 5, 2025

Commit

40200e1

1 Parent(s): a45863a

Brand ISP Automated Handbook Sync Data Pipeline

Browse files

Files changed (1) hide show

app.py +79 -22

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 import json
 import re
-from typing import Dict, Any, List, Tuple
 import gradio as gr
 from docx import Document
@@ -243,14 +243,21 @@ def parse_programs_block(block: List[str]) -> Dict[str, Any]:
     )
     # Remove the header row if present
-    header_keywords = {"Program", "Designation", "Entrance Exam Required", "Entrance Examination", "Examples of Career Pathways", "Funding Category"}
     cleaned: List[str] = []
     for line in program_lines:
         if line in header_keywords:
             continue
         cleaned.append(line)
-    # Now group by 5-6 lines per program:
     # 0: program_name
     # 1: designation
     # 2: entrance_exam
@@ -370,9 +377,6 @@ def run_full_sync(docx_file) -> str:
             current_data = fetch_section_json(uni_id, section_key)
             if current_data is None:
-                # No existing record or invalid JSON – we still require that the row exists;
-                # if not, we just log and skip.
-                # If you want to INSERT missing rows, you can add that logic here.
                 logs.append(
                     f"[INFO] No existing JSON for uni_id={uni_id}, section_key='{section_key}'. "
                     f"Will only update if row exists."
@@ -401,32 +405,84 @@ def run_full_sync(docx_file) -> str:
 # -----------------------------
-# GRADIO UI
 # -----------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("# ISP Handbook → Database Sync (Full Auto)")
     gr.Markdown(
-        """
-Upload the **full ISP Handbook DOCX**.
-On **Run full sync**, the app will:
-1. Parse each university block from the handbook
-2. Extract **Overview**, **Benefits**, and **Programs** sections
-3. Compare them with `university_handbook_sections.section_json`
-4. Update only rows that have changed
-Only sections that are sourced from the handbook are touched:
-- `overview`
-- `benefits`
-- `programs`
-Sections like `campus_image` / `image` are **never updated** here.
         """
     )
     file_input = gr.File(label="Upload ISP Handbook DOCX", file_types=[".docx"])
-    sync_button = gr.Button("Run full sync")
     log_output = gr.Textbox(
         label="Sync Log",
         lines=30,
@@ -439,5 +495,6 @@ Sections like `campus_image` / `image` are **never updated** here.
         outputs=log_output,
     )
 if __name__ == "__main__":
     demo.launch()

 import os
 import json
 import re
+from typing import Dict, Any, List
 import gradio as gr
 from docx import Document
     )
     # Remove the header row if present
+    header_keywords = {
+        "Program",
+        "Designation",
+        "Entrance Exam Required",
+        "Entrance Examination",
+        "Examples of Career Pathways",
+        "Funding Category",
+    }
     cleaned: List[str] = []
     for line in program_lines:
         if line in header_keywords:
             continue
         cleaned.append(line)
+    # Now group by 5-6 lines per program:
     # 0: program_name
     # 1: designation
     # 2: entrance_exam
             current_data = fetch_section_json(uni_id, section_key)
             if current_data is None:
                 logs.append(
                     f"[INFO] No existing JSON for uni_id={uni_id}, section_key='{section_key}'. "
                     f"Will only update if row exists."
 # -----------------------------
+# ISP BRANDING & GRADIO UI
 # -----------------------------
+ISP_PRIMARY = "#062A4D"
+ISP_GOLD = "#D6A229"
+ISP_BG = "#F5F7FA"
+ISP_TEXT = "#333333"
+ISP_LOGO = "https://qhtestingserver.com/assets/logo-DRvZB3HV.svg"
+css = f"""
+#isp-header {{
+    background: {ISP_PRIMARY};
+    padding: 20px;
+    border-radius: 6px;
+    display: flex;
+    align-items: center;
+    gap: 20px;
+}}
+#isp-header h1 {{
+    color: white !important;
+    font-size: 28px !important;
+    margin: 0;
+}}
+#isp-logo {{
+    height: 60px;
+}}
+.gradio-container {{
+    background: {ISP_BG} !important;
+}}
+button {{
+    background-color: {ISP_GOLD} !important;
+    color: black !important;
+    font-weight: bold !important;
+    border-radius: 8px !important;
+}}
+"""
+with gr.Blocks(css=css, title="Automated Handbook Sync Data Pipeline") as demo:
+    # Header with Logo + Title
+    with gr.Row(elem_id="isp-header"):
+        gr.HTML(f"""
+            <img id='isp-logo' src='{ISP_LOGO}'/>
+            <h1>Automated Handbook Sync Data Pipeline</h1>
+        """)
     gr.Markdown(
+        f"""
+### Welcome to the ISP Handbook Sync System
+This internal tool fully automates:
+- Parsing university sections from the official ISP Handbook
+- Comparing extracted content with the **university_handbook_sections** table
+- Updating only fields that have changed
+- Maintaining data uniformity and reducing manual effort
+---
+#### **Instructions**
+1. Upload the complete **ISP Handbook (.docx)**
+2. Click **Run Full Sync**
+3. Review the logs to see which university sections were updated
+Only official handbook-sourced fields are updated:
+- `overview`
+- `benefits`
+- `programs`
+Other database sections (e.g., images) remain untouched.
+---
         """
     )
     file_input = gr.File(label="Upload ISP Handbook DOCX", file_types=[".docx"])
+    sync_button = gr.Button("Run Full Sync")
     log_output = gr.Textbox(
         label="Sync Log",
         lines=30,
         outputs=log_output,
     )
 if __name__ == "__main__":
     demo.launch()