internationalscholarsprogram committed on
Commit
92080b9
·
1 Parent(s): a81d369

Fix Gradio CSS and apply ISP branding

Browse files
Files changed (1) hide show
  1. app.py +136 -273
app.py CHANGED
@@ -23,7 +23,6 @@ UNIVERSITY_ID_MAP = {
23
  "William Jessup University": 10,
24
  "Wilkes University": 14,
25
  "University of South Dakota (USD)": 16,
26
- # Extend as you add more rows to university_handbook_sections
27
  }
28
 
29
 
@@ -31,11 +30,6 @@ UNIVERSITY_ID_MAP = {
31
  # DB CONNECTION HELPERS
32
  # -----------------------------
33
  def get_db_connection():
34
- """
35
- Create and return a MySQL connection using environment variables.
36
- Set these in HF Space secrets:
37
- DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME
38
- """
39
  return mysql.connector.connect(
40
  host=os.getenv("DB_HOST", "localhost"),
41
  port=int(os.getenv("DB_PORT", "3306")),
@@ -46,29 +40,21 @@ def get_db_connection():
46
 
47
 
48
  def fetch_section_json(university_id: int, section_key: str):
49
- """
50
- Fetch existing JSON for given university_id + section_key from DB.
51
- Returns parsed dict or None if not found.
52
- """
53
  conn = get_db_connection()
54
  try:
55
  cursor = conn.cursor()
56
- query = """
57
  SELECT section_json
58
  FROM university_handbook_sections
59
- WHERE university_id = %s AND section_key = %s
60
  LIMIT 1
61
- """
62
- cursor.execute(query, (university_id, section_key))
63
  row = cursor.fetchone()
64
- if not row:
65
- return None
66
- if not row[0]:
67
  return None
68
  try:
69
  return json.loads(row[0])
70
- except Exception:
71
- # JSON malformed in DB – treat as None to force overwrite
72
  return None
73
  finally:
74
  cursor.close()
@@ -76,19 +62,15 @@ def fetch_section_json(university_id: int, section_key: str):
76
 
77
 
78
  def update_section_json(university_id: int, section_key: str, new_data: Dict[str, Any]):
79
- """
80
- Update section_json in DB for given university_id + section_key.
81
- """
82
  conn = get_db_connection()
83
  try:
84
  cursor = conn.cursor()
85
- new_json_str = json.dumps(new_data, ensure_ascii=False)
86
- query = """
87
  UPDATE university_handbook_sections
88
- SET section_json = %s
89
- WHERE university_id = %s AND section_key = %s
90
- """
91
- cursor.execute(query, (new_json_str, university_id, section_key))
92
  conn.commit()
93
  finally:
94
  cursor.close()
@@ -98,362 +80,243 @@ def update_section_json(university_id: int, section_key: str, new_data: Dict[str
98
  # -----------------------------
99
  # DOCX PARSING HELPERS
100
  # -----------------------------
101
- def normalize_text(text: str) -> str:
102
- return " ".join(text.split()).strip()
103
-
104
 
105
- def split_doc_by_university(doc: Document) -> Dict[str, List[str]]:
106
- """
107
- Split the docx into blocks per university name using headings that match
108
- the keys in UNIVERSITY_ID_MAP.
109
- Returns dict: { "University Name": [list_of_paragraph_texts_in_block] }
110
- """
111
- paragraphs = [normalize_text(p.text) for p in doc.paragraphs]
112
- # Remove empties
113
- paragraphs = [p for p in paragraphs if p]
114
 
115
- # Find start indices for each known university name
 
116
  indices = []
117
  for i, p in enumerate(paragraphs):
118
- for uni_name in UNIVERSITY_ID_MAP.keys():
119
- # Exact match or paragraph starting with that name
120
- if p == uni_name or p.startswith(uni_name):
121
- indices.append((i, uni_name))
122
 
123
- # Sort by index
124
  indices.sort(key=lambda x: x[0])
125
 
126
- uni_blocks: Dict[str, List[str]] = {}
127
- for idx, (start_idx, uni_name) in enumerate(indices):
128
- end_idx = indices[idx + 1][0] if idx + 1 < len(indices) else len(paragraphs)
129
- block = paragraphs[start_idx:end_idx]
130
- uni_blocks[uni_name] = block
131
-
132
  return uni_blocks
133
 
134
 
135
- def parse_overview_block(block: List[str]) -> Dict[str, Any]:
136
- """
137
- Given the full block for a university, extract the overview section as JSON.
138
- We look for lines containing 'Founded:', 'Total Students:', etc.
139
- """
140
- overview = {}
141
  for line in block:
142
  if line.startswith("Founded:"):
143
- overview["founded"] = int(re.sub(r"[^\d]", "", line.split(":", 1)[1]))
144
- elif line.startswith("Total Students") or line.startswith("Total Students:"):
145
- overview["total_students"] = int(
146
- re.sub(r"[^\d]", "", line.split(":", 1)[1])
147
- )
148
- elif "Postgraduate" in line and "Students" in line:
149
  digits = re.sub(r"[^\d]", "", line.split(":", 1)[1])
150
- overview["postgraduate_students"] = int(digits) if digits else None
151
  elif line.startswith("Acceptance rate"):
152
- overview["acceptance_rate"] = line.split(":", 1)[1].strip()
153
  elif line.startswith("Location:"):
154
- overview["location"] = line.split(":", 1)[1].strip()
155
- elif "Yearly Out of State Tuition Fees" in line or "Yearly Tuition Fees" in line:
156
  digits = re.sub(r"[^\d]", "", line.split(":", 1)[1])
157
- overview["tuition_out_of_state_yearly"] = int(digits) if digits else None
 
158
 
159
- return overview
160
 
161
-
162
- def extract_between(block: List[str], start_marker: str, stop_markers: List[str]) -> List[str]:
163
- """
164
- Extract lines between a line containing `start_marker` and the first line
165
- containing any of `stop_markers`.
166
- """
167
- started = False
168
- buf: List[str] = []
169
  for line in block:
170
- if not started and start_marker in line:
171
  started = True
172
  continue
173
  if started:
174
- if any(m in line for m in stop_markers):
175
  break
176
  if line.strip():
177
- buf.append(line.strip())
178
- return buf
179
 
180
 
181
- def parse_benefits_block(block: List[str]) -> Dict[str, Any]:
182
- """
183
- Benefits are the lines following 'Benefits for ISP students at this school'
184
- until 'To qualify for The International Scholars Program' or university change.
185
- """
186
- benefits_lines = extract_between(
187
  block,
188
- start_marker="Benefits for ISP students at this school",
189
- stop_markers=[
190
- "To qualify for The International Scholars Program at",
191
- "To qualify for The International Scholars Program",
192
- ],
193
  )
194
- benefits = [normalize_text(l) for l in benefits_lines if l]
195
- return {"benefits": benefits}
196
 
197
 
198
- def parse_programs_block(block: List[str]) -> Dict[str, Any]:
199
- """
200
- Parse the 'Program table' portion.
201
- """
202
- program_lines = extract_between(
203
  block,
204
- start_marker="To qualify for The International Scholars Program",
205
- stop_markers=[
206
- "Montclair State University",
207
- "Missouri State University",
208
- "Indiana University of Pennsylvania",
209
- "University of Louisville",
210
- "University of Delaware",
211
- "Grand Valley State University",
212
- "Quinnipiac University",
213
- "William Jessup University",
214
- "Wilkes University",
215
- "University of South Dakota",
216
- ],
217
  )
 
 
218
 
219
- header_keywords = {
220
- "Program",
221
- "Designation",
222
- "Entrance Exam Required",
223
- "Entrance Examination",
224
- "Examples of Career Pathways",
225
- "Funding Category",
226
- }
227
- cleaned: List[str] = []
228
- for line in program_lines:
229
- if line in header_keywords:
230
- continue
231
- cleaned.append(line)
232
-
233
- programs: List[Dict[str, Any]] = []
234
- i = 0
235
  while i < len(cleaned):
236
  remaining = len(cleaned) - i
237
- if remaining < 4:
238
- break
239
-
240
- program_name = cleaned[i].strip()
241
- designation = cleaned[i + 1].strip() if remaining > 1 else ""
242
- entrance_exam = cleaned[i + 2].strip() if remaining > 2 else ""
243
- career_paths: List[str] = []
244
- j = i + 3
245
  while j < len(cleaned) and not cleaned[j].startswith("TIER"):
246
- career_paths.append(cleaned[j].strip())
247
  j += 1
248
-
249
- funding_category = cleaned[j].strip() if j < len(cleaned) else ""
250
-
251
- programs.append(
252
- {
253
- "program_name": program_name,
254
- "designation": designation,
255
- "entrance_exam": entrance_exam,
256
- "career_pathways": career_paths,
257
- "funding_category": funding_category,
258
- }
259
- )
260
-
261
  i = j + 1
262
-
263
  return {"programs": programs}
264
 
265
 
266
- def parse_university_block(uni_name: str, block: List[str]) -> Dict[str, Dict[str, Any]]:
267
- """
268
- Parse all sections for a single university block:
269
- - overview
270
- - benefits
271
- - programs
272
- """
273
- sections: Dict[str, Dict[str, Any]] = {}
274
-
275
- overview = parse_overview_block(block)
276
- if overview:
277
- overview.setdefault("university_name", uni_name)
278
- sections["overview"] = overview
279
 
280
- benefits = parse_benefits_block(block)
281
- if benefits.get("benefits"):
282
- sections["benefits"] = benefits
283
 
284
- programs = parse_programs_block(block)
285
- if programs.get("programs"):
286
- sections["programs"] = programs
287
 
288
  return sections
289
 
290
 
291
  # -----------------------------
292
- # MAIN SYNC FUNCTION
293
  # -----------------------------
294
- def run_full_sync(docx_file) -> str:
295
- """
296
- 1. Parse DOCX into university blocks
297
- 2. For each known university_id:
298
- a. Parse overview/benefits/programs from the handbook
299
- b. Fetch existing section_json from DB
300
- c. Compare (DeepDiff)
301
- d. If different, update DB
302
- 3. Return human-readable log
303
- """
304
  if docx_file is None:
305
  return "No handbook file uploaded."
306
 
307
  try:
308
- document = Document(docx_file.name)
309
  except Exception as e:
310
- return f"Failed to read DOCX: {e}"
311
 
312
- uni_blocks = split_doc_by_university(document)
313
-
314
- logs: List[str] = []
315
- total_updates = 0
316
 
317
  for uni_name, uni_id in UNIVERSITY_ID_MAP.items():
318
- block = uni_blocks.get(uni_name)
319
  if not block:
320
- logs.append(f"[WARN] No block found in handbook for '{uni_name}'. Skipping.")
321
  continue
322
 
323
- parsed_sections = parse_university_block(uni_name, block)
324
- if not parsed_sections:
325
- logs.append(f"[WARN] No parsable sections for '{uni_name}'. Skipping.")
326
  continue
327
 
328
- for section_key, new_data in parsed_sections.items():
329
- if section_key not in ("overview", "benefits", "programs"):
330
  continue
331
 
332
- current_data = fetch_section_json(uni_id, section_key)
333
- if current_data is None:
334
- logs.append(
335
- f"[INFO] No existing JSON for uni_id={uni_id}, section_key='{section_key}'. "
336
- f"Will only update if row exists."
337
- )
338
 
339
- diff = DeepDiff(current_data or {}, new_data, ignore_order=True)
340
  if not diff:
341
- logs.append(f"[OK] '{uni_name}' [{section_key}] – no change.")
342
  continue
343
 
344
  try:
345
- update_section_json(uni_id, section_key, new_data)
346
- total_updates += 1
347
- logs.append(
348
- f"[UPDATED] '{uni_name}' [{section_key}] – DB updated (differences detected)."
349
- )
350
  except Exception as e:
351
- logs.append(
352
- f"[ERROR] Failed to update '{uni_name}' [{section_key}]: {e}"
353
- )
354
 
355
- summary = f"\n\nTotal sections updated: {total_updates}\n"
356
- return "\n".join(logs) + summary
357
 
358
 
359
  # -----------------------------
360
- # ISP BRANDING & GRADIO UI
361
  # -----------------------------
362
  ISP_PRIMARY = "#062A4D"
363
  ISP_GOLD = "#D6A229"
364
  ISP_BG = "#F5F7FA"
365
- ISP_TEXT = "#333333"
366
 
367
- # Prefer local logo file (you must add this file in your repo: assets/logo-DRvZB3HV.svg)
368
- LOCAL_LOGO_PATH = "assets/logo-DRvZB3HV.svg"
369
- if os.path.exists(LOCAL_LOGO_PATH):
370
- ISP_LOGO_SRC = LOCAL_LOGO_PATH
371
- else:
372
- # Fallback to remote logo if local file missing
373
- ISP_LOGO_SRC = "https://qhtestingserver.com/assets/logo-DRvZB3HV.svg"
374
 
375
- css = f"""
 
376
  #isp-header {{
377
  background: {ISP_PRIMARY};
378
  padding: 20px;
379
- border-radius: 6px;
380
  display: flex;
381
  align-items: center;
382
  gap: 20px;
383
  }}
384
  #isp-header h1 {{
385
- color: white !important;
386
- font-size: 28px !important;
387
  margin: 0;
 
388
  }}
389
  #isp-logo {{
390
  height: 60px;
391
  }}
392
- .gradio-container {{
393
- background: {ISP_BG} !important;
394
- }}
395
  button {{
396
  background-color: {ISP_GOLD} !important;
397
  color: black !important;
398
- font-weight: bold !important;
399
  border-radius: 8px !important;
 
400
  }}
 
 
 
 
401
  """
402
 
403
- with gr.Blocks(css=css, title="Automated Handbook Sync Data Pipeline") as demo:
404
-
405
- # Header with Logo + Title
406
- with gr.Row(elem_id="isp-header"):
407
- gr.HTML(
408
- f"""
409
- <img id='isp-logo' src='{ISP_LOGO_SRC}' alt='ISP Logo'/>
410
- <h1>ISP Handbook → Data Pipeline Sync (Full Auto)</h1>
411
- """
412
- )
413
 
414
- gr.Markdown(
415
- """
416
- ### Automated Handbook Sync Data Pipeline
417
-
418
- This internal ISP tool automates:
419
-
420
- - Parsing university sections from the official ISP Handbook
421
- - Comparing extracted content with the **university_handbook_sections** table
422
- - Updating only fields that have changed
423
- - Maintaining data uniformity and reducing manual effort
424
 
425
- ---
426
 
427
- #### Instructions
 
 
 
 
 
 
428
 
429
- 1. Upload the complete **ISP Handbook (.docx)**
430
- 2. Click **Run Full Sync**
431
- 3. Review the logs to see which university sections were updated
432
 
433
- Only handbook-sourced fields are updated:
434
 
435
- - `overview`
436
- - `benefits`
437
- - `programs`
 
438
 
439
- Other database sections (e.g., images) remain untouched.
440
- """
441
- )
442
 
443
  file_input = gr.File(label="Upload ISP Handbook DOCX", file_types=[".docx"])
 
444
 
445
- sync_button = gr.Button("Run Full Sync")
446
- log_output = gr.Textbox(
447
- label="Sync Log",
448
- lines=30,
449
- interactive=False,
450
- )
451
 
452
- sync_button.click(
453
- fn=run_full_sync,
454
- inputs=file_input,
455
- outputs=log_output,
456
- )
457
 
458
 
459
  if __name__ == "__main__":
 
23
  "William Jessup University": 10,
24
  "Wilkes University": 14,
25
  "University of South Dakota (USD)": 16,
 
26
  }
27
 
28
 
 
30
  # DB CONNECTION HELPERS
31
  # -----------------------------
32
  def get_db_connection():
 
 
 
 
 
33
  return mysql.connector.connect(
34
  host=os.getenv("DB_HOST", "localhost"),
35
  port=int(os.getenv("DB_PORT", "3306")),
 
40
 
41
 
42
def fetch_section_json(university_id: int, section_key: str):
    """Fetch and parse the stored section JSON for one university section.

    Args:
        university_id: Row id matching a value in UNIVERSITY_ID_MAP.
        section_key: Handbook section key, e.g. "overview", "benefits".

    Returns:
        The parsed dict, or None when the row is missing, the column is
        empty, or the stored JSON is malformed (malformed JSON is treated
        as absent so the sync will overwrite it).
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                """
                SELECT section_json
                FROM university_handbook_sections
                WHERE university_id=%s AND section_key=%s
                LIMIT 1
                """,
                (university_id, section_key),
            )
            row = cursor.fetchone()
            if not row or not row[0]:
                return None
            try:
                return json.loads(row[0])
            except (json.JSONDecodeError, TypeError):
                # Malformed JSON in the DB -- treat as missing to force overwrite.
                # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
                return None
        finally:
            cursor.close()
    finally:
        # The original closed only the cursor, leaking one DB connection per
        # call; close the connection as well.
        conn.close()
 
62
 
63
 
64
def update_section_json(university_id: int, section_key: str, new_data: Dict[str, Any]):
    """Overwrite the stored section JSON for one university section.

    Args:
        university_id: Row id matching a value in UNIVERSITY_ID_MAP.
        section_key: Handbook section key, e.g. "overview", "benefits".
        new_data: Replacement payload; serialized with ensure_ascii=False so
            non-ASCII handbook text is stored verbatim.

    Only updates an existing row (plain UPDATE -- no upsert).
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            new_json = json.dumps(new_data, ensure_ascii=False)
            cursor.execute(
                """
                UPDATE university_handbook_sections
                SET section_json=%s
                WHERE university_id=%s AND section_key=%s
                """,
                (new_json, university_id, section_key),
            )
            conn.commit()
        finally:
            cursor.close()
    finally:
        # The original closed only the cursor, leaking one DB connection per
        # call; close the connection as well.
        conn.close()
 
80
  # -----------------------------
81
  # DOCX PARSING HELPERS
82
  # -----------------------------
83
def normalize_text(t):
    """Collapse every run of whitespace in *t* to a single space and trim the ends."""
    # split() with no arguments already drops leading/trailing whitespace;
    # the trailing strip() is a harmless no-op kept for parity.
    tokens = t.split()
    return " ".join(tokens).strip()
 
 
84
 
 
 
 
 
 
 
 
 
 
85
 
86
def split_doc_by_university(doc: Document):
    """Partition the handbook into per-university paragraph blocks.

    A block starts at any non-empty paragraph that begins with a known
    university name (a key of UNIVERSITY_ID_MAP) and runs until the next
    such heading, or the end of the document.

    Returns:
        Mapping of university name -> list of normalized paragraph texts.
    """
    texts = [normalize_text(par.text) for par in doc.paragraphs if par.text.strip()]

    # (paragraph index, university name) for every heading match.  An exact
    # match is subsumed by startswith, so one test covers both cases.
    markers = [
        (pos, name)
        for pos, text in enumerate(texts)
        for name in UNIVERSITY_ID_MAP
        if text.startswith(name)
    ]
    markers.sort(key=lambda pair: pair[0])

    blocks = {}
    for k, (begin, name) in enumerate(markers):
        finish = markers[k + 1][0] if k + 1 < len(markers) else len(texts)
        blocks[name] = texts[begin:finish]
    return blocks
101
 
102
 
103
def parse_overview_block(block: List[str]) -> Dict[str, Any]:
    """Extract overview fields (founded year, enrollment, etc.) from a block.

    Args:
        block: Normalized paragraph texts for one university.

    Returns:
        Dict with any of: founded, total_students, postgraduate_students,
        acceptance_rate, location, tuition_out_of_state_yearly.  Numeric
        fields are None when the line carries no digits; lines that never
        appear simply leave their key absent.
    """

    def _after_colon(line: str) -> str:
        # Text after the first ':'; empty when there is no colon.  The
        # original indexed split(...)[1] unconditionally, raising IndexError
        # on colon-less "Postgraduate"/"Tuition" lines.
        parts = line.split(":", 1)
        return parts[1] if len(parts) == 2 else ""

    def _as_int(text: str):
        # Keep only digits; None when nothing numeric remains.  The original
        # called int() directly for founded/total_students and raised
        # ValueError on digit-less lines.
        digits = re.sub(r"[^\d]", "", text)
        return int(digits) if digits else None

    data: Dict[str, Any] = {}
    for line in block:
        if line.startswith("Founded:"):
            data["founded"] = _as_int(_after_colon(line))
        elif line.startswith("Total Students"):
            data["total_students"] = _as_int(_after_colon(line))
        elif "Postgraduate" in line:
            data["postgraduate_students"] = _as_int(_after_colon(line))
        elif line.startswith("Acceptance rate"):
            data["acceptance_rate"] = _after_colon(line).strip()
        elif line.startswith("Location:"):
            data["location"] = _after_colon(line).strip()
        elif "Tuition" in line:
            data["tuition_out_of_state_yearly"] = _as_int(_after_colon(line))
    return data
121
 
 
122
 
123
def extract_between(block, start, stops):
    """Collect the non-blank lines strictly between two markers.

    Scanning *block* in order, skip everything up to and including the first
    line containing *start*, then gather lines until one contains any marker
    in *stops* (that line excluded).  Blank lines inside the window are dropped.
    """
    collected = []
    in_section = False
    for entry in block:
        if not in_section:
            # Still hunting for the opening marker; the marker line itself
            # is never collected.
            in_section = start in entry
            continue
        if any(marker in entry for marker in stops):
            break
        if entry.strip():
            collected.append(entry)
    return collected
135
 
136
 
137
def parse_benefits_block(block):
    """Collect the ISP benefit bullet lines for one university block.

    Benefits are the lines between the 'Benefits for ISP students at this
    school' heading and the 'To qualify...' heading that follows it.
    """
    start_marker = "Benefits for ISP students at this school"
    stop_markers = ["To qualify for The International Scholars Program"]
    raw_lines = extract_between(block, start_marker, stop_markers)
    return {"benefits": [normalize_text(entry) for entry in raw_lines]}
 
144
 
145
 
146
def parse_programs_block(block):
    """Parse the flattened program table for one university block.

    Each program record is a run of lines shaped as:
        name, designation, entrance exam, career pathway..., TIER funding line
    where the career-pathway run ends at the first line starting with "TIER".
    """
    rows = extract_between(
        block,
        "To qualify for The International Scholars Program",
        list(UNIVERSITY_ID_MAP.keys()),
    )

    # Column headers repeat in the flattened table text; drop them.
    column_headers = {
        "Program",
        "Designation",
        "Entrance Exam Required",
        "Examples of Career Pathways",
        "Funding Category",
    }
    body = [row for row in rows if row not in column_headers]

    programs = []
    pos = 0
    total = len(body)
    # Need at least name + designation + exam + one more line per record.
    while total - pos >= 4:
        title = body[pos]
        designation = body[pos + 1]
        exam = body[pos + 2]

        # Variable-length pathway run; stops at the TIER funding line.
        cursor = pos + 3
        pathways = []
        while cursor < total and not body[cursor].startswith("TIER"):
            pathways.append(body[cursor])
            cursor += 1
        funding = body[cursor] if cursor < total else ""

        programs.append(
            {
                "program_name": title,
                "designation": designation,
                "entrance_exam": exam,
                "career_pathways": pathways,
                "funding_category": funding,
            }
        )
        pos = cursor + 1

    return {"programs": programs}
177
 
178
 
179
def parse_university_block(name: str, block: List[str]):
    """Parse every supported section for one university.

    Runs the overview, benefits and programs parsers over *block*; sections
    that come back empty are omitted from the result.  The overview (when
    present) is stamped with the university name.
    """
    sections = {}

    overview = parse_overview_block(block)
    if overview:
        overview["university_name"] = name
        sections["overview"] = overview

    # Benefits/programs share the same shape: {key: list}; keep the section
    # only when its list is non-empty.
    for key, parser in (
        ("benefits", parse_benefits_block),
        ("programs", parse_programs_block),
    ):
        payload = parser(block)
        if payload.get(key):
            sections[key] = payload

    return sections
195
 
196
 
197
  # -----------------------------
198
+ # MAIN SYNC LOGIC
199
  # -----------------------------
200
def run_full_sync(docx_file):
    """End-to-end handbook sync, returning a human-readable log.

    Parses the uploaded DOCX into per-university blocks, diffs each parsed
    section (overview/benefits/programs) against the DB copy with DeepDiff,
    and writes back only the sections that actually changed.
    """
    if docx_file is None:
        return "No handbook file uploaded."

    try:
        handbook = Document(docx_file.name)
    except Exception as e:
        return f"Error reading DOCX: {e}"

    per_university = split_doc_by_university(handbook)
    log_lines = []
    changed_count = 0

    for uni_name, uni_id in UNIVERSITY_ID_MAP.items():
        paragraphs = per_university.get(uni_name)
        if not paragraphs:
            log_lines.append(f"[WARN] Missing block for: {uni_name}")
            continue

        parsed = parse_university_block(uni_name, paragraphs)
        if not parsed:
            log_lines.append(f"[WARN] No valid sections found for: {uni_name}")
            continue

        for key, new_json in parsed.items():
            # Only handbook-sourced sections are ever written back.
            if key not in ("overview", "benefits", "programs"):
                continue

            old_json = fetch_section_json(uni_id, key)
            # ignore_order: list-order differences are not real changes.
            if not DeepDiff(old_json or {}, new_json, ignore_order=True):
                log_lines.append(f"[OK] {uni_name} [{key}] unchanged.")
                continue

            try:
                update_section_json(uni_id, key, new_json)
                log_lines.append(f"[UPDATED] {uni_name} [{key}] updated.")
                changed_count += 1
            except Exception as e:
                log_lines.append(f"[ERROR] Updating {uni_name} [{key}]: {e}")

    log_lines.append(f"\nTotal sections updated: {changed_count}")
    return "\n".join(log_lines)
243
 
244
 
245
  # -----------------------------
246
+ # ISP BRANDING (NO css= ARGUMENT)
247
  # -----------------------------
248
# Brand palette: dark navy primary, gold accent, light page background.
ISP_PRIMARY = "#062A4D"
ISP_GOLD = "#D6A229"
ISP_BG = "#F5F7FA"

# Prefer the logo bundled with the repo; fall back to the hosted copy when
# the local asset is missing (e.g. a fresh deployment without assets/).
LOCAL_LOGO = "assets/logo-DRvZB3HV.svg"
LOGO_SRC = LOCAL_LOGO if os.path.exists(LOCAL_LOGO) else "https://qhtestingserver.com/assets/logo-DRvZB3HV.svg"

# Injected as a raw <style> tag via gr.HTML() instead of gr.Blocks(css=...),
# hence the literal <style> wrapper inside this f-string.  Doubled braces
# ({{ }}) escape the CSS rule blocks within the f-string.
CUSTOM_CSS = f"""
<style>
#isp-header {{
    background: {ISP_PRIMARY};
    padding: 20px;
    border-radius: 8px;
    display: flex;
    align-items: center;
    gap: 20px;
}}
#isp-header h1 {{
    color: white;
    margin: 0;
    font-size: 26px;
}}
#isp-logo {{
    height: 60px;
}}
button {{
    background-color: {ISP_GOLD} !important;
    color: black !important;
    border-radius: 8px !important;
    font-weight: bold !important;
}}
.gradio-container {{
    background: {ISP_BG} !important;
}}
</style>
"""
284
 
 
 
 
 
 
 
 
 
 
 
285
 
286
+ # -----------------------------
287
+ # GRADIO UI
288
+ # -----------------------------
289
with gr.Blocks(title="ISP Automated Handbook Data Pipeline") as demo:
    # CSS is injected as an HTML <style> block rather than via
    # gr.Blocks(css=...) -- the css= path was what this commit replaces.
    gr.HTML(CUSTOM_CSS)

    # Branded header: logo + title, styled by the #isp-header CSS rules.
    gr.HTML(f"""
    <div id='isp-header'>
        <img id='isp-logo' src='{LOGO_SRC}' alt='ISP Logo'/>
        <h1>ISP Handbook → Data Pipeline Sync (Full Auto)</h1>
    </div>
    """)

    gr.Markdown("""
    ### Automated Handbook Sync Data Pipeline

    Upload the official ISP Handbook (.docx), and this tool will:

    - Extract university sections
    - Compare them with the **university_handbook_sections** table
    - Update only changed JSON fields
    - Ensure consistent, synchronized data

    ---
    """)

    # Controls: DOCX upload -> sync button -> log textbox.
    file_input = gr.File(label="Upload ISP Handbook DOCX", file_types=[".docx"])
    log_output = gr.Textbox(label="Sync Log", lines=30)

    sync_btn = gr.Button("Run Full Sync")

    # Wire the button to the sync pipeline; the returned log string fills
    # the textbox.
    sync_btn.click(fn=run_full_sync, inputs=file_input, outputs=log_output)
 
 
 
 
320
 
321
 
322
  if __name__ == "__main__":