gtani committed on
Commit
7811d4c
·
verified ·
1 Parent(s): 6b20de6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -227
app.py CHANGED
@@ -1,120 +1,76 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from pathlib import Path
4
  from typing import Dict, List, Tuple
5
- import os
6
- import base64
7
 
 
 
 
 
8
 
9
- PROCESSED_DATA_DIR = Path(".")
10
- DATA_DIR = Path("./data")
11
- # Embed logo as a base64 data URI to avoid Gradio toolbar interactions
12
- logo_path = "rowsquared-logo-large.png"
13
- with open(logo_path, "rb") as f:
14
- logo_b64 = base64.b64encode(f.read()).decode("utf-8")
15
-
16
-
17
-
18
- # ----------------------------
19
- # Data loading & preprocessing
20
- # ----------------------------
21
  df_isco = (
22
  pd.read_excel(
23
  PROCESSED_DATA_DIR / "isco_imperfect.xlsx",
24
  converters={"major": str, "sub_major": str, "minor": str, "unit": str},
25
- )[["major_label", "sub_major_label", "minor_label", "unit_label"]]
26
- .dropna()
27
- .drop_duplicates()
28
- .reset_index(drop=True)
29
  )
30
-
31
- # Build nested hierarchy dict: {major: {sub: {minor: [units]}}}
32
- hierarchy: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
33
  for _, r in df_isco.iterrows():
34
- hierarchy.setdefault(r.major_label, {}) \
35
- .setdefault(r.sub_major_label, {}) \
36
- .setdefault(r.minor_label, []) \
37
  .append(r.unit_label)
38
-
39
- # Ensure uniqueness & sorting at leaf lists
40
  for maj in hierarchy:
41
  for sub in hierarchy[maj]:
42
  for mn in hierarchy[maj][sub]:
43
- hierarchy[maj][sub][mn] = sorted(list(dict.fromkeys(hierarchy[maj][sub][mn])))
44
-
45
- # Fast helpers for children
46
- def majors() -> List[str]:
47
- return sorted(hierarchy.keys())
48
-
49
- def submajors(maj: str) -> List[str]:
50
- return sorted(hierarchy.get(maj, {}).keys())
51
-
52
- def minors(maj: str, sub: str) -> List[str]:
53
- return sorted(hierarchy.get(maj, {}).get(sub, {}).keys())
54
-
55
- def units(maj: str, sub: str, mn: str) -> List[str]:
56
- return hierarchy.get(maj, {}).get(sub, {}).get(mn, [])
57
-
58
- # ----------------------------
59
- # Records to annotate
60
- # ----------------------------
61
- records = pd.read_excel(PROCESSED_DATA_DIR / "isco_predictions.xlsx").copy()
62
- for col in ["major_label", "sub_major_label", "minor_label", "unit_label"]:
63
- if col not in records:
64
- records[col] = ""
65
-
66
- if "annotated" not in records:
67
- records["annotated"] = False
68
-
69
- # ensure not views
70
- for col in ["major_label", "sub_major_label", "minor_label", "unit_label", "annotated"]:
71
- records[col] = records[col].copy()
72
-
73
- records.reset_index(drop=True, inplace=True)
74
-
75
- # -----------------------------------
76
- # Core logic: clamp & state management
77
- # -----------------------------------
78
- def clamp_path(maj: str, sub: str, mn: str, un: str
79
- ) -> Tuple[str, str, str, str, List[str], List[str], List[str], List[str]]:
80
- """Return a valid (maj, sub, mn, un) tuple + their choices lists.
81
- Only replace a level if it's invalid for the hierarchy."""
82
- maj_choices = majors()
83
- if maj not in maj_choices:
84
- maj = maj_choices[0] if maj_choices else ""
85
-
86
- sub_choices = submajors(maj) if maj else []
87
- if sub not in sub_choices:
88
- sub = sub_choices[0] if sub_choices else ""
89
-
90
- mn_choices = minors(maj, sub) if sub else []
91
- if mn not in mn_choices:
92
- mn = mn_choices[0] if mn_choices else ""
93
-
94
- un_choices = units(maj, sub, mn) if mn else []
95
- if un not in un_choices:
96
- un = un_choices[0] if un_choices else ""
97
-
98
- return maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices
99
-
100
- def save_record(i: int, maj: str, sub: str, mn: str, un: str) -> None:
101
- records.loc[i, ["major_label", "sub_major_label", "minor_label", "unit_label"]] = [maj, sub, mn, un]
102
- records.loc[i, "annotated"] = True
103
-
104
- def status_text(i: int) -> str:
105
- return f"**Status**: {'✅ Annotated' if records.loc[i, 'annotated'] else '❌ Not Annotated'}"
106
-
107
- def load_record(i: int):
108
- rec = records.loc[i]
109
- maj, sub, mn, un, maj_c, sub_c, mn_c, un_c = clamp_path(
110
- rec["major_label"], rec["sub_major_label"], rec["minor_label"], rec["unit_label"]
111
  )
112
- # Persist clamped values back (only if changed)
113
- save_record(i, maj, sub, mn, un)
114
-
115
- record_md = f"## Occupation: {rec['occupation_title_main']}\n## Industry: {rec['industry_title_main']}"
116
  return (
117
- record_md,
118
  status_text(i),
119
  gr.update(choices=maj_c, value=maj),
120
  gr.update(choices=sub_c, value=sub),
@@ -122,161 +78,150 @@ def load_record(i: int):
122
  gr.update(choices=un_c, value=un),
123
  )
124
 
125
- # ---------------------
126
- # Event handler helpers
127
- # ---------------------
128
- def on_major_change(new_major: str, i: int):
129
- sub_c = submajors(new_major)
130
- sub = sub_c[0] if sub_c else ""
131
- mn_c = minors(new_major, sub) if sub else []
132
- mn = mn_c[0] if mn_c else ""
133
- un_c = units(new_major, sub, mn) if mn else []
134
- un = un_c[0] if un_c else ""
135
- save_record(i, new_major, sub, mn, un)
136
  return (
137
- gr.update(choices=majors(), value=new_major),
138
- gr.update(choices=sub_c, value=sub),
139
- gr.update(choices=mn_c, value=mn),
140
- gr.update(choices=un_c, value=un),
141
  status_text(i),
142
  )
143
 
144
- def on_sub_change(new_sub: str, i: int, major: str):
145
- mn_c = minors(major, new_sub)
146
- mn = mn_c[0] if mn_c else ""
147
- un_c = units(major, new_sub, mn) if mn else []
148
- un = un_c[0] if un_c else ""
149
- records.loc[i, ["sub_major_label", "minor_label", "unit_label"]] = [new_sub, mn, un]
150
- records.loc[i, "annotated"] = True
151
  return (
152
- gr.update(choices=submajors(major), value=new_sub),
153
- gr.update(choices=mn_c, value=mn),
154
- gr.update(choices=un_c, value=un),
155
  status_text(i),
156
  )
157
 
158
- def on_minor_change(new_minor: str, i: int, major: str, sub: str):
159
- un_c = units(major, sub, new_minor)
160
- un = un_c[0] if un_c else ""
161
- records.loc[i, ["minor_label", "unit_label"]] = [new_minor, un]
162
- records.loc[i, "annotated"] = True
163
  return (
164
- gr.update(choices=minors(major, sub), value=new_minor),
165
- gr.update(choices=un_c, value=un),
166
  status_text(i),
167
  )
168
 
169
- def on_unit_change(new_unit: str, i: int, major: str, sub: str, mn: str):
170
- un_c = units(major, sub, mn)
171
- if new_unit not in un_c:
172
- new_unit = un_c[0] if un_c else ""
173
- records.loc[i, "unit_label"] = new_unit
174
- records.loc[i, "annotated"] = True
175
- return gr.update(choices=un_c, value=new_unit), status_text(i)
176
-
177
- def go_next(i: int) -> int:
178
- return (i + 1) % len(records)
179
-
180
- def go_prev(i: int) -> int:
181
- return (i - 1) % len(records)
182
-
183
- # ---- NAVIGATION: save + move + reload in ONE callback ----
184
-
185
- def save_and_jump(i: int, direction: str):
186
- # Final safety net: clamp and persist whatever is currently stored
187
- rec = records.loc[i]
188
- maj, sub, mn, un, *_ = clamp_path(
189
- rec["major_label"], rec["sub_major_label"], rec["minor_label"], rec["unit_label"]
190
- )
191
- save_record(i, maj, sub, mn, un)
192
- new_i = go_next(i) if direction == "next" else go_prev(i)
193
- return (new_i,) + load_record(new_i)
194
-
195
- def download_annotations() -> str:
196
- path = PROCESSED_DATA_DIR / "annotated_output.csv"
197
- records.to_csv(path, index=False)
198
  return str(path)
199
 
200
- # --------------
201
- # Build the UI
202
- # --------------
203
  def build_gradio_app():
204
- with gr.Blocks() as demo:
205
- # a flag in State for whether the user is logged in
206
- is_auth = gr.State(False)
207
 
208
- # ————————
209
- # LOGIN SCREEN
210
- # ————————
211
- with gr.Column(visible=not is_auth.value) as login_box:
 
 
 
 
 
 
 
212
  user_in = gr.Textbox(label="Username")
213
  pass_in = gr.Textbox(label="Password", type="password")
214
- login_btn= gr.Button("🔒 Log me in")
215
  login_msg= gr.Markdown("", visible=False)
216
 
217
- def check_creds(u, p):
218
- # load your secrets from env
219
- USER = os.environ.get("APP_USER", "")
220
- PWD = os.environ.get("APP_PASS", "")
221
- if u == USER and p == PWD:
222
- return gr.update(visible=False), gr.update(visible=True), gr.update(value=True, visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  else:
224
- return gr.update(visible=True, value="❌ Bad credentials!"), None, None
225
 
226
  login_btn.click(
227
  check_creds,
228
- inputs=[user_in, pass_in],
229
- outputs=[login_msg, login_box, is_auth]
230
  )
231
 
232
- # ————————
233
- # MAIN APP (hidden until auth)
234
- # ————————
235
- with gr.Column(visible=is_auth.value) as app_box:
236
- # your logo + title + everything else goes here…
237
- with gr.Row():
238
- # embed your base64 logo exactly where you want
239
- gr.HTML(
240
- f'<img src="data:image/png;base64,{logo_b64}" '
241
- 'style="pointer-events:none; user-select:none; width:200px; display:block;" />'
242
- )
243
- gr.Markdown("# ISCO Annotation", elem_id="isco-title")
244
-
245
- # then all your States, buttons, radios, callbacks…
246
- idx_state = gr.State(0)
247
- record_md = gr.Markdown()
248
- status_md = gr.Markdown()
249
- prev_btn = gr.Button("⬅ Previous")
250
- next_btn = gr.Button("✅ Next")
251
- major_radio = gr.Radio(label="Major", choices=[], interactive=True)
252
- sub_radio = gr.Radio(label="Sub-major", choices=[], interactive=True)
253
- minor_radio = gr.Radio(label="Minor", choices=[], interactive=True)
254
- unit_radio = gr.Radio(label="Unit", choices=[], interactive=True)
255
- download_btn = gr.Button("📥 Download")
256
- download_file= gr.File(visible=False)
257
-
258
- # wire up your existing load_record, on_change, save_and_jump, etc.
259
- demo.load(lambda: (0,) + load_record(0),
260
- outputs=[idx_state, record_md, status_md,
261
- major_radio, sub_radio, minor_radio, unit_radio])
262
- next_btn.click(lambda i: save_and_jump(i, "next"),
263
- inputs=[idx_state],
264
- outputs=[idx_state, record_md, status_md,
265
- major_radio, sub_radio, minor_radio, unit_radio])
266
- # … and so on for prev_btn, radio.change handlers, download_btn …
267
-
268
- # hide Gradio footer & share links:
269
- gr.HTML("""
270
- <style>
271
- footer { display: none !important; }
272
- .gradio-container .api-link,
273
- .gradio-container .share-link { display: none !important; }
274
- #isco-title { text-align: center; margin-top: 1em; }
275
- </style>
276
- """)
277
 
278
  return demo
279
 
280
- if __name__ == "__main__":
281
  demo = build_gradio_app()
282
- demo.queue().launch(show_api=False, share=True, server_name="0.0.0.0")
 
1
+ # app.py
2
+ import os, base64
3
  import gradio as gr
4
  import pandas as pd
5
  from pathlib import Path
6
  from typing import Dict, List, Tuple
 
 
7
 
8
# --- Read the logo file and base64-encode it for embedding as a data URI ---
LOGO_PATH = Path("data/asset/rowsquared-logo-large.png")
logo_b64 = base64.b64encode(LOGO_PATH.read_bytes()).decode()
12
 
13
# --- Read the ISCO label table and fold it into a nested lookup ---
PROCESSED_DATA_DIR = Path("./data/processed")

df_isco = pd.read_excel(
    PROCESSED_DATA_DIR / "isco_imperfect.xlsx",
    converters={"major": str, "sub_major": str, "minor": str, "unit": str},
)
# Keep only the four label columns, then drop incomplete and repeated rows.
df_isco = df_isco[["major_label", "sub_major_label", "minor_label", "unit_label"]]
df_isco = df_isco.dropna().drop_duplicates().reset_index(drop=True)

# Shape: {major label: {sub-major label: {minor label: [unit labels]}}}
hierarchy: Dict[str, Dict[str, Dict[str, List[str]]]] = {}
for _, r in df_isco.iterrows():
    sub_level = hierarchy.setdefault(r.major_label, {})
    minor_level = sub_level.setdefault(r.sub_major_label, {})
    minor_level.setdefault(r.minor_label, []).append(r.unit_label)

# De-duplicate and sort the unit labels stored at each leaf.
for maj in hierarchy:
    for sub in hierarchy[maj]:
        for mn in hierarchy[maj][sub]:
            unique_units = dict.fromkeys(hierarchy[maj][sub][mn])
            hierarchy[maj][sub][mn] = sorted(unique_units)
33
+
34
def majors():
    """Return all major labels in the hierarchy, sorted."""
    return sorted(hierarchy)


def submajors(m):
    """Return the sorted sub-major labels under major `m` (empty if `m` is unknown)."""
    return sorted(hierarchy.get(m, {}))


def minors(m, s):
    """Return the sorted minor labels under (`m`, `s`) (empty if the path is unknown)."""
    return sorted(hierarchy.get(m, {}).get(s, {}))


def units(m, s, n):
    """Return the unit labels stored under (`m`, `s`, `n`), or [] if the path is unknown.

    The list is the one held inside `hierarchy`, not a copy.
    """
    return hierarchy.get(m, {}).get(s, {}).get(n, [])
38
+
39
# --- Load the records to annotate and make sure the working columns exist ---
records = pd.read_excel(PROCESSED_DATA_DIR / "isco_predictions.xlsx").copy()

# Label columns default to "" and the annotated flag to False when absent.
for c, fill in [
    ("major_label", ""),
    ("sub_major_label", ""),
    ("minor_label", ""),
    ("unit_label", ""),
    ("annotated", False),
]:
    if c not in records.columns:
        records[c] = fill

records = records.reset_index(drop=True)
45
+
46
+ # --- clamp + save + load + event handlers ---
47
def clamp_path(maj, sub, mn, un):
    """Coerce a (major, sub-major, minor, unit) selection onto a valid hierarchy path.

    Levels are checked top-down. A value that is not among the choices for its
    level is replaced by the first choice, or by "" when the level has no
    choices. Each lower level is looked up with the (possibly replaced) values
    above it.

    Returns:
        (maj, sub, mn, un, maj_choices, sub_choices, mn_choices, un_choices)
    """

    def first_or_blank(options):
        return options[0] if options else ""

    maj_c = majors()
    if maj not in maj_c:
        maj = first_or_blank(maj_c)

    sub_c = submajors(maj)
    if sub not in sub_c:
        sub = first_or_blank(sub_c)

    mn_c = minors(maj, sub)
    if mn not in mn_c:
        mn = first_or_blank(mn_c)

    un_c = units(maj, sub, mn)
    if un not in un_c:
        un = first_or_blank(un_c)

    return maj, sub, mn, un, maj_c, sub_c, mn_c, un_c
57
+
58
def save_record(i, maj, sub, mn, un):
    """Write the four labels to row `i` of `records` and set its `annotated` flag."""
    label_columns = ["major_label", "sub_major_label", "minor_label", "unit_label"]
    records.loc[i, label_columns] = [maj, sub, mn, un]
    records.loc[i, "annotated"] = True
61
+
62
def status_text(i):
    """Return the Markdown status line for row `i`, based on its `annotated` flag."""
    if records.loc[i, "annotated"]:
        state = "✅ Annotated"
    else:
        state = "❌ Not Annotated"
    return f"**Status**: {state}"
64
+
65
def load_record(i):
    """Build the UI payload for row `i` of `records`.

    The stored labels are clamped onto a valid hierarchy path and the clamped
    labels are written back to the row. The `annotated` flag is deliberately
    left untouched: going through `save_record` here would set it to True, so a
    record would count as annotated as soon as it was displayed and the
    "Not Annotated" status could never be shown for it.

    Returns:
        A 6-tuple: the record Markdown, the status Markdown, and one
        `gr.update` (choices + value) for each of the major, sub-major, minor
        and unit radios.
    """
    r = records.loc[i]
    maj, sub, mn, un, maj_c, sub_c, mn_c, un_c = clamp_path(
        r.major_label, r.sub_major_label, r.minor_label, r.unit_label
    )
    # Persist only the labels; do not mark the row as annotated.
    records.loc[
        i, ["major_label", "sub_major_label", "minor_label", "unit_label"]
    ] = [maj, sub, mn, un]

    md = f"## Occupation: {r.occupation_title_main}\n## Industry: {r.industry_title_main}"
    return (
        md,
        status_text(i),
        gr.update(choices=maj_c, value=maj),
        gr.update(choices=sub_c, value=sub),
        gr.update(choices=mn_c, value=mn),
        gr.update(choices=un_c, value=un),
    )
80
 
81
def on_major_change(new_maj, i):
    """Handle a new major selection for row `i`.

    Each lower level is reset to the first choice under its new parent (or ""
    when there is none), the row is saved via `save_record`, and updates for
    all four radios plus the status text are returned.
    """
    sub_c = submajors(new_maj)
    sub = next(iter(sub_c), "")
    mn_c = minors(new_maj, sub)
    mn = next(iter(mn_c), "")
    un_c = units(new_maj, sub, mn)
    un = next(iter(un_c), "")

    save_record(i, new_maj, sub, mn, un)

    major_update = gr.update(choices=majors(), value=new_maj)
    sub_update = gr.update(choices=sub_c, value=sub)
    minor_update = gr.update(choices=mn_c, value=mn)
    unit_update = gr.update(choices=un_c, value=un)
    return (major_update, sub_update, minor_update, unit_update, status_text(i))
93
 
94
def on_sub_change(new_sub, i, maj):
    """Handle a new sub-major selection for row `i` under major `maj`.

    Minor and unit are reset to the first choice under the new parent (or ""),
    the three lower-level labels are written to the row, the row is flagged as
    annotated, and updates for the sub-major, minor and unit radios plus the
    status text are returned.
    """
    mn_c = minors(maj, new_sub)
    mn = next(iter(mn_c), "")
    un_c = units(maj, new_sub, mn)
    un = next(iter(un_c), "")

    records.loc[i, ["sub_major_label", "minor_label", "unit_label"]] = [new_sub, mn, un]
    records.loc[i, "annotated"] = True

    sub_update = gr.update(choices=submajors(maj), value=new_sub)
    minor_update = gr.update(choices=mn_c, value=mn)
    unit_update = gr.update(choices=un_c, value=un)
    return (sub_update, minor_update, unit_update, status_text(i))
105
 
106
def on_minor_change(new_mn, i, maj, sub):
    """Handle a new minor selection for row `i` under (`maj`, `sub`).

    The unit is reset to the first choice under the new minor (or ""), both
    labels are written to the row, the row is flagged as annotated, and updates
    for the minor and unit radios plus the status text are returned.
    """
    un_c = units(maj, sub, new_mn)
    un = next(iter(un_c), "")

    records.loc[i, ["minor_label", "unit_label"]] = [new_mn, un]
    records.loc[i, "annotated"] = True

    minor_update = gr.update(choices=minors(maj, sub), value=new_mn)
    unit_update = gr.update(choices=un_c, value=un)
    return (minor_update, unit_update, status_text(i))
115
 
116
def on_unit_change(new_un, i, maj, sub, mn):
    """Handle a new unit selection for row `i` under (`maj`, `sub`, `mn`).

    A unit that is not valid for the current path is replaced by the first
    valid unit (or ""). The unit label is written to the row, the row is
    flagged as annotated, and an update for the unit radio plus the status text
    is returned.
    """
    un_c = units(maj, sub, mn)
    if new_un not in un_c:
        new_un = next(iter(un_c), "")

    records.loc[i, "unit_label"] = new_un
    records.loc[i, "annotated"] = True

    unit_update = gr.update(choices=un_c, value=new_un)
    return unit_update, status_text(i)
122
+
123
def go_next(i):
    """Return the index following `i`, wrapping past the last row back to 0."""
    total = len(records)
    return (i + 1) % total


def go_prev(i):
    """Return the index preceding `i`, wrapping from 0 to the last row."""
    total = len(records)
    return (i - 1) % total
125
+
126
def save_and_jump(i, dir):
    """Persist row `i`, step to a neighbouring row and return its UI payload.

    The labels currently stored on row `i` are clamped and saved first. `dir`
    equal to "next" moves forward; any other value moves backward.

    Returns:
        The new index followed by everything `load_record` returns for it.
    """
    current = records.loc[i]
    maj, sub, mn, un = clamp_path(
        current.major_label,
        current.sub_major_label,
        current.minor_label,
        current.unit_label,
    )[:4]
    save_record(i, maj, sub, mn, un)

    if dir == "next":
        j = go_next(i)
    else:
        j = go_prev(i)
    return (j, *load_record(j))
133
+
134
def download_csv():
    """Write `records` to annotated_output.csv under PROCESSED_DATA_DIR.

    Returns:
        The path of the written file, as a string.
    """
    out_path = PROCESSED_DATA_DIR / "annotated_output.csv"
    records.to_csv(out_path, index=False)
    return str(out_path)
138
 
139
# --- Build the Blocks UI with a login gate ---
def build_gradio_app():
    """Assemble the Gradio Blocks UI: a login panel that gates the annotation panel.

    Credentials are read from the APP_USER / APP_PASS environment variables
    when the app is built. If either is unset or empty, no login can succeed.

    Returns:
        The assembled `gr.Blocks` instance (not launched).
    """
    # Function-scope import: only the credential check needs it.
    import hmac

    USER = os.getenv("APP_USER", "")
    PWD = os.getenv("APP_PASS", "")

    # Shared markup for the logo; a class is used instead of an id because the
    # image is rendered in both panels and ids must be unique in a document.
    logo_html = (
        f'<img class="logo" src="data:image/png;base64,{logo_b64}" width="180" '
        'style="pointer-events:none;user-select:none;display:block;margin:auto;" />'
    )

    with gr.Blocks() as demo:
        gr.HTML("""<style>
footer, .share-link, .api-link {display:none!important}
.logo {margin-bottom:1em}
#title{text-align:center;margin-bottom:1em}
</style>""")

        # Login panel. Bound with `as` so the click handler can toggle it.
        with gr.Column(elem_id="login_panel") as login_panel:
            gr.HTML(logo_html)
            user_in = gr.Textbox(label="Username")
            pass_in = gr.Textbox(label="Password", type="password")
            login_btn = gr.Button("🔒 Log in")
            login_msg = gr.Markdown("", visible=False)

        # Main app panel (hidden until login succeeds).
        # NOTE(review): hiding a panel only affects what the browser shows; it
        # is presumably not a substitute for server-side authentication such as
        # `launch(auth=...)` - confirm the threat model.
        with gr.Column(elem_id="app_panel", visible=False) as app_panel:
            gr.HTML(logo_html)
            gr.Markdown("# ISCO Annotation", elem_id="title")
            idx_state = gr.State(0)
            record_md = gr.Markdown()
            status_md = gr.Markdown()
            with gr.Row():
                prev_btn = gr.Button("⬅ Previous")
                next_btn = gr.Button("✅ Next")
            with gr.Row():
                # `label` must be passed by keyword: the first positional
                # parameter of gr.Radio is `choices`.
                major_radio = gr.Radio(label="Level 1: Major", choices=[], interactive=True)
                sub_radio = gr.Radio(label="Level 2: Sub-major", choices=[], interactive=True)
                minor_radio = gr.Radio(label="Level 3: Minor", choices=[], interactive=True)
                unit_radio = gr.Radio(label="Level 4: Unit", choices=[], interactive=True)
            download_btn = gr.Button("📥 Download CSV")
            download_file = gr.File(visible=False)

        def check_creds(u, p):
            """Return updates for (login_panel, app_panel, login_msg, user_in)."""
            # Unset credentials must never authenticate an empty login.
            configured = bool(USER) and bool(PWD)
            # Constant-time comparison; bytes are used because compare_digest
            # only accepts ASCII-only str arguments.
            user_ok = hmac.compare_digest((u or "").encode(), USER.encode())
            pass_ok = hmac.compare_digest((p or "").encode(), PWD.encode())
            if configured and user_ok and pass_ok:
                return (
                    gr.update(visible=False),
                    gr.update(visible=True),
                    gr.update(visible=False),
                    "",
                )
            # Leave both panels as they are and show the error message.
            return (
                gr.update(),
                gr.update(),
                gr.update(visible=True, value="❌ Bad credentials"),
                "",
            )

        login_btn.click(
            check_creds,
            inputs=[user_in, pass_in],
            outputs=[login_panel, app_panel, login_msg, user_in],
        )

        # Components refreshed whenever a record is (re)loaded.
        record_outputs = [
            idx_state, record_md, status_md,
            major_radio, sub_radio, minor_radio, unit_radio,
        ]

        demo.load(lambda: (0,) + load_record(0), outputs=record_outputs)
        next_btn.click(
            lambda i: save_and_jump(i, "next"),
            inputs=[idx_state],
            outputs=record_outputs,
        )
        prev_btn.click(
            lambda i: save_and_jump(i, "prev"),
            inputs=[idx_state],
            outputs=record_outputs,
        )

        # `.input` fires only on user interaction. `.change` would also fire
        # when load_record / another handler sets a radio's value, re-running
        # the handlers and resetting the lower levels of a freshly loaded row.
        major_radio.input(
            on_major_change,
            inputs=[major_radio, idx_state],
            outputs=[major_radio, sub_radio, minor_radio, unit_radio, status_md],
        )
        sub_radio.input(
            on_sub_change,
            inputs=[sub_radio, idx_state, major_radio],
            outputs=[sub_radio, minor_radio, unit_radio, status_md],
        )
        minor_radio.input(
            on_minor_change,
            inputs=[minor_radio, idx_state, major_radio, sub_radio],
            outputs=[minor_radio, unit_radio, status_md],
        )
        unit_radio.input(
            on_unit_change,
            inputs=[unit_radio, idx_state, major_radio, sub_radio, minor_radio],
            outputs=[unit_radio, status_md],
        )

        # Write the CSV, then reveal the file component that offers it.
        download_btn.click(download_csv, outputs=[download_file]).then(
            lambda: gr.update(visible=True), None, [download_file]
        )

    return demo
224
 
225
if __name__ == "__main__":
    # Build the UI, enable the request queue, then start the server.
    demo = build_gradio_app()
    queued_demo = demo.queue()
    queued_demo.launch(show_api=False, share=True)