rodrigofbatista committed on
Commit
1daa700
·
verified ·
1 Parent(s): 705a07a

Updated models and added staff

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +38 -12
src/streamlit_app.py CHANGED
@@ -68,14 +68,17 @@ st.markdown("""
68
  """, unsafe_allow_html=True)
69
 
70
  # ==================== MODEL LOADING ====================
 
 
 
 
 
71
  @st.cache_resource
72
- def load_model():
73
  """Loads the model and tokenizer (cached to avoid reloading)"""
74
- MODEL_NAME = "liaad/Citilink-BERTimbau-large-metadata-pt-baseline"
75
-
76
  try:
77
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
78
- model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
79
  model.eval()
80
  return tokenizer, model
81
  except Exception as e:
@@ -216,6 +219,9 @@ def translate_entity_label(label):
216
  "PARTICIPANTE-VEREADOR-PRESENTE": ("👥", "COUNCILOR - PRESENT"),
217
  "PARTICIPANTE-VEREADOR-AUSENTE": ("👥", "COUNCILOR - ABSENT"),
218
  "PARTICIPANTE-VEREADOR-SUBSTITUIDO": ("👥", "COUNCILOR - SUBSTITUTED"),
 
 
 
219
  }
220
 
221
  # Check if exact translation exists
@@ -239,7 +245,8 @@ def get_entity_style(label):
239
  "TIPO-REUNIAO": "#E67E22", # Orange
240
  "HORARIO": "#F39C12", # Yellow-orange
241
  "PARTICIPANTE-PRESIDENTE": "#8E44AD", # Purple for President
242
- "PARTICIPANTE-VEREADOR": "#27AE60" # Green for Councilors
 
243
  }
244
 
245
  # Border styles for different attendance states
@@ -273,6 +280,15 @@ def get_entity_style(label):
273
  border_style = border_styles["AUSENTE"]
274
  elif "SUBSTITUIDO" in label:
275
  border_style = border_styles["SUBSTITUIDO"]
 
 
 
 
 
 
 
 
 
276
  else:
277
  # For other entities, use specific colors
278
  base_category = label.split("-")[0]
@@ -306,6 +322,16 @@ def display_entities_compact(entities_dict):
306
  # ==================== SIDEBAR ====================
307
  with st.sidebar:
308
  st.markdown("### ⚙️ Configuration")
 
 
 
 
 
 
 
 
 
 
309
  st.markdown("Choose an example or enter your own text:")
310
 
311
  selected_example = st.selectbox(
@@ -317,10 +343,10 @@ with st.sidebar:
317
 
318
  # About Section
319
  st.markdown("### 📋 About")
320
- st.markdown("""
321
  **MiNER Stage 2** uses Named Entity Recognition models to automatically extract metadata from meeting minutes.
322
 
323
- - **Model**: BERTimbau fine-tuned
324
  - **Languages**: Portuguese
325
  - **Method**: Token Classification (NER) with BIO tagging
326
  """)
@@ -329,9 +355,9 @@ with st.sidebar:
329
 
330
  # Resources Section
331
  st.markdown("### 🔗 Resources")
332
- st.markdown("""
333
  - [GitHub Repository](https://github.com/LIAAD/MiNER)
334
- - [Model](https://huggingface.co/liaad/Citilink-BERTimbau-large-metadata-pt-baseline)
335
  """)
336
 
337
  # ==================== MAIN CONTENT ====================
@@ -412,8 +438,8 @@ No Edifício Sede do Município de Alandroal, o Senhor Presidente da Câmara Mun
412
  """)
413
 
414
  # Load model
415
- with st.spinner("Loading model..."):
416
- tokenizer, model = load_model()
417
 
418
  if tokenizer is None or model is None:
419
  st.error("❌ Failed to load model. Please check if the model path is correct.")
 
68
  """, unsafe_allow_html=True)
69
 
70
  # ==================== MODEL LOADING ====================
71
+ MODEL_OPTIONS = {
72
+ "Metadata Baseline": "liaad/Citilink-BERTimbau-large-metadata-pt-baseline",
73
+ "Metadata Augmented": "liaad/Citilink-BERTimbau-large-Metadata-pt",
74
+ }
75
+
76
  @st.cache_resource
77
+ def load_model(model_name: str):
78
  """Loads the model and tokenizer (cached to avoid reloading)"""
 
 
79
  try:
80
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
81
+ model = AutoModelForTokenClassification.from_pretrained(model_name)
82
  model.eval()
83
  return tokenizer, model
84
  except Exception as e:
 
219
  "PARTICIPANTE-VEREADOR-PRESENTE": ("👥", "COUNCILOR - PRESENT"),
220
  "PARTICIPANTE-VEREADOR-AUSENTE": ("👥", "COUNCILOR - ABSENT"),
221
  "PARTICIPANTE-VEREADOR-SUBSTITUIDO": ("👥", "COUNCILOR - SUBSTITUTED"),
222
+ "PARTICIPANTE-FUNCIONARIO-PRESENTE": ("🗂️", "STAFF - PRESENT"),
223
+ "PARTICIPANTE-FUNCIONARIO-AUSENTE": ("🗂️", "STAFF - ABSENT"),
224
+ "PARTICIPANTE-FUNCIONARIO-SUBSTITUIDO": ("🗂️", "STAFF - SUBSTITUTED"),
225
  }
226
 
227
  # Check if exact translation exists
 
245
  "TIPO-REUNIAO": "#E67E22", # Orange
246
  "HORARIO": "#F39C12", # Yellow-orange
247
  "PARTICIPANTE-PRESIDENTE": "#8E44AD", # Purple for President
248
+ "PARTICIPANTE-VEREADOR": "#27AE60", # Green for Councilors
249
+ "PARTICIPANTE-FUNCIONARIO": "#1A6B8A" # Steel blue for Staff
250
  }
251
 
252
  # Border styles for different attendance states
 
280
  border_style = border_styles["AUSENTE"]
281
  elif "SUBSTITUIDO" in label:
282
  border_style = border_styles["SUBSTITUIDO"]
283
+ elif "PARTICIPANTE-FUNCIONARIO" in label:
284
+ color = colors["PARTICIPANTE-FUNCIONARIO"]
285
+ # Determine border style
286
+ if "PRESENTE" in label:
287
+ border_style = border_styles["PRESENTE"]
288
+ elif "AUSENTE" in label:
289
+ border_style = border_styles["AUSENTE"]
290
+ elif "SUBSTITUIDO" in label:
291
+ border_style = border_styles["SUBSTITUIDO"]
292
  else:
293
  # For other entities, use specific colors
294
  base_category = label.split("-")[0]
 
322
  # ==================== SIDEBAR ====================
323
  with st.sidebar:
324
  st.markdown("### ⚙️ Configuration")
325
+
326
+ # Model selection
327
+ selected_model_label = st.selectbox(
328
+ "🤖 Select Model",
329
+ options=list(MODEL_OPTIONS.keys())
330
+ )
331
+ selected_model_name = MODEL_OPTIONS[selected_model_label]
332
+
333
+ st.markdown("---")
334
+
335
  st.markdown("Choose an example or enter your own text:")
336
 
337
  selected_example = st.selectbox(
 
343
 
344
  # About Section
345
  st.markdown("### 📋 About")
346
+ st.markdown(f"""
347
  **MiNER Stage 2** uses Named Entity Recognition models to automatically extract metadata from meeting minutes.
348
 
349
+ - **Model**: {selected_model_label}
350
  - **Languages**: Portuguese
351
  - **Method**: Token Classification (NER) with BIO tagging
352
  """)
 
355
 
356
  # Resources Section
357
  st.markdown("### 🔗 Resources")
358
+ st.markdown(f"""
359
  - [GitHub Repository](https://github.com/LIAAD/MiNER)
360
+ - [Model on HuggingFace](https://huggingface.co/{selected_model_name})
361
  """)
362
 
363
  # ==================== MAIN CONTENT ====================
 
438
  """)
439
 
440
  # Load model
441
+ with st.spinner(f"Loading model **{selected_model_label}**..."):
442
+ tokenizer, model = load_model(selected_model_name)
443
 
444
  if tokenizer is None or model is None:
445
  st.error("❌ Failed to load model. Please check if the model path is correct.")