Updated models and added staff
Browse files- src/streamlit_app.py +38 -12
src/streamlit_app.py
CHANGED
|
@@ -68,14 +68,17 @@ st.markdown("""
|
|
| 68 |
""", unsafe_allow_html=True)
|
| 69 |
|
| 70 |
# ==================== MODEL LOADING ====================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
@st.cache_resource
|
| 72 |
-
def load_model():
|
| 73 |
"""Loads the model and tokenizer (cached to avoid reloading)"""
|
| 74 |
-
MODEL_NAME = "liaad/Citilink-BERTimbau-large-metadata-pt-baseline"
|
| 75 |
-
|
| 76 |
try:
|
| 77 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 78 |
-
model = AutoModelForTokenClassification.from_pretrained(
|
| 79 |
model.eval()
|
| 80 |
return tokenizer, model
|
| 81 |
except Exception as e:
|
|
@@ -216,6 +219,9 @@ def translate_entity_label(label):
|
|
| 216 |
"PARTICIPANTE-VEREADOR-PRESENTE": ("👥", "COUNCILOR - PRESENT"),
|
| 217 |
"PARTICIPANTE-VEREADOR-AUSENTE": ("👥", "COUNCILOR - ABSENT"),
|
| 218 |
"PARTICIPANTE-VEREADOR-SUBSTITUIDO": ("👥", "COUNCILOR - SUBSTITUTED"),
|
|
|
|
|
|
|
|
|
|
| 219 |
}
|
| 220 |
|
| 221 |
# Check if exact translation exists
|
|
@@ -239,7 +245,8 @@ def get_entity_style(label):
|
|
| 239 |
"TIPO-REUNIAO": "#E67E22", # Orange
|
| 240 |
"HORARIO": "#F39C12", # Yellow-orange
|
| 241 |
"PARTICIPANTE-PRESIDENTE": "#8E44AD", # Purple for President
|
| 242 |
-
"PARTICIPANTE-VEREADOR": "#27AE60"
|
|
|
|
| 243 |
}
|
| 244 |
|
| 245 |
# Border styles for different attendance states
|
|
@@ -273,6 +280,15 @@ def get_entity_style(label):
|
|
| 273 |
border_style = border_styles["AUSENTE"]
|
| 274 |
elif "SUBSTITUIDO" in label:
|
| 275 |
border_style = border_styles["SUBSTITUIDO"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
else:
|
| 277 |
# For other entities, use specific colors
|
| 278 |
base_category = label.split("-")[0]
|
|
@@ -306,6 +322,16 @@ def display_entities_compact(entities_dict):
|
|
| 306 |
# ==================== SIDEBAR ====================
|
| 307 |
with st.sidebar:
|
| 308 |
st.markdown("### ⚙️ Configuration")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
st.markdown("Choose an example or enter your own text:")
|
| 310 |
|
| 311 |
selected_example = st.selectbox(
|
|
@@ -317,10 +343,10 @@ with st.sidebar:
|
|
| 317 |
|
| 318 |
# About Section
|
| 319 |
st.markdown("### 📋 About")
|
| 320 |
-
st.markdown("""
|
| 321 |
**MiNER Stage 2** uses Named Entity Recognition models to automatically extract metadata from meeting minutes.
|
| 322 |
|
| 323 |
-
- **Model**:
|
| 324 |
- **Languages**: Portuguese
|
| 325 |
- **Method**: Token Classification (NER) with BIO tagging
|
| 326 |
""")
|
|
@@ -329,9 +355,9 @@ with st.sidebar:
|
|
| 329 |
|
| 330 |
# Resources Section
|
| 331 |
st.markdown("### 🔗 Resources")
|
| 332 |
-
st.markdown("""
|
| 333 |
- [GitHub Repository](https://github.com/LIAAD/MiNER)
|
| 334 |
-
- [Model](https://huggingface.co/
|
| 335 |
""")
|
| 336 |
|
| 337 |
# ==================== MAIN CONTENT ====================
|
|
@@ -412,8 +438,8 @@ No Edifício Sede do Município de Alandroal, o Senhor Presidente da Câmara Mun
|
|
| 412 |
""")
|
| 413 |
|
| 414 |
# Load model
|
| 415 |
-
with st.spinner("Loading model..."):
|
| 416 |
-
tokenizer, model = load_model()
|
| 417 |
|
| 418 |
if tokenizer is None or model is None:
|
| 419 |
st.error("❌ Failed to load model. Please check if the model path is correct.")
|
|
|
|
| 68 |
""", unsafe_allow_html=True)
|
| 69 |
|
| 70 |
# ==================== MODEL LOADING ====================
|
| 71 |
+
MODEL_OPTIONS = {
|
| 72 |
+
"Metadata Baseline": "liaad/Citilink-BERTimbau-large-metadata-pt-baseline",
|
| 73 |
+
"Metadata Augmented": "liaad/Citilink-BERTimbau-large-Metadata-pt",
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
@st.cache_resource
|
| 77 |
+
def load_model(model_name: str):
|
| 78 |
"""Loads the model and tokenizer (cached to avoid reloading)"""
|
|
|
|
|
|
|
| 79 |
try:
|
| 80 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 81 |
+
model = AutoModelForTokenClassification.from_pretrained(model_name)
|
| 82 |
model.eval()
|
| 83 |
return tokenizer, model
|
| 84 |
except Exception as e:
|
|
|
|
| 219 |
"PARTICIPANTE-VEREADOR-PRESENTE": ("👥", "COUNCILOR - PRESENT"),
|
| 220 |
"PARTICIPANTE-VEREADOR-AUSENTE": ("👥", "COUNCILOR - ABSENT"),
|
| 221 |
"PARTICIPANTE-VEREADOR-SUBSTITUIDO": ("👥", "COUNCILOR - SUBSTITUTED"),
|
| 222 |
+
"PARTICIPANTE-FUNCIONARIO-PRESENTE": ("🗂️", "STAFF - PRESENT"),
|
| 223 |
+
"PARTICIPANTE-FUNCIONARIO-AUSENTE": ("🗂️", "STAFF - ABSENT"),
|
| 224 |
+
"PARTICIPANTE-FUNCIONARIO-SUBSTITUIDO": ("🗂️", "STAFF - SUBSTITUTED"),
|
| 225 |
}
|
| 226 |
|
| 227 |
# Check if exact translation exists
|
|
|
|
| 245 |
"TIPO-REUNIAO": "#E67E22", # Orange
|
| 246 |
"HORARIO": "#F39C12", # Yellow-orange
|
| 247 |
"PARTICIPANTE-PRESIDENTE": "#8E44AD", # Purple for President
|
| 248 |
+
"PARTICIPANTE-VEREADOR": "#27AE60", # Green for Councilors
|
| 249 |
+
"PARTICIPANTE-FUNCIONARIO": "#1A6B8A" # Steel blue for Staff
|
| 250 |
}
|
| 251 |
|
| 252 |
# Border styles for different attendance states
|
|
|
|
| 280 |
border_style = border_styles["AUSENTE"]
|
| 281 |
elif "SUBSTITUIDO" in label:
|
| 282 |
border_style = border_styles["SUBSTITUIDO"]
|
| 283 |
+
elif "PARTICIPANTE-FUNCIONARIO" in label:
|
| 284 |
+
color = colors["PARTICIPANTE-FUNCIONARIO"]
|
| 285 |
+
# Determine border style
|
| 286 |
+
if "PRESENTE" in label:
|
| 287 |
+
border_style = border_styles["PRESENTE"]
|
| 288 |
+
elif "AUSENTE" in label:
|
| 289 |
+
border_style = border_styles["AUSENTE"]
|
| 290 |
+
elif "SUBSTITUIDO" in label:
|
| 291 |
+
border_style = border_styles["SUBSTITUIDO"]
|
| 292 |
else:
|
| 293 |
# For other entities, use specific colors
|
| 294 |
base_category = label.split("-")[0]
|
|
|
|
| 322 |
# ==================== SIDEBAR ====================
|
| 323 |
with st.sidebar:
|
| 324 |
st.markdown("### ⚙️ Configuration")
|
| 325 |
+
|
| 326 |
+
# Model selection
|
| 327 |
+
selected_model_label = st.selectbox(
|
| 328 |
+
"🤖 Select Model",
|
| 329 |
+
options=list(MODEL_OPTIONS.keys())
|
| 330 |
+
)
|
| 331 |
+
selected_model_name = MODEL_OPTIONS[selected_model_label]
|
| 332 |
+
|
| 333 |
+
st.markdown("---")
|
| 334 |
+
|
| 335 |
st.markdown("Choose an example or enter your own text:")
|
| 336 |
|
| 337 |
selected_example = st.selectbox(
|
|
|
|
| 343 |
|
| 344 |
# About Section
|
| 345 |
st.markdown("### 📋 About")
|
| 346 |
+
st.markdown(f"""
|
| 347 |
**MiNER Stage 2** uses Named Entity Recognition models to automatically extract metadata from meeting minutes.
|
| 348 |
|
| 349 |
+
- **Model**: {selected_model_label}
|
| 350 |
- **Languages**: Portuguese
|
| 351 |
- **Method**: Token Classification (NER) with BIO tagging
|
| 352 |
""")
|
|
|
|
| 355 |
|
| 356 |
# Resources Section
|
| 357 |
st.markdown("### 🔗 Resources")
|
| 358 |
+
st.markdown(f"""
|
| 359 |
- [GitHub Repository](https://github.com/LIAAD/MiNER)
|
| 360 |
+
- [Model on HuggingFace](https://huggingface.co/{selected_model_name})
|
| 361 |
""")
|
| 362 |
|
| 363 |
# ==================== MAIN CONTENT ====================
|
|
|
|
| 438 |
""")
|
| 439 |
|
| 440 |
# Load model
|
| 441 |
+
with st.spinner(f"Loading model **{selected_model_label}**..."):
|
| 442 |
+
tokenizer, model = load_model(selected_model_name)
|
| 443 |
|
| 444 |
if tokenizer is None or model is None:
|
| 445 |
st.error("❌ Failed to load model. Please check if the model path is correct.")
|