# User-Interface_v1.0 / src/streamlit_app.py
# (hosting-page metadata: uploaded by ErikDaska, commit a6a5f47 verified)
import streamlit as st
from transformers import pipeline
import os
# ---------------- CONFIG ----------------
# NOTE: set_page_config must be the first Streamlit command executed in the
# script, so this statement order is deliberate.
st.set_page_config(page_title="Kriolu AI Hub", layout="wide")
# Hugging Face access token for gated/private Hub models, read from the
# environment variable literally named "token" (None when unset).
token = os.environ.get("token")
# ---------------- CACHE ----------------
@st.cache_resource
def load_pipeline(task, model_path, **kwargs):
    """Build and cache a transformers pipeline for the given task and model.

    The same Hub path supplies both the model and its tokenizer, and the
    module-level ``token`` is forwarded for authenticated Hub access. The
    ``st.cache_resource`` decorator keeps one instance per argument combo.
    """
    pipe = pipeline(
        task,
        model=model_path,
        tokenizer=model_path,
        token=token,
        **kwargs,
    )
    return pipe
# ---------------- DECODER ----------------
def instantiate_gpt2(model_name, max_length_, num_return_sequences, text):
    """Sample text continuations from a causal LM in the Iscte-Sintra org.

    Returns the raw pipeline output: a list of dicts with 'generated_text'.
    """
    generator = load_pipeline("text-generation", f'Iscte-Sintra/{model_name}')
    sampling_args = {
        "max_new_tokens": max_length_,
        "num_return_sequences": num_return_sequences,
        "do_sample": True,
        "top_p": 0.95,
        "top_k": 50,
    }
    return generator(text, **sampling_args)
# ---------------- ENCODER ----------------
def instantiate_encoder(model_name, top_k, text):
    """Run fill-mask inference, returning the top_k candidate fills."""
    repo_id = f"Iscte-Sintra/{model_name}"
    masked_lm = load_pipeline("fill-mask", repo_id)
    return masked_lm(text, top_k=top_k)
# ---------------- TRANSLATION ----------------
def instantiate_translation_model(model_name, text, src_lg, tgt_lg):
    """Translate *text* from ``src_lg`` to ``tgt_lg`` with the named Hub model.

    Two code paths:
      * m2m100 — the target language must be forced as the BOS token id;
      * nllb / mBART — the pipeline accepts ``src_lang``/``tgt_lang`` directly.

    Returns the translated string, or None when the m2m100 target-language
    token is absent from the vocabulary (an error is shown in the UI).
    """
    model_path = f'Iscte-Sintra/{model_name}'

    # ---- M2M100 ----
    if "m2m100" in model_name:
        pipe = load_pipeline("translation", model_path)
        # 1. Set the source language. On customized models, src_lang sometimes
        #    needs to be the full language token (e.g. "__pt__").
        pipe.tokenizer.src_lang = src_lg
        # 2. Resolve the numeric id of the target-language token.
        #    convert_tokens_to_ids bypasses the internal language-lookup logic.
        tgt_lang_id = pipe.tokenizer.convert_tokens_to_ids(tgt_lg)
        if tgt_lang_id == pipe.tokenizer.unk_token_id:
            st.error(f"Erro: O token {tgt_lg} não foi encontrado no vocabulário do modelo!")
            return None
        # 3. Translate, forcing the beginning-of-sentence token id.
        result = pipe(text, forced_bos_token_id=tgt_lang_id)
        return result[0]["translation_text"]

    # ---- NLLB / mBART ----
    # These two branches were previously byte-identical duplicates that also
    # bypassed the cached loader, rebuilding the pipeline on every request.
    # One shared, cached path handles both families.
    pipe = load_pipeline("translation", model_path, src_lang=src_lg, tgt_lang=tgt_lg)
    return pipe(text)[0]["translation_text"]
# ---------------- UI: TRANSLATION ----------------
def build_translation_page(model_name):
    """Render the translation UI for the selected encoder-decoder model.

    Maps the two UI languages onto the language-code scheme of the model
    family, then calls instantiate_translation_model on button press.
    """
    st.title(f"🌍 {model_name}: Tradução")

    # Each model family uses a different language-code convention.
    if "nllb" in model_name:
        lang_map = {
            "Português": "por_Latn",
            "Kabuverdianu": "kea_Latn"
        }
    elif "m2m100" in model_name:
        lang_map = {
            "Português": "__pt__",
            "Kabuverdianu": "__en__"  # Proxying kea as __en__
        }
    else:  # mBART
        lang_map = {
            "Português": "pt_XX",
            "Kabuverdianu": "en_XX"
        }

    labels = list(lang_map.keys())
    col1, col2 = st.columns(2)
    with col1:
        src_label = st.selectbox("Língua de Origem", labels)
    with col2:
        # Default the target to the second language: previously both widgets
        # defaulted to "Português", making the initial state a no-op pt→pt
        # translation.
        tgt_label = st.selectbox("Língua de Destino", labels, index=1)

    text = st.text_area("Texto de entrada", "Katxór sta trás di pórta.", height=100)

    if st.button("Traduzir"):
        if not text.strip():
            st.warning("Introduza texto!")
            return
        with st.spinner("A traduzir..."):
            try:
                result = instantiate_translation_model(
                    model_name,
                    text,
                    lang_map[src_label],
                    lang_map[tgt_label]
                )
                st.success("Resultado:")
                st.write(result)
            except Exception as e:
                st.error(f"Erro: {e}")
# ---------------- UI: DECODER ----------------
def build_decoder_page(model_name):
    """Render the text-generation UI for a decoder-only model."""
    st.title(f"✍️ {model_name}: Geração de Texto")

    # Sampling controls live in the sidebar.
    max_length = st.sidebar.slider("Máximo de Tokens", 10, 200, 50)
    num_seq = st.sidebar.number_input("Sequências", 1, 5, 1)
    text = st.text_area("Prompt", "Katxór sta trás di pórta.")

    if not st.button("Gerar"):
        return
    with st.spinner("A processar..."):
        try:
            for generation in instantiate_gpt2(model_name, max_length, num_seq, text):
                st.info(generation["generated_text"])
        except Exception as e:
            st.error(f"Erro: {e}")
# ---------------- UI: ENCODER ----------------
def build_encoder_page(model_name):
    """Render the fill-mask UI for an encoder-only model."""
    st.title(f"🔍 {model_name}: Fill-Mask")
    top_k = st.sidebar.slider("Top K sugestões", 1, 5, 3)

    # RoBERTa tokenizers use <mask>; BERT-style tokenizers use [MASK].
    mask_token = "<mask>" if "RoBERTa" in model_name else "[MASK]"
    st.write(f"Use o token **{mask_token}** para a palavra em falta.")
    input_text = st.text_input("Frase", f"Katxór sta trás di {mask_token}.")

    if st.button("Prever"):
        try:
            results = instantiate_encoder(model_name, top_k, input_text)
            for res in results:
                st.write(f"✅ **{res['token_str']}** ({res['score']:.2%})")
        except Exception as e:
            # Surface the underlying error alongside the likely cause —
            # the original swallowed the exception and hid all diagnostics.
            st.error(f"Certifique-se que usou o token {mask_token} (detalhe: {e})")
# ---------------- MAIN ----------------
# Registry of available models and their architecture family.
model_dict = {
    "RoBERTa-Kriolu": "Encoder",
    "GPT2_v1.18": "Decoder",
    "LLM-kea-v1.0": "Decoder",
    "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
    "nllb-v1.0": "Encoder-Decoder",
    "m2m100-v1.0": "Encoder-Decoder",
    "mbart-v2.0": "Encoder-Decoder",
    "m2m100-v2.0": "Encoder-Decoder",
    "mbart-v2.1": "Encoder-Decoder",
    "nllb-v2.0": "Encoder-Decoder",
}

selected_model = st.sidebar.selectbox("Escolha o Modelo", list(model_dict.keys()))

# Dispatch to the page builder for the model's architecture; anything not
# listed falls back to the decoder page, exactly as before.
_page_builders = {
    "Encoder": build_encoder_page,
    "Encoder-Decoder": build_translation_page,
}
_page_builders.get(model_dict[selected_model], build_decoder_page)(selected_model)