| | import streamlit as st |
| | from transformers import pipeline |
| | import os |
| |
|
| | |
# Global Streamlit page setup — must run before any other st.* call.
st.set_page_config(page_title="Kriolu AI Hub", layout="wide")
# Hugging Face access token used to load the (presumably private) Iscte-Sintra
# models. NOTE(review): the environment variable is literally named "token" —
# confirm this is intended (the Hugging Face convention is HF_TOKEN); when the
# variable is unset this silently becomes None.
token = os.environ.get("token")
| |
|
| | |
@st.cache_resource
def load_pipeline(task, model_path, **kwargs):
    """Build a transformers pipeline for *task*/*model_path*, cached by Streamlit.

    ``st.cache_resource`` keys on all arguments, so repeated calls with the
    same task/model (and kwargs) reuse the already-loaded model instead of
    downloading it again. The module-level ``token`` authenticates against
    the Hugging Face Hub. Extra ``kwargs`` are forwarded to ``pipeline``.
    """
    return pipeline(task, model=model_path, tokenizer=model_path, token=token, **kwargs)
| |
|
| | |
def instantiate_gpt2(model_name, max_length_, num_return_sequences, text):
    """Generate text with an Iscte-Sintra GPT-2-style model.

    Uses nucleus sampling (top_p=0.95, top_k=50) and returns the raw list of
    generation dicts produced by the transformers pipeline.
    """
    generator = load_pipeline("text-generation", f'Iscte-Sintra/{model_name}')

    # Fixed sampling configuration; only the token budget and the number of
    # returned sequences come from the caller.
    sampling_kwargs = {
        "max_new_tokens": max_length_,
        "num_return_sequences": num_return_sequences,
        "do_sample": True,
        "top_p": 0.95,
        "top_k": 50,
    }
    return generator(text, **sampling_kwargs)
| |
|
| | |
def instantiate_encoder(model_name, top_k, text):
    """Return the *top_k* fill-mask predictions for *text* from the given model."""
    repo_id = f"Iscte-Sintra/{model_name}"
    masked_lm = load_pipeline("fill-mask", repo_id)
    return masked_lm(text, top_k=top_k)
| |
|
| | |
def instantiate_translation_model(model_name, text, src_lg, tgt_lg):
    """Translate *text* with the named Iscte-Sintra translation model.

    Parameters
    ----------
    model_name : repo name appended to the 'Iscte-Sintra/' organization.
    text : source-language input string.
    src_lg / tgt_lg : language codes in the convention the model family
        expects (e.g. 'por_Latn' for NLLB, '__pt__' for M2M100, 'pt_XX'
        for mBART — see build_translation_page).

    Returns the translated string, or None when an M2M100 tokenizer does not
    know the requested target-language token.
    """
    model_path = f'Iscte-Sintra/{model_name}'

    if "m2m100" in model_name:
        # M2M100 does not accept src_lang/tgt_lang pipeline kwargs: the source
        # language is set on the tokenizer and the target language is forced
        # as the decoder's BOS token id.
        pipe = load_pipeline("translation", model_path)
        pipe.tokenizer.src_lang = src_lg

        tgt_lang_id = pipe.tokenizer.convert_tokens_to_ids(tgt_lg)
        if tgt_lang_id == pipe.tokenizer.unk_token_id:
            # Unknown target token: surface the problem in the UI and bail out.
            st.error(f"Erro: O token {tgt_lg} não foi encontrado no vocabulário do modelo!")
            return None

        result = pipe(
            text,
            forced_bos_token_id=tgt_lang_id
        )
        return result[0]["translation_text"]

    # NLLB, mBART and any other seq2seq model take the language codes directly.
    # The original code had two byte-identical branches here ("nllb" and the
    # fallback) that each built an uncached pipeline, reloading the model on
    # every call; they are merged and routed through the st.cache_resource-
    # cached load_pipeline (which forwards src_lang/tgt_lang via **kwargs).
    pipe = load_pipeline("translation", model_path, src_lang=src_lg, tgt_lang=tgt_lg)
    return pipe(text)[0]["translation_text"]
| |
|
| | |
def build_translation_page(model_name):
    """Render the translation UI for an Encoder-Decoder model."""
    st.title(f"🌍 {model_name}: Tradução")

    # Each model family uses its own language-code convention; mBART-style
    # codes are the fallback.
    code_tables = (
        ("nllb", {"Português": "por_Latn", "Kabuverdianu": "kea_Latn"}),
        ("m2m100", {"Português": "__pt__", "Kabuverdianu": "__en__"}),
    )
    lang_map = next(
        (codes for family, codes in code_tables if family in model_name),
        {"Português": "pt_XX", "Kabuverdianu": "en_XX"},
    )

    labels = list(lang_map)
    left, right = st.columns(2)
    with left:
        src_label = st.selectbox("Língua de Origem", labels)
    with right:
        tgt_label = st.selectbox("Língua de Destino", labels)

    text = st.text_area("Texto de entrada", "Katxór sta trás di pórta.", height=100)

    if not st.button("Traduzir"):
        return
    if not text.strip():
        st.warning("Introduza texto!")
        return

    with st.spinner("A traduzir..."):
        try:
            translation = instantiate_translation_model(
                model_name,
                text,
                lang_map[src_label],
                lang_map[tgt_label],
            )
            st.success("Resultado:")
            st.write(translation)
        except Exception as exc:
            st.error(f"Erro: {exc}")
| |
|
| | |
def build_decoder_page(model_name):
    """Render the free-text generation UI for a Decoder (GPT-style) model."""
    st.title(f"✍️ {model_name}: Geração de Texto")

    # Generation knobs live in the sidebar; the prompt in the main pane.
    token_budget = st.sidebar.slider("Máximo de Tokens", 10, 200, 50)
    sequence_count = st.sidebar.number_input("Sequências", 1, 5, 1)
    prompt = st.text_area("Prompt", "Katxór sta trás di pórta.")

    if not st.button("Gerar"):
        return

    with st.spinner("A processar..."):
        try:
            for generation in instantiate_gpt2(model_name, token_budget, sequence_count, prompt):
                st.info(generation["generated_text"])
        except Exception as exc:
            st.error(f"Erro: {exc}")
| |
|
| | |
def build_encoder_page(model_name):
    """Render the fill-mask UI for an Encoder (masked-LM) model."""
    st.title(f"🔍 {model_name}: Fill-Mask")
    top_k = st.sidebar.slider("Top K sugestões", 1, 5, 3)

    # RoBERTa tokenizers use <mask>; BERT-style tokenizers use [MASK].
    mask_token = "<mask>" if "RoBERTa" in model_name else "[MASK]"
    st.write(f"Use o token **{mask_token}** para a palavra em falta.")

    input_text = st.text_input("Frase", f"Katxór sta trás di {mask_token}.")

    if st.button("Prever"):
        try:
            results = instantiate_encoder(model_name, top_k, input_text)
            for res in results:
                st.write(f"✅ **{res['token_str']}** ({res['score']:.2%})")
        except Exception as e:
            # Fix: the original swallowed the exception entirely and always
            # blamed a missing mask token, hiding real failures (auth errors,
            # download failures, bad model id). Keep the hint but show the
            # actual error as well.
            st.error(f"Certifique-se que usou o token {mask_token} (erro: {e})")
| |
|
| | |
# Registry of published models; the architecture family decides which page
# builder renders the UI for the selected model.
model_dict = {
    "RoBERTa-Kriolu": "Encoder",
    "GPT2_v1.18": "Decoder",
    "LLM-kea-v1.0": "Decoder",
    "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
    "nllb-v1.0": "Encoder-Decoder",
    "m2m100-v1.0": "Encoder-Decoder",
    "mbart-v2.0": "Encoder-Decoder",
    "m2m100-v2.0": "Encoder-Decoder",
    "mbart-v2.1": "Encoder-Decoder",
    "nllb-v2.0": "Encoder-Decoder",
}

selected_model = st.sidebar.selectbox("Escolha o Modelo", list(model_dict.keys()))
arch = model_dict[selected_model]

# Dispatch table on architecture family; decoders are the fallback, exactly
# as in the original if/elif/else chain.
_page_builders = {
    "Encoder": build_encoder_page,
    "Encoder-Decoder": build_translation_page,
}
_page_builders.get(arch, build_decoder_page)(selected_model)
| |
|