Spaces:

Iscte-Sintra
/

User-Interface_v1.0

Running

App Files Community

ErikDaska committed on Jan 8

Commit

416e378

verified ·

1 Parent(s): 070212f

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +106 -135

src/streamlit_app.py CHANGED Viewed

@@ -3,162 +3,133 @@ import transformers
 from transformers import pipeline
 import os
-# Read token from environment (set as secret in Space settings)
-token = os.environ.get("token")
-def instantiate_gpt2(model_name: str,max_length_ : int, num_return_sequences : int, text : str) -> dict:
-    pipe = pipeline(task='text-generation', model=f'Iscte-Sintra/{model_name}', tokenizer=f'Iscte-Sintra/{model_name}',
-                    token=token, truncation=True, device_map="cpu")
-    if model_name == "Qwen_v0.1":
-        results = pipe(
-            text,
-            max_new_tokens=max_length_,
-            num_return_sequences=num_return_sequences,
-            do_sample=True,
-            top_p=0.95,
-            top_k=50
-        )
     else:
-        results = pipe(
-        text,
-        max_length=max_length_,
-        num_return_sequences=num_return_sequences,
-        do_sample=True,
-        top_p=0.95,
-        top_k=50
-    )
-    return results
-def instantiate_encoder(model_name: str, top_k : int, text : str) -> dict:
-    pipe = pipeline("fill-mask", model=f"Iscte-Sintra/{model_name}", tokenizer=f"Iscte-Sintra/{model_name}", token=token)
     return pipe(text, top_k=top_k)
-def instantiate_translation_model(model_name: str, text: str, selected_input_lg:str, selected_output_lg:str) -> dict:
-    if model_name=="Modelo-Traducao-kea-ptpt-v1.0" or model_name=="mbart-v0.2":
-        # Initialize the translation pipeline
-        pipe = pipeline(
-            "translation",
-            model=f'Iscte-Sintra/{model_name}',
-            tokenizer=f'Iscte-Sintra/{model_name}',
-            token=token,
-            use_fast=False,
-            src_lang=selected_input_lg,
-            tgt_lang=selected_output_lg
-        )
-    elif model_name=="m2m100-v1.0":
-        # Initialize the translation pipeline
-        pipe = pipeline(
-            "translation",
-            model=f'Iscte-Sintra/{model_name}',
-            tokenizer=f'Iscte-Sintra/{model_name}',
-            token=token,
-            use_fast=False,
-            src_lang="en",  # source: Kabuverdianu
-            tgt_lang="pt"    # target: Portuguese
-        )
     result = pipe(text)
     return result[0]["translation_text"]
 def build_translation_page(model_name):
-    try:
-        st.title(f"{model_name} : Tarefa de Tradução (Kabuverdianu → Português)")
-        text = st.text_area("Introduza texto em Kabuverdiano", "Katxór sta trás di pórta.", height=100)
-        input_supported_languages = {"pt": "pt_XX", "kea": "en_XX"}
-        selected_input_lg = st.sidebar.selectbox("Língua (Entrada)", list(input_supported_languages.keys()))
-        selected_output_lg = st.sidebar.selectbox("Lingua (Saída)", list(input_supported_languages.keys()))
-        if st.button("Traduzir"):
-            if not text.strip():
-                st.warning("Por favor, introduza texto para ser traduzido!", icon="⚠️")
-                return
-            # Call your translation function
-            result = instantiate_translation_model(model_name, text, input_supported_languages[selected_input_lg], input_supported_languages[selected_output_lg])
-            if result:
-                st.subheader("Texto Traduzido (Português)")
                 st.write(result)
-    except Exception as e:
-        st.warning("Ocorreu um erro durante a tradução", icon="⚠️")
-        st.warning(e)
 def build_decoder_page(model_name):
-    try:
-        st.title(f"{model_name} - Tarefa Geração de Texto")
-        max_length : int = st.sidebar.slider("Tamanho Máximo da frase", 10, 200)
-        num_return_sequences : int = st.sidebar.number_input('Número de sequências/frases desejadas', min_value=1, max_value=10, value=1, step=1)
-        text : str = st.text_area("Texto", "Katxór sta trás di pórta.", height=75)
-        if st.button("Submeter"):
-            results = instantiate_gpt2(model_name ,max_length, num_return_sequences, text)
-            if results:
-                for result in results:
-                    st.write(f"**Texto Gerado:**: {result['generated_text']}")
-    except Exception as e:
-        st.warning('Tamanho máximo de tokens deve ser maior do que o número de tokens presentes na frase atual!', icon="⚠️")
-        st.warning(e)
-def build_encoder_page(model_name:str):
-    st.title(f"{model_name} - Tarefa Fill-Mask")
-    top_k = st.sidebar.number_input('Número de sequências/frases desejadas', min_value=1, max_value=5, value=1, step=1)
-    results = None
-    col1, col2 = st.columns(2)
-    with col1:
-        st.subheader("Texto")
-        if model_name=="Albertina-Kriolu":
-            st.write("Digite uma frase com um token **[MASK]**, e o modelo irá prever a palavra em falta.")
-            input_text = st.text_input("Frase de entrada", "Katxór sta trás di [MASK].")
-        else:
-            st.write("Digite uma frase com um token **<MASK>**, e o modelo irá prever a palavra em falta.")
-            input_text = st.text_input("Frase de entrada", "Katxór sta trás di <mask>.")
-        submit = st.button("Submeter")
-        try:
-            if submit and input_text:
-                results = instantiate_encoder(model_name, top_k, input_text)
-        except Exception as e:
-            st.warning('Atenção, deve de haver um token especial "<mask>" na frase!', icon="⚠️")
-            st.warning(e)
-    with col2:
-        st.subheader("Previsões")
-        if results:
-            predicted_text = st.text_input("Token Previsto", value=results[0]['sequence'], disabled=True)
-            for result in results:
-                st.write(f"**Previsão**: {result['token_str']} | **Confiança**: {round(result['score'], 4)}")
-        else:
-            predicted_text = st.text_input("Token previsto", disabled=True)
-# Your dictionary of models
-model_dict = {'RoBERTa-Kriolu': "Encoder",
-            "GPT2_v1.18":"Decoder",
-            "LLM-kea-v1.0": "Decoder",
-            "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
-              "nllb-v1.0": "Encoder-Decoder",
-            "m2m100-v1.0": "Encoder-Decoder"
-            }
-# Always appears at the top of the sidebar
-selected_model = st.sidebar.selectbox("Arquitetura", list(model_dict.keys()))
-if model_dict[selected_model] == "Encoder":
     build_encoder_page(selected_model)
-elif model_dict[selected_model] == "Encoder-Decoder":
     build_translation_page(selected_model)
 else:
     build_decoder_page(selected_model)

 from transformers import pipeline
 import os
+# Set page config for better UI
+st.set_page_config(page_title="Kriolu AI Hub", layout="wide")
+# Read token from environment
+token = os.environ.get("token")
+# --- Model Loading with Caching ---
+# This prevents the app from reloading the model every time you click a button
+@st.cache_resource
+def load_pipeline(task, model_path, **kwargs):
+    return pipeline(task, model=model_path, tokenizer=model_path, token=token, **kwargs)
+def instantiate_gpt2(model_name: str, max_length_: int, num_return_sequences: int, text: str):
+    model_path = f'Iscte-Sintra/{model_name}'
+    # NOTE: device_map="auto" could be passed here to handle memory better; it is not currently set
+    pipe = load_pipeline('text-generation', model_path)
+    # Logic for different generation params
+    if "Qwen" in model_name:
+        return pipe(text, max_new_tokens=max_length_, num_return_sequences=num_return_sequences,
+                    do_sample=True, top_p=0.95, top_k=50)
     else:
+        return pipe(text, max_length=max_length_, num_return_sequences=num_return_sequences,
+                    do_sample=True, top_p=0.95, top_k=50)
+def instantiate_encoder(model_name: str, top_k: int, text: str):
+    pipe = load_pipeline("fill-mask", f"Iscte-Sintra/{model_name}")
     return pipe(text, top_k=top_k)
+def instantiate_translation_model(model_name: str, text: str, src_lg: str, tgt_lg: str):
+    model_path = f'Iscte-Sintra/{model_name}'
+    # Dictionary to handle specific language code mapping per model type
+    # NLLB uses codes like 'por_Latn', MBart uses 'pt_XX'
+    if "nllb" in model_name:
+        # Simple mapping for NLLB (Example: adjust based on your specific model training)
+        src = "kea_Latn" if "en" in src_lg else "por_Latn"
+        tgt = "por_Latn" if "pt" in tgt_lg else "kea_Latn"
+        pipe = pipeline("translation", model=model_path, token=token, src_lang=src, tgt_lang=tgt)
+    else:
+        # Standard logic for MBart / M2M100
+        pipe = pipeline("translation", model=model_path, token=token, src_lang=src_lg, tgt_lang=tgt_lg)
     result = pipe(text)
     return result[0]["translation_text"]
+# --- UI Build Functions ---
 def build_translation_page(model_name):
+    st.title(f"🌍 {model_name}: Tradução")
+    # Dynamic language mapping based on model
+    if "nllb" in model_name:
+        lang_map = {"Português": "por_Latn", "Kabuverdianu": "kea_Latn"}
+    else:
+        lang_map = {"Português": "pt_XX", "Kabuverdianu": "en_XX"} # MBart style
+    col1, col2 = st.columns(2)
+    with col1:
+        src_label = st.selectbox("Língua de Origem", list(lang_map.keys()), index=1)
+    with col2:
+        tgt_label = st.selectbox("Língua de Destino", list(lang_map.keys()), index=0)
+    text = st.text_area("Texto de entrada", "Katxór sta trás di pórta.", height=100)
+    if st.button("Traduzir"):
+        if not text.strip():
+            st.warning("Introduza texto!")
+            return
+        with st.spinner("A traduzir..."):
+            try:
+                result = instantiate_translation_model(model_name, text, lang_map[src_label], lang_map[tgt_label])
+                st.success("Resultado:")
                 st.write(result)
+            except Exception as e:
+                st.error(f"Erro: {e}")
 def build_decoder_page(model_name):
+    st.title(f"✍️ {model_name}: Geração de Texto")
+    max_length = st.sidebar.slider("Máximo de Tokens", 10, 200, 50)
+    num_seq = st.sidebar.number_input('Sequências', 1, 5, 1)
+    text = st.text_area("Prompt", "Katxór sta trás di pórta.")
+    if st.button("Gerar"):
+        with st.spinner("A processar..."):
+            try:
+                results = instantiate_gpt2(model_name, max_length, num_seq, text)
+                for res in results:
+                    st.info(res['generated_text'])
+            except Exception as e:
+                st.error(f"Erro: {e}")
+def build_encoder_page(model_name):
+    st.title(f"🔍 {model_name}: Fill-Mask")
+    top_k = st.sidebar.slider("Top K sugestões", 1, 5, 3)
+    mask_token = "[MASK]" if "RoBERTa" not in model_name else "<mask>"
+    st.write(f"Use o token **{mask_token}** para a palavra em falta.")
+    input_text = st.text_input("Frase", f"Katxór sta trás di {mask_token}.")
+    if st.button("Prever"):
+        try:
+            results = instantiate_encoder(model_name, top_k, input_text)
+            for res in results:
+                st.write(f"✅ **{res['token_str']}** (Confiança: {res['score']:.2%})")
+        except Exception as e:
+            st.error(f"Certifique-se que usou o token {mask_token}")
+# --- Main App Logic ---
+model_dict = {
+    'RoBERTa-Kriolu': "Encoder",
+    "GPT2_v1.18": "Decoder",
+    "LLM-kea-v1.0": "Decoder",
+    "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
+    "nllb-v1.0": "Encoder-Decoder",
+    "m2m100-v1.0": "Encoder-Decoder"
+}
+selected_model = st.sidebar.selectbox("Escolha o Modelo", list(model_dict.keys()))
+arch = model_dict[selected_model]
+if arch == "Encoder":
     build_encoder_page(selected_model)
+elif arch == "Encoder-Decoder":
     build_translation_page(selected_model)
 else:
     build_decoder_page(selected_model)