Spaces:

Iscte-Sintra
/

User-Interface_v1.0

Running

App Files Community

ErikDaska committed on Jan 16

Commit

2022753

verified ·

1 Parent(s): 55ad4cc

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +75 -52

src/streamlit_app.py CHANGED Viewed

@@ -1,47 +1,52 @@
 import streamlit as st
-import transformers
 from transformers import pipeline
 import os
-# Set page config for better UI
 st.set_page_config(page_title="Kriolu AI Hub", layout="wide")
-# Read token from environment
 token = os.environ.get("token")
-# --- Model Loading with Caching ---
-# This prevents the app from reloading the model every time you click a button
 @st.cache_resource
 def load_pipeline(task, model_path, **kwargs):
     return pipeline(task, model=model_path, tokenizer=model_path, token=token, **kwargs)
-def instantiate_gpt2(model_name: str, max_length_: int, num_return_sequences: int, text: str):
     model_path = f'Iscte-Sintra/{model_name}'
-    # Use device_map="auto" to handle memory better if available
-    pipe = load_pipeline('text-generation', model_path)
-    # Logic for different generation params
-    if "Qwen" in model_name:
-        return pipe(text, max_new_tokens=max_length_, num_return_sequences=num_return_sequences,
-                    do_sample=True, top_p=0.95, top_k=50)
-    else:
-        return pipe(text, max_length=max_length_, num_return_sequences=num_return_sequences,
-                    do_sample=True, top_p=0.95, top_k=50)
-def instantiate_encoder(model_name: str, top_k: int, text: str):
     pipe = load_pipeline("fill-mask", f"Iscte-Sintra/{model_name}")
     return pipe(text, top_k=top_k)
-def instantiate_translation_model(model_name: str, text: str, src_lg: str, tgt_lg: str):
     model_path = f'Iscte-Sintra/{model_name}'
-    # Dictionary to handle specific language code mapping per model type
-    # NLLB uses codes like 'por_Latn', MBart uses 'pt_XX'
     if "nllb" in model_name:
-        # Simple mapping for NLLB (Example: adjust based on your specific model training)
-        src = "kea_Latn" if "en" in src_lg else "por_Latn"
-        tgt = "por_Latn" if "pt" in tgt_lg else "kea_Latn"
-        pipe = pipeline("translation", model=model_path, token=token, src_lang=src, tgt_lang=tgt)
     elif "m2m100" in model_name:
         pipe = pipeline(
             "translation",
@@ -49,15 +54,27 @@ def instantiate_translation_model(model_name: str, text: str, src_lg: str, tgt_l
             tokenizer=model_path,
             token=token
         )
         pipe.tokenizer.src_lang = src_lg
     else:
-        # Standard logic for MBart
-        pipe = pipeline("translation", model=model_path, token=token, src_lang=src_lg, tgt_lang=tgt_lg)
-    result = pipe(text, forced_bos_token_id=pipe.tokenizer.get_lang_id(tgt_lg))
-    return result[0]["translation_text"]
 def build_translation_page(model_name):
     st.title(f"🌍 {model_name}: Tradução")
@@ -70,40 +87,46 @@ def build_translation_page(model_name):
     elif "m2m100" in model_name:
         lang_map = {
             "Português": "pt",
-            "Kabuverdianu": "en"  # m2m100 does not support kea
         }
     else:  # mBART
         lang_map = {
             "Português": "pt_XX",
-            "Kabuverdianu": "en_XX"
         }
     col1, col2 = st.columns(2)
     with col1:
-        src_label = st.selectbox("Língua de Origem", list(lang_map.keys()), index=1)
     with col2:
-        tgt_label = st.selectbox("Língua de Destino", list(lang_map.keys()), index=0)
     text = st.text_area("Texto de entrada", "Katxór sta trás di pórta.", height=100)
     if st.button("Traduzir"):
         if not text.strip():
             st.warning("Introduza texto!")
             return
         with st.spinner("A traduzir..."):
             try:
-                result = instantiate_translation_model(model_name, text, lang_map[src_label], lang_map[tgt_label])
                 st.success("Resultado:")
                 st.write(result)
             except Exception as e:
                 st.error(f"Erro: {e}")
 def build_decoder_page(model_name):
     st.title(f"✍️ {model_name}: Geração de Texto")
     max_length = st.sidebar.slider("Máximo de Tokens", 10, 200, 50)
-    num_seq = st.sidebar.number_input('Sequências', 1, 5, 1)
     text = st.text_area("Prompt", "Katxór sta trás di pórta.")
     if st.button("Gerar"):
@@ -111,32 +134,32 @@ def build_decoder_page(model_name):
             try:
                 results = instantiate_gpt2(model_name, max_length, num_seq, text)
                 for res in results:
-                    st.info(res['generated_text'])
             except Exception as e:
                 st.error(f"Erro: {e}")
 def build_encoder_page(model_name):
     st.title(f"🔍 {model_name}: Fill-Mask")
     top_k = st.sidebar.slider("Top K sugestões", 1, 5, 3)
-    mask_token = "[MASK]" if "RoBERTa" not in model_name else "<mask>"
     st.write(f"Use o token **{mask_token}** para a palavra em falta.")
     input_text = st.text_input("Frase", f"Katxór sta trás di {mask_token}.")
     if st.button("Prever"):
         try:
             results = instantiate_encoder(model_name, top_k, input_text)
             for res in results:
-                st.write(f"✅ **{res['token_str']}** (Confiança: {res['score']:.2%})")
-        except Exception as e:
             st.error(f"Certifique-se que usou o token {mask_token}")
-# --- Main App Logic ---
 model_dict = {
-    'RoBERTa-Kriolu': "Encoder",
-    "GPT2_v1.18": "Decoder",
     "LLM-kea-v1.0": "Decoder",
     "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
     "nllb-v1.0": "Encoder-Decoder",
@@ -152,4 +175,4 @@ if arch == "Encoder":
 elif arch == "Encoder-Decoder":
     build_translation_page(selected_model)
 else:
-    build_decoder_page(selected_model)

 import streamlit as st
 from transformers import pipeline
 import os
+# ---------------- CONFIG ----------------
 st.set_page_config(page_title="Kriolu AI Hub", layout="wide")
 token = os.environ.get("token")
+# ---------------- CACHE ----------------
 @st.cache_resource
 def load_pipeline(task, model_path, **kwargs):
     return pipeline(task, model=model_path, tokenizer=model_path, token=token, **kwargs)
+# ---------------- DECODER ----------------
+def instantiate_gpt2(model_name, max_length_, num_return_sequences, text):
     model_path = f'Iscte-Sintra/{model_name}'
+    pipe = load_pipeline("text-generation", model_path)
+    return pipe(
+        text,
+        max_new_tokens=max_length_,
+        num_return_sequences=num_return_sequences,
+        do_sample=True,
+        top_p=0.95,
+        top_k=50
+    )
+# ---------------- ENCODER ----------------
+def instantiate_encoder(model_name, top_k, text):
     pipe = load_pipeline("fill-mask", f"Iscte-Sintra/{model_name}")
     return pipe(text, top_k=top_k)
+# ---------------- TRANSLATION ----------------
+def instantiate_translation_model(model_name, text, src_lg, tgt_lg):
     model_path = f'Iscte-Sintra/{model_name}'
+    # ---- NLLB ----
     if "nllb" in model_name:
+        pipe = pipeline(
+            "translation",
+            model=model_path,
+            tokenizer=model_path,
+            token=token,
+            src_lang=src_lg,
+            tgt_lang=tgt_lg
+        )
+        return pipe(text)[0]["translation_text"]
+    # ---- M2M100 ----
     elif "m2m100" in model_name:
         pipe = pipeline(
             "translation",
             tokenizer=model_path,
             token=token
         )
         pipe.tokenizer.src_lang = src_lg
+        result = pipe(
+            text,
+            forced_bos_token_id=pipe.tokenizer.get_lang_id(tgt_lg)
+        )
+        return result[0]["translation_text"]
+    # ---- MBART ----
     else:
+        pipe = pipeline(
+            "translation",
+            model=model_path,
+            tokenizer=model_path,
+            token=token,
+            src_lang=src_lg,
+            tgt_lang=tgt_lg
+        )
+        return pipe(text)[0]["translation_text"]
+# ---------------- UI: TRANSLATION ----------------
 def build_translation_page(model_name):
     st.title(f"🌍 {model_name}: Tradução")
     elif "m2m100" in model_name:
         lang_map = {
             "Português": "pt",
+            "Inglês": "en"  # m2m100 does NOT support kea
         }
     else:  # mBART
         lang_map = {
             "Português": "pt_XX",
+            "Inglês": "en_XX"
         }
     col1, col2 = st.columns(2)
     with col1:
+        src_label = st.selectbox("Língua de Origem", list(lang_map.keys()))
     with col2:
+        tgt_label = st.selectbox("Língua de Destino", list(lang_map.keys()))
     text = st.text_area("Texto de entrada", "Katxór sta trás di pórta.", height=100)
     if st.button("Traduzir"):
         if not text.strip():
             st.warning("Introduza texto!")
             return
         with st.spinner("A traduzir..."):
             try:
+                result = instantiate_translation_model(
+                    model_name,
+                    text,
+                    lang_map[src_label],
+                    lang_map[tgt_label]
+                )
                 st.success("Resultado:")
                 st.write(result)
             except Exception as e:
                 st.error(f"Erro: {e}")
+# ---------------- UI: DECODER ----------------
 def build_decoder_page(model_name):
     st.title(f"✍️ {model_name}: Geração de Texto")
     max_length = st.sidebar.slider("Máximo de Tokens", 10, 200, 50)
+    num_seq = st.sidebar.number_input("Sequências", 1, 5, 1)
     text = st.text_area("Prompt", "Katxór sta trás di pórta.")
     if st.button("Gerar"):
             try:
                 results = instantiate_gpt2(model_name, max_length, num_seq, text)
                 for res in results:
+                    st.info(res["generated_text"])
             except Exception as e:
                 st.error(f"Erro: {e}")
+# ---------------- UI: ENCODER ----------------
 def build_encoder_page(model_name):
     st.title(f"🔍 {model_name}: Fill-Mask")
     top_k = st.sidebar.slider("Top K sugestões", 1, 5, 3)
+    mask_token = "<mask>" if "RoBERTa" in model_name else "[MASK]"
     st.write(f"Use o token **{mask_token}** para a palavra em falta.")
     input_text = st.text_input("Frase", f"Katxór sta trás di {mask_token}.")
     if st.button("Prever"):
         try:
             results = instantiate_encoder(model_name, top_k, input_text)
             for res in results:
+                st.write(f"✅ **{res['token_str']}** ({res['score']:.2%})")
+        except Exception:
             st.error(f"Certifique-se que usou o token {mask_token}")
+# ---------------- MAIN ----------------
 model_dict = {
+    "RoBERTa-Kriolu": "Encoder",
+    "GPT2_v1.18": "Decoder",
     "LLM-kea-v1.0": "Decoder",
     "Modelo-Traducao-kea-ptpt-v1.0": "Encoder-Decoder",
     "nllb-v1.0": "Encoder-Decoder",
 elif arch == "Encoder-Decoder":
     build_translation_page(selected_model)
 else:
+    build_decoder_page(selected_model)