Spaces:

st192011
/

Maltese-MT-Lab

Running

App Files Community

st192011 committed 18 days ago

Commit

b8da437

verified ·

1 Parent(s): 34b244c

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -23

app.py CHANGED Viewed

@@ -2,27 +2,38 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# --- CONFIGURATION ---
 MODELS_CONFIG = {
     "Phase 2: Stable (Formal)": {
         "id": "st192011/Maltese-EuroLLM-1.7B-Phase2-Stable",
-        "description": "The 'Bureaucrat Bot'. Optimized for formal precision.",
         "chrf": "60.18",
         "comet": "0.6431"
     },
     "Phase 4: Anchored (Native)": {
         "id": "st192011/Maltese-EuroLLM-1.7B-Phase4-Anchored",
-        "description": "The 'Native Speaker'. Optimized for cultural awareness and logic.",
         "chrf": "52.68",
         "comet": "0.6567"
     }
 }
-# --- MODEL LOADING ---
-# We load them globally so they stay in memory (this requires ~14GB RAM total)
-print("Loading models to CPU... this may take a few minutes.")
-# Load Model 2
 tokenizer_p2 = AutoTokenizer.from_pretrained(MODELS_CONFIG["Phase 2: Stable (Formal)"]["id"])
 model_p2 = AutoModelForCausalLM.from_pretrained(
     MODELS_CONFIG["Phase 2: Stable (Formal)"]["id"],
@@ -30,7 +41,7 @@ model_p2 = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.float32
 )
-# Load Model 4
 tokenizer_p4 = AutoTokenizer.from_pretrained(MODELS_CONFIG["Phase 4: Anchored (Native)"]["id"])
 model_p4 = AutoModelForCausalLM.from_pretrained(
     MODELS_CONFIG["Phase 4: Anchored (Native)"]["id"],
@@ -39,6 +50,10 @@ model_p4 = AutoModelForCausalLM.from_pretrained(
 )
 def local_translate(model, tokenizer, text, temp):
     prompt = f"### INGLIŻ: {text}\n### MALTI:"
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
@@ -52,19 +67,21 @@ def local_translate(model, tokenizer, text, temp):
             pad_token_id=tokenizer.eos_token_id
         )
-    # Decode only the new tokens
     full_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-    # Extract the part after ### MALTI:
-    maltese_text = full_text.split("### MALTI:")[-1].strip()
     return maltese_text
 def translate_logic(text, selected_models, temp):
     out_p2 = "Model not selected."
     out_p4 = "Model not selected."
-    if not text.strip():
-        return "Please enter text.", "Please enter text."
     if "Phase 2: Stable (Formal)" in selected_models:
         try:
             out_p2 = local_translate(model_p2, tokenizer_p2, text, temp)
@@ -81,30 +98,42 @@ def translate_logic(text, selected_models, temp):
 # --- GRADIO UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🇲🇹 Maltese-MT Lab (Local CPU)")
-    gr.Markdown("Comparing English-to-Maltese EuroLLM models running directly on this machine.")
     with gr.Row():
         with gr.Column(scale=2):
-            input_text = gr.Textbox(label="English Source Text", placeholder="Enter English text here...", lines=4)
             model_selector = gr.CheckboxGroup(
                 choices=list(MODELS_CONFIG.keys()),
                 value=list(MODELS_CONFIG.keys()),
                 label="Select Models to Compare"
             )
-            temp_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1, label="Creativity (Temperature)")
             btn = gr.Button("🚀 Run Translation", variant="primary")
     with gr.Row():
         with gr.Column():
-            gr.Markdown("### Phase 2: Stable")
             p2_out = gr.Textbox(label="Output", interactive=False, lines=5)
-            gr.Markdown(f"**ChrF++:** `{MODELS_CONFIG['Phase 2: Stable (Formal)']['chrf']}` | **COMET:** `{MODELS_CONFIG['Phase 2: Stable (Formal)']['comet']}`")
         with gr.Column():
-            gr.Markdown("### Phase 4: Anchored")
             p4_out = gr.Textbox(label="Output", interactive=False, lines=5)
-            gr.Markdown(f"**ChrF++:** `{MODELS_CONFIG['Phase 4: Anchored (Native)']['chrf']}` | **COMET:** `{MODELS_CONFIG['Phase 4: Anchored (Native)']['comet']}`")
     gr.Examples(
         examples=[
@@ -121,4 +150,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         outputs=[p2_out, p4_out]
     )
-demo.launch()

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# --- MODEL DATA (Original Detailed Descriptions) ---
 MODELS_CONFIG = {
     "Phase 2: Stable (Formal)": {
         "id": "st192011/Maltese-EuroLLM-1.7B-Phase2-Stable",
+        "description": (
+            "The 'Bureaucrat Bot'. Built upon a foundational adaptation phase that mixed "
+            "monolingual Maltese and Italian to bridge morphological roots. This version "
+            "was fine-tuned on high-fidelity EU and governmental parallel corpora, "
+            "optimizing it for extreme formal precision and administrative accuracy."
+        ),
         "chrf": "60.18",
         "comet": "0.6431"
     },
     "Phase 4: Anchored (Native)": {
         "id": "st192011/Maltese-EuroLLM-1.7B-Phase4-Anchored",
+        "description": (
+            "The 'Native Speaker'. An evolution of Phase 2 utilizing a curriculum-based "
+            "'Full Circle' approach. It integrates synthesized reasoning chains (CoT) "
+            "that allow the model to process linguistic logic before translating. By mixing "
+            "all previous data types, it anchors factual accuracy to native-level phrasing "
+            "and cultural awareness."
+        ),
         "chrf": "52.68",
         "comet": "0.6567"
     }
 }
+# --- MODEL LOADING (Local CPU) ---
+# Note: Loading two 1.7B models takes ~14GB of RAM.
+print("Loading models to CPU... Please wait.")
+# Load Model Phase 2
 tokenizer_p2 = AutoTokenizer.from_pretrained(MODELS_CONFIG["Phase 2: Stable (Formal)"]["id"])
 model_p2 = AutoModelForCausalLM.from_pretrained(
     MODELS_CONFIG["Phase 2: Stable (Formal)"]["id"],
     torch_dtype=torch.float32
 )
+# Load Model Phase 4
 tokenizer_p4 = AutoTokenizer.from_pretrained(MODELS_CONFIG["Phase 4: Anchored (Native)"]["id"])
 model_p4 = AutoModelForCausalLM.from_pretrained(
     MODELS_CONFIG["Phase 4: Anchored (Native)"]["id"],
 )
 def local_translate(model, tokenizer, text, temp):
+    if not text.strip():
+        return ""
+    # Prompt format consistent with training
     prompt = f"### INGLIŻ: {text}\n### MALTI:"
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
             pad_token_id=tokenizer.eos_token_id
         )
+    # skip_special_tokens=True removes the <|endoftext|> and other technical tokens
     full_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+    # Extract only the Maltese translation part (the text after the prompt)
+    if "### MALTI:" in full_text:
+        maltese_text = full_text.split("### MALTI:")[-1].strip()
+    else:
+        maltese_text = full_text.strip()
     return maltese_text
 def translate_logic(text, selected_models, temp):
     out_p2 = "Model not selected."
     out_p4 = "Model not selected."
     if "Phase 2: Stable (Formal)" in selected_models:
         try:
             out_p2 = local_translate(model_p2, tokenizer_p2, text, temp)
 # --- GRADIO UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🇲🇹 Maltese-MT Lab")
+    gr.Markdown("Compare English-to-Maltese EuroLLM models running locally on CPU.")
     with gr.Row():
         with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="English Source Text",
+                placeholder="Enter English text here...",
+                lines=4
+            )
             model_selector = gr.CheckboxGroup(
                 choices=list(MODELS_CONFIG.keys()),
                 value=list(MODELS_CONFIG.keys()),
                 label="Select Models to Compare"
             )
+            temp_slider = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.1,
+                step=0.1,
+                label="Creativity (Temperature)"
+            )
             btn = gr.Button("🚀 Run Translation", variant="primary")
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### Phase 2: Stable (Formal)")
             p2_out = gr.Textbox(label="Output", interactive=False, lines=5)
+            gr.Markdown(f"**Training Strategy:**\n{MODELS_CONFIG['Phase 2: Stable (Formal)']['description']}")
+            gr.Markdown(f"**Metrics:** ChrF++: `{MODELS_CONFIG['Phase 2: Stable (Formal)']['chrf']}` | COMET: `{MODELS_CONFIG['Phase 2: Stable (Formal)']['comet']}`")
         with gr.Column():
+            gr.Markdown("### Phase 4: Anchored (Native)")
             p4_out = gr.Textbox(label="Output", interactive=False, lines=5)
+            gr.Markdown(f"**Training Strategy:**\n{MODELS_CONFIG['Phase 4: Anchored (Native)']['description']}")
+            gr.Markdown(f"**Metrics:** ChrF++: `{MODELS_CONFIG['Phase 4: Anchored (Native)']['chrf']}` | COMET: `{MODELS_CONFIG['Phase 4: Anchored (Native)']['comet']}`")
     gr.Examples(
         examples=[
         outputs=[p2_out, p4_out]
     )
+if __name__ == "__main__":
+    demo.launch()