st192011 committed on
Commit
bec47f7
·
verified ·
1 Parent(s): 135cfcd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+
4
+ # --- MODEL DATA ---
5
+ MODELS = {
6
+ "Phase 2: Stable (Formal)": {
7
+ "id": "st192011/Maltese-EuroLLM-1.7B-Phase2-Stable",
8
+ "description": "The 'Bureaucrat Bot'. Trained on 200k rows of EU/Government data (TildeMODEL). High fidelity for legal and official documents.",
9
+ "chrf": "60.18",
10
+ "comet": "0.6431"
11
+ },
12
+ "Phase 4: Anchored (Native)": {
13
+ "id": "st192011/Maltese-EuroLLM-1.7B-Phase4-Anchored",
14
+ "description": "The 'Native Speaker'. Uses Anchored Reasoning (CoT) distilled from Llama-70B. Designed for natural phrasing and cultural awareness.",
15
+ "chrf": "52.68",
16
+ "comet": "0.6567"
17
+ }
18
+ }
19
+
20
def translate_logic(text, selected_models, temp):
    """Translate English *text* to Maltese with each selected model.

    Args:
        text: English source text to translate.
        selected_models: Display names (keys of ``MODELS``) to query.
        temp: Sampling temperature; at the slider minimum (0.1 or
            below) decoding is greedy, otherwise sampling is enabled.

    Returns:
        A 2-tuple of strings for the Phase 2 and Phase 4 output
        textboxes, in that fixed order. Any model that was not
        selected yields the placeholder "Model not selected.".
    """
    results = {}

    # Prompt format consistent with training; it does not depend on
    # the model, so build it once outside the loop.
    prompt = f"### INGLIŻ: {text}\n### MALTI:"
    # Greedy decoding at the minimum temperature, sampling otherwise
    # (the comparison already yields the bool do_sample expects).
    sample = temp > 0.1

    for model_name in selected_models:
        client = InferenceClient(model=MODELS[model_name]["id"])

        try:
            output = client.text_generation(
                prompt,
                max_new_tokens=150,
                temperature=temp,
                do_sample=sample,
                repetition_penalty=1.2
            )
            # Clean up the response: drop any echoed prompt prefix and
            # the EOS marker the model may emit.
            clean_output = output.strip().split("### MALTI:")[-1].replace("<|endoftext|>", "").strip()
            results[model_name] = clean_output
        except Exception as e:
            # Surface API cold-start/unavailability in the UI instead
            # of crashing the whole request.
            results[model_name] = f"Error: Inference API is still loading or unavailable. ({str(e)})"

    # Return formatted outputs for the UI.
    # We return a list of outputs corresponding to the two textboxes.
    out_p2 = results.get("Phase 2: Stable (Formal)", "Model not selected.")
    out_p4 = results.get("Phase 4: Anchored (Native)", "Model not selected.")

    return out_p2, out_p4
50
+
51
# --- GRADIO UI ---
# Two-stage layout: a shared input row (source text, model picker,
# temperature, run button) followed by one output column per model.
# NOTE: component creation order inside the `with` contexts defines
# the rendered layout, so statement order here is load-bearing.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🇲🇹 Maltese-MT Arena")
    gr.Markdown("Compare different generations of fine-tuned EuroLLM models for English-to-Maltese translation.")

    with gr.Row():
        with gr.Column(scale=2):
            # English source text; multi-line for paragraph input.
            input_text = gr.Textbox(label="English Source Text", placeholder="Enter English text here...", lines=4)
            # Choices mirror the MODELS registry keys; both models are
            # pre-selected so the default click compares side by side.
            model_selector = gr.CheckboxGroup(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys()),
                label="Select Models to Compare"
            )
            # Default 0.1 is the greedy-decoding threshold in
            # translate_logic (sampling kicks in above it).
            temp_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1, label="Creativity (Temperature)")
            btn = gr.Button("🚀 Run Translation", variant="primary")

    with gr.Row():
        # Left column: Phase 2 output plus its description/metrics.
        with gr.Column():
            gr.Markdown("### Phase 2: Stable")
            p2_out = gr.Textbox(label="Output", interactive=False, lines=5)
            gr.Markdown(f"**Training:** {MODELS['Phase 2: Stable (Formal)']['description']}")
            gr.Markdown(f"**Metrics:** ChrF++: `{MODELS['Phase 2: Stable (Formal)']['chrf']}` | COMET: `{MODELS['Phase 2: Stable (Formal)']['comet']}`")

        # Right column: Phase 4 output plus its description/metrics.
        with gr.Column():
            gr.Markdown("### Phase 4: Anchored")
            p4_out = gr.Textbox(label="Output", interactive=False, lines=5)
            gr.Markdown(f"**Training:** {MODELS['Phase 4: Anchored (Native)']['description']}")
            gr.Markdown(f"**Metrics:** ChrF++: `{MODELS['Phase 4: Anchored (Native)']['chrf']}` | COMET: `{MODELS['Phase 4: Anchored (Native)']['comet']}`")

    # Clickable sample inputs covering casual, scientific, and legal
    # registers (the two models target different registers).
    gr.Examples(
        examples=[
            ["The ferry to Gozo leaves every 45 minutes."],
            ["We now have 4-month-old mice that are non-diabetic that used to be diabetic."],
            ["This regulation shall be binding in its entirety and directly applicable in all Member States."]
        ],
        inputs=input_text
    )

    # Wire the button to the translation function: the three controls
    # map positionally onto translate_logic(text, selected_models, temp),
    # and its 2-tuple return fills (p2_out, p4_out) in order.
    btn.click(
        fn=translate_logic,
        inputs=[input_text, model_selector, temp_slider],
        outputs=[p2_out, p4_out]
    )

demo.launch()