import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import json
import uuid
from datetime import datetime
from pathlib import Path
from huggingface_hub import CommitScheduler
# --- CONFIGURATION ---
MODEL_ID = "st192011/Maltese-EuroLLM-1.7B-Phase5-Steerable"
DATASET_REPO_ID = "st192011/Maltese-MT-Architect"
TITLE = "🏛️ Maltese-MT-Architect: The Steerable Translator"
DESCRIPTION = """
This model was fine-tuned on top of the Phase 4 model weights using a mixed-objective training regime (combining direct translation, plan-then-translate, and plan-only tasks) to decouple linguistic planning from text generation.
This architecture enables a dual-stage inference process: the generation of an intermediate linguistic plan (the PJAN, comprising Domain, Tone, and Vocabulary constraints) followed by a conditioned execution phase.
This design prioritizes model interpretability and allows for human-in-the-loop intervention at the planning stage.
"""
# --- LOGGING SETUP ---
# Get the Write Token from Secrets (Ensure you have a secret named 'DATASET' in Space Settings)
HF_TOKEN = os.getenv("DATASET")
# Create local logging directory
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"data_{uuid.uuid4()}.jsonl"
# Initialize Data Scheduler (Saves to your private dataset)
scheduler = CommitScheduler(
    repo_id=DATASET_REPO_ID,
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=1,
    token=HF_TOKEN
)
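# CommitScheduler pushes the contents of `logs/` to the dataset repo in the
# background (`every=1` means roughly one commit per minute). The per-launch
# uuid in the log filename keeps writes from different restarts or replicas
# in separate files.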
# --- LOAD MODEL (GPU Optimized) ---
print(f"⏳ Loading {MODEL_ID}...")
# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Inference Device: {device.upper()}")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Optimization: Use float16 if on GPU (fast), float32 if on CPU (compatible)
    torch_dtype = torch.float16 if device == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",  # Automatically uses the GPU if available
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        trust_remote_code=True
    )
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise e
# --- INFERENCE FUNCTIONS ---
def draft_plan(english_text):
    """
    Step 1: Ask the model to generate the 'Guide' (Pjan).
    Returns: (UI Plan, Hidden State Plan)
    """
    if not english_text.strip():
        return "Please enter English text first.", ""
    prompt = f"### INGLIŻ: {english_text}\n### PJAN:"
    # Ensure inputs are on the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            temperature=0.4,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # 🛠️ FIX: Manually strip the <|endoftext|> token if it appears
    full_output = full_output.replace("<|endoftext|>", "").strip()
    clean_plan = "Error: Model could not generate a plan."
    if "### PJAN:" in full_output:
        raw_plan = full_output.split("### PJAN:")[1].strip()
        clean_plan = raw_plan.split("### MALTI:")[0].strip()
    # Return the plan TWICE:
    # 1. For the UI (editable by the user)
    # 2. For the hidden State (to remember what the model originally proposed)
    return clean_plan, clean_plan
def execute_translation(english_text, user_guide, original_guide_state):
    """
    Step 2: Translate using the user's guide AND log the interaction.
    """
    if not english_text.strip():
        return "Please enter English text."
    prompt = f"### INGLIŻ: {english_text}\n### PJAN: {user_guide}\n### MALTI:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.1,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # 🛠️ FIX: Manually strip the <|endoftext|> token
    full_output = full_output.replace("<|endoftext|>", "").strip()
    translation = full_output
    if "### MALTI:" in full_output:
        translation = full_output.split("### MALTI:")[1].strip()
    # --- LOGGING BLOCK ---
    # Only log if a valid write token is available
    if HF_TOKEN:
        with scheduler.lock:
            with log_file.open("a") as f:
                log_entry = {
                    "id": str(uuid.uuid4()),
                    "timestamp": datetime.now().isoformat(),
                    "source_english": english_text,
                    "plan_original_model": original_guide_state,  # Plan as originally drafted by the model
                    "plan_final_user": user_guide,  # Plan as edited by the user
                    "is_plan_edited": (user_guide != original_guide_state),
                    "translation_output": translation
                }
                f.write(json.dumps(log_entry) + "\n")
        print("📝 Interaction logged to dataset.")
    return translation
# --- USER INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="red")) as demo:
    gr.Markdown(f"# {TITLE}")
    # Hidden component to store the original AI plan
    original_plan_state = gr.State("")
    with gr.Accordion("📘 Model Description: EuroLLM-1.7B Phase 5: Steerable", open=False):
        gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=1):
            source_input = gr.Textbox(
                label="1. English Source",
                placeholder="Type your sentence here (e.g., 'The ferry to Gozo is late')...",
                lines=5
            )
            btn_plan = gr.Button("Step 1: Draft Blueprint 📐", variant="secondary")
        with gr.Column(scale=1):
            guide_input = gr.Textbox(
                label="2. Architectural Plan (Editable)",
                placeholder="The model's logic will appear here. You can edit it!\nTry: 'Ton: Informali' or 'Vokabolarju: Uża Dgħajsa'",
                lines=5,
                interactive=True
            )
            btn_translate = gr.Button("Step 2: Build Translation 🚀", variant="primary")
    with gr.Row():
        target_output = gr.Textbox(
            label="3. Final Maltese Translation",
            lines=4
        )
    with gr.Accordion("📚 User Manual: How to Control the Architect", open=False):
        gr.Markdown("""
### Commands you can use in the Plan:
1. **Ton (Tone):**
   - `Ton: Formali` (Official/Business)
   - `Ton: Informali` (Casual/Friends)
2. **Vokabolarju (Vocabulary):**
   - `Vokabolarju: Uża 'KelmaA' minflok 'KelmaB'` (Use WordA instead of WordB)
""")
    gr.Examples(
        examples=[
            ["The ferry to Gozo leaves in 5 minutes.", "Ton: Informali. Vokabolarju: 'Vapur', 'jitlaq'."],
            ["The internet connection is very stable today.", "Ton: Formali. Vokabolarju: 'Konnessjoni'."],
            ["I am very tired, I need to sleep.", "Ton: Kollokjali. Vokabolarju: 'Għajjien mejjet'."]
        ],
        inputs=[source_input, guide_input],
        label="Try these Examples"
    )
    # Event handlers (update both the editable plan box and the hidden state)
    btn_plan.click(
        draft_plan,
        inputs=[source_input],
        outputs=[guide_input, original_plan_state]  # Update both UI and State
    )
    btn_translate.click(
        execute_translation,
        inputs=[source_input, guide_input, original_plan_state],
        outputs=[target_output]
    )
if __name__ == "__main__":
    demo.launch()