import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import json
import uuid
from datetime import datetime
from pathlib import Path
from huggingface_hub import CommitScheduler
# --- CONFIGURATION ---
MODEL_ID = "st192011/Maltese-EuroLLM-1.7B-Phase5-Steerable"
DATASET_REPO_ID = "st192011/Maltese-MT-Architect"
TITLE = "🏛️ Maltese-MT-Architect: The Steerable Translator"
DESCRIPTION = """
This model was fine-tuned on top of the Phase 4 model weights using a mixed-objective training regime (combining direct translation, plan-then-translate, and plan-only tasks) to decouple linguistic planning from text generation.
This architecture enables a dual-stage inference process: the generation of an intermediate linguistic plan (the PJAN, comprising Domain, Tone, and Vocabulary constraints) followed by a conditioned execution phase.
This design prioritizes model interpretability and allows for human-in-the-loop intervention at the planning stage.
"""
# --- LOGGING SETUP ---
# Get the Write Token from Secrets (Ensure you have a secret named 'DATASET' in Space Settings)
HF_TOKEN = os.getenv("DATASET")
# Create local logging directory
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"data_{uuid.uuid4()}.jsonl"
# Initialize Data Scheduler (Saves to your private dataset)
scheduler = CommitScheduler(
    repo_id=DATASET_REPO_ID,
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=1,
    token=HF_TOKEN
)
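# CommitScheduler pushes the contents of `logs/` to the dataset repo in the
# background (`every=1` means roughly one commit per minute). The per-launch
# uuid in the log filename keeps writes from different restarts or replicas
# in separate files.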
# --- LOAD MODEL (GPU Optimized) ---
print(f"⏳ Loading {MODEL_ID}...")
# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Inference Device: {device.upper()}")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Optimization: Use float16 if on GPU (fast), float32 if on CPU (compatible)
    torch_dtype = torch.float16 if device == "cuda" else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",  # Automatically uses the GPU if available
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        trust_remote_code=True
    )
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise e
# --- INFERENCE FUNCTIONS ---
def draft_plan(english_text):
    """
    Step 1: Ask the model to generate the 'Guide' (Pjan).
    Returns: (UI Plan, Hidden State Plan)
    """
    if not english_text.strip():
        return "Please enter English text first.", ""
    prompt = f"### INGLIŻ: {english_text}\n### PJAN:"
    # Ensure inputs are on the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            temperature=0.4,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # 🛠️ FIX: Manually strip the <|endoftext|> token if it appears
    full_output = full_output.replace("<|endoftext|>", "").strip()
    clean_plan = "Error: Model could not generate a plan."
    if "### PJAN:" in full_output:
        raw_plan = full_output.split("### PJAN:")[1].strip()
        clean_plan = raw_plan.split("### MALTI:")[0].strip()
    # Return the plan TWICE:
    # 1. For the UI (editable by the user)
    # 2. For the hidden State (to remember what the model originally proposed)
    return clean_plan, clean_plan
def execute_translation(english_text, user_guide, original_guide_state):
    """
    Step 2: Translate using the user's guide AND log the interaction.
    """
    if not english_text.strip():
        return "Please enter English text."
    prompt = f"### INGLIŻ: {english_text}\n### PJAN: {user_guide}\n### MALTI:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.1,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # 🛠️ FIX: Manually strip the <|endoftext|> token
    full_output = full_output.replace("<|endoftext|>", "").strip()
    translation = full_output
    if "### MALTI:" in full_output:
        translation = full_output.split("### MALTI:")[1].strip()
    # --- LOGGING BLOCK ---
    # Only log if a valid write token is available
    if HF_TOKEN:
        with scheduler.lock:
            with log_file.open("a") as f:
                log_entry = {
                    "id": str(uuid.uuid4()),
                    "timestamp": datetime.now().isoformat(),
                    "source_english": english_text,
                    "plan_original_model": original_guide_state,  # Plan as originally drafted by the model
                    "plan_final_user": user_guide,  # Plan as edited by the user
                    "is_plan_edited": (user_guide != original_guide_state),
                    "translation_output": translation
                }
                f.write(json.dumps(log_entry) + "\n")
        print("📝 Interaction logged to dataset.")
    return translation
# --- USER INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="red")) as demo:
    gr.Markdown(f"# {TITLE}")
    # Hidden component to store the original AI plan
    original_plan_state = gr.State("")
    with gr.Accordion("📘 Model Description: EuroLLM-1.7B Phase 5: Steerable", open=False):
        gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column(scale=1):
            source_input = gr.Textbox(
                label="1. English Source",
                placeholder="Type your sentence here (e.g., 'The ferry to Gozo is late')...",
                lines=5
            )
            btn_plan = gr.Button("Step 1: Draft Blueprint 📐", variant="secondary")
        with gr.Column(scale=1):
            guide_input = gr.Textbox(
                label="2. Architectural Plan (Editable)",
                placeholder="The model's logic will appear here. You can edit it!\nTry: 'Ton: Informali' or 'Vokabolarju: Uża Dgħajsa'",
                lines=5,
                interactive=True
            )
            btn_translate = gr.Button("Step 2: Build Translation 🚀", variant="primary")
    with gr.Row():
        target_output = gr.Textbox(
            label="3. Final Maltese Translation",
            lines=4
        )
    with gr.Accordion("📚 User Manual: How to Control the Architect", open=False):
        gr.Markdown("""
### Commands you can use in the Plan:
1. **Ton (Tone):**
   - `Ton: Formali` (Official/Business)
   - `Ton: Informali` (Casual/Friends)
2. **Vokabolarju (Vocabulary):**
   - `Vokabolarju: Uża 'KelmaA' minflok 'KelmaB'` (Use WordA instead of WordB)
""")
    gr.Examples(
        examples=[
            ["The ferry to Gozo leaves in 5 minutes.", "Ton: Informali. Vokabolarju: 'Vapur', 'jitlaq'."],
            ["The internet connection is very stable today.", "Ton: Formali. Vokabolarju: 'Konnessjoni'."],
            ["I am very tired, I need to sleep.", "Ton: Kollokjali. Vokabolarju: 'Għajjien mejjet'."]
        ],
        inputs=[source_input, guide_input],
        label="Try these Examples"
    )
    # Event handlers (update both the editable plan box and the hidden state)
    btn_plan.click(
        draft_plan,
        inputs=[source_input],
        outputs=[guide_input, original_plan_state]  # Update both UI and State
    )
    btn_translate.click(
        execute_translation,
        inputs=[source_input, guide_input, original_plan_state],
        outputs=[target_output]
    )
if __name__ == "__main__":
    demo.launch()