# app.py
# Requirements: transformers, torch, sentencepiece, sacremoses, gradio
"""Gradio demo that rewrites an English sentence with one of two models.

Two methods are offered in the UI:

* "Helsinki-NLP": translate English -> Spanish -> English with the two
  OPUS-MT Marian checkpoints and return the round-tripped English.
* "FLAN-T5-base": prompt ``google/flan-t5-base`` to correct and rewrite the
  sentence.

Models are loaded on first use and kept in the module-level ``CACHE``.
"""
import torch
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    MarianMTModel,
    MarianTokenizer,
)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_OPTIONS = [
    "Helsinki-NLP",  # Round-trip OPUS-MT en→es→en
    "FLAN-T5-base (Google en→en)",
]

# Cache of loaded models/tokenizers, keyed by "flan" and "en_es".
CACHE = {}


# --- FLAN loader ---
def load_flan():
    """Return ``(model, tokenizer)`` for google/flan-t5-base.

    The pair is loaded and moved to ``DEVICE`` on the first call, stored in
    ``CACHE["flan"]``, and returned from the cache on every later call.
    """
    if "flan" not in CACHE:
        tok = AutoTokenizer.from_pretrained("google/flan-t5-base")
        mdl = AutoModelForSeq2SeqLM.from_pretrained(
            "google/flan-t5-base",
            low_cpu_mem_usage=True,
            torch_dtype="auto",
        ).to(DEVICE)
        CACHE["flan"] = (mdl, tok)
    return CACHE["flan"]


def run_flan(sentence: str) -> str:
    """Ask FLAN-T5 to correct and rewrite *sentence*.

    Args:
        sentence: The English text to correct.

    Returns:
        The decoded first generated sequence, with special tokens removed and
        surrounding whitespace stripped.
    """
    model, tok = load_flan()
    prompt = f"Correct grammar and rewrite in fluent British English: {sentence}"
    # truncation=True caps the prompt at the tokenizer's configured maximum
    # length instead of passing arbitrarily long input to the model.
    inputs = tok(prompt, return_tensors="pt", truncation=True).to(DEVICE)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=96, num_beams=4)
    return tok.decode(out[0], skip_special_tokens=True).strip()


# --- Marian round-trip loader ---
def load_marian():
    """Return ``(en_es_model, en_es_tok, es_en_model, es_en_tok)``.

    Both OPUS-MT checkpoints are loaded and moved to ``DEVICE`` on the first
    call, stored in ``CACHE["en_es"]``, and reused afterwards.
    """
    if "en_es" not in CACHE:
        tok1 = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-es")
        mdl1 = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-es").to(DEVICE)
        tok2 = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-es-en")
        mdl2 = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-es-en").to(DEVICE)
        CACHE["en_es"] = (mdl1, tok1, mdl2, tok2)
    return CACHE["en_es"]


def run_roundtrip(sentence: str) -> str:
    """Translate *sentence* English -> Spanish -> English and return the result.

    Args:
        sentence: The English text to round-trip.

    Returns:
        The English text decoded from the second translation, stripped of
        surrounding whitespace.
    """
    mdl1, tok1, mdl2, tok2 = load_marian()

    # Inference only: no gradients are needed, same as in run_flan.
    with torch.no_grad():
        # English → Spanish
        inputs = tok1(sentence, return_tensors="pt", truncation=True).to(DEVICE)
        es_tokens = mdl1.generate(**inputs, max_length=128, num_beams=4)
        spanish = tok1.decode(es_tokens[0], skip_special_tokens=True)

        # Spanish → English
        inputs2 = tok2(spanish, return_tensors="pt", truncation=True).to(DEVICE)
        en_tokens = mdl2.generate(**inputs2, max_length=128, num_beams=4)
        english = tok2.decode(en_tokens[0], skip_special_tokens=True)

    return english.strip()


# --- Dispatcher ---
def polish(sentence: str, choice: str) -> str:
    """Route *sentence* to the method named by *choice*.

    Args:
        sentence: Text from the input box.
        choice: Selected entry of ``MODEL_OPTIONS``; matched by its prefix
            ("FLAN" or "Helsinki").

    Returns:
        ``""`` for missing or whitespace-only input, the model output for a
        recognised choice, or ``"Unknown option."`` otherwise.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if not sentence or not sentence.strip():
        return ""
    # A missing selection is treated like any other unrecognised option.
    if not choice:
        return "Unknown option."
    if choice.startswith("FLAN"):
        return run_flan(sentence)
    if choice.startswith("Helsinki"):
        return run_roundtrip(sentence)
    return "Unknown option."


# --- Gradio UI ---
with gr.Blocks(title="HizkuntzLagun: English Fixer (CPU enabled)") as demo:
    gr.Markdown("### HizkuntzLagun: English Fixer\n")
    gr.Markdown(
        """
        > ⚡ **Note:**
        > This tool runs on free, CPU-friendly AI models.
        > It’s designed to be fast and accessible — not perfect.
        > Expect quick corrections, not deep grammar analysis.
        > Drop in anytime — a quick fix a day keeps awkward grammar away.
        """
    )
    inp = gr.Textbox(
        lines=3,
        label="Input (English) E.g. She go tomorrow buy two bread.",
        placeholder="Type an English sentence to correct.",
    )
    choice = gr.Dropdown(choices=MODEL_OPTIONS, value="Helsinki-NLP", label="Method")
    btn = gr.Button("Oxford grammar polish")
    out = gr.Textbox(label="Output")
    btn.click(polish, inputs=[inp, choice], outputs=out)

if __name__ == "__main__":
    demo.launch()