czech_gec_ui / app.py
asdfasdfdsafdsa's picture
Upload app.py with huggingface_hub
695c832 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Checkpoint identifier of the ByT5 model used for Czech grammar error correction.
model_name = "ufal/byt5-large-geccc-mate"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the seq2seq model, placing the model on the
# selected device as soon as it is created.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
def correct_text(input_text):
    """
    Correct grammatical errors in the input text using the ByT5 GEC model.

    Args:
        input_text: Text to correct. ``None``, an empty string and a
            whitespace-only string are all treated as "nothing to correct".

    Returns:
        The corrected text decoded from the top beam, or ``""`` when there
        was nothing to correct.

    Note:
        The input is truncated to 512 tokens and generation is capped at
        512 tokens, so text longer than that comes back shortened.
    """
    # A caller may pass None instead of a string; guard before calling .strip().
    if not input_text or not input_text.strip():
        return ""

    # Tokenize the text and move every tensor to the device the model lives on.
    encoded = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Beam-search decoding without gradient tracking.
    # no_repeat_ngram_size is deliberately NOT set: ByT5 works on UTF-8 bytes,
    # so forbidding repeated 2-grams would forbid any byte pair from occurring
    # twice in the output and corrupt perfectly ordinary text.
    with torch.no_grad():
        outputs = model.generate(
            **encoded,
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )

    # Decode the best hypothesis, dropping padding / end-of-sequence markers.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio interface: an input column, an output column with a copy
# button, a few clickable example sentences, and a footer naming the model.
with gr.Blocks(title="Czech Grammar Error Correction - ByT5") as demo:
    # Page header. The string content is kept flush-left so the Markdown
    # source is exactly what the renderer receives.
    gr.Markdown("""
# Czech Grammar Error Correction with ByT5
This tool uses the **ByT5-large-geccc-mate** model to correct grammatical errors in Czech text.
Simply enter your text below and click "Correct Text" to get the grammatically corrected version.
""")

    with gr.Row():
        # Left column: text to be corrected and the button that triggers it.
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter Czech text with potential grammar errors...",
                lines=10,
                max_lines=20,
            )
            correct_btn = gr.Button("Correct Text", variant="primary")

        # Right column: editable result and a copy-to-clipboard button.
        with gr.Column():
            output_text = gr.Textbox(
                label="Corrected Text",
                lines=10,
                max_lines=20,
                interactive=True,
                placeholder="Corrected text will appear here...",
            )
            copy_btn = gr.Button("📋 Copy to Clipboard", variant="secondary")

    # Example sentences that fill the input box when clicked.
    gr.Examples(
        examples=[
            ["Včera jsem šel do obchodu a koupil jsem si rohlíky."],
            ["Chtěl bych se zeptat, jestli máte volno zítra."],
            ["Mám rád když svítí slunce a můžu jít ven."],
        ],
        inputs=input_text,
        label="Example sentences (click to try)",
    )

    # Run the correction on button click and on submit from the input box.
    correct_btn.click(fn=correct_text, inputs=input_text, outputs=output_text)
    input_text.submit(fn=correct_text, inputs=input_text, outputs=output_text)

    # Client-side copy. The current value of the output box is passed to the
    # JavaScript function as its argument, so the handler does not depend on
    # auto-generated DOM ids such as "#component-5", which shift whenever the
    # layout changes. The confirmation is shown only after the clipboard
    # write actually succeeds.
    copy_btn.click(
        fn=None,
        inputs=output_text,
        outputs=None,
        js="""
        (text) => {
            navigator.clipboard.writeText(text || '').then(
                () => alert('Text copied to clipboard!'),
                () => alert('Could not copy text to clipboard.')
            );
        }
        """,
    )

    # Footer with a link to the model card.
    gr.Markdown("""
---
**Model:** [ufal/byt5-large-geccc-mate](https://huggingface.co/ufal/byt5-large-geccc-mate)
**Note:** This model is specifically trained for Czech language grammar correction.
""")

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()