File size: 3,397 Bytes
695c832
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Hugging Face Hub identifier of the checkpoint served by this app
model_name = "ufal/byt5-large-geccc-mate"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the tokenizer and the seq2seq weights, placing the model on `device`
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

def correct_text(input_text):
    """
    Correct grammatical errors in the input text using the ByT5 GEC model.

    Args:
        input_text: Text to correct. ``None`` and empty or whitespace-only
            strings are treated as "nothing to correct".

    Returns:
        The decoded top beam as a string, or ``""`` when there is no input.

    Note:
        The input is truncated to 512 tokens and generation is capped at
        512 tokens. ByT5 works on UTF-8 bytes, so this is roughly 512 bytes
        of text; anything beyond that is silently dropped.
    """
    # Guard against None as well as blank text so `.strip()` never raises
    if input_text is None or not input_text.strip():
        return ""

    # Tokenize and move every tensor to the device the model lives on
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Beam search without gradient tracking.
    # NOTE: `no_repeat_ngram_size` is deliberately NOT set. ByT5 tokens are
    # single bytes, so forbidding repeated 2-grams would forbid any byte pair
    # (including every two-byte UTF-8 letter such as "í" or "ě") from
    # appearing twice, which corrupts otherwise correct output.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )

    # The first returned sequence is the highest-scoring beam
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Create Gradio interface: input column on the left, corrected output on the right
with gr.Blocks(title="Czech Grammar Error Correction - ByT5") as demo:
    gr.Markdown("""
    # Czech Grammar Error Correction with ByT5

    This tool uses the **ByT5-large-geccc-mate** model to correct grammatical errors in Czech text.

    Simply enter your text below and click "Correct Text" to get the grammatically corrected version.
    """)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter Czech text with potential grammar errors...",
                lines=10,
                max_lines=20
            )
            correct_btn = gr.Button("Correct Text", variant="primary")

        with gr.Column():
            # A fixed elem_id gives the copy-button JavaScript a stable DOM
            # hook; auto-generated "component-N" ids depend on creation order
            # and on the Gradio version.
            output_text = gr.Textbox(
                label="Corrected Text",
                lines=10,
                max_lines=20,
                interactive=True,
                placeholder="Corrected text will appear here...",
                elem_id="corrected-text"
            )
            copy_btn = gr.Button("📋 Copy to Clipboard", variant="secondary")

    # Add examples (clicking one fills the input textbox)
    gr.Examples(
        examples=[
            ["Včera jsem šel do obchodu a koupil jsem si rohlíky."],
            ["Chtěl bych se zeptat, jestli máte volno zítra."],
            ["Mám rád když svítí slunce a můžu jít ven."]
        ],
        inputs=input_text,
        label="Example sentences (click to try)"
    )

    # Set up event handlers: the button and Enter in the input both run the model
    correct_btn.click(fn=correct_text, inputs=input_text, outputs=output_text)
    input_text.submit(fn=correct_text, inputs=input_text, outputs=output_text)

    # JavaScript for copy functionality. No Python function is attached, so
    # this runs purely in the browser. The success message is shown only once
    # the clipboard write has actually completed.
    copy_btn.click(
        None,
        None,
        None,
        js="""
        () => {
            const box = document.querySelector('#corrected-text textarea');
            if (!box) {
                alert('Could not find the corrected text.');
                return;
            }
            navigator.clipboard.writeText(box.value).then(
                () => alert('Text copied to clipboard!'),
                () => alert('Copying to clipboard failed.')
            );
        }
        """
    )

    gr.Markdown("""
    ---
    **Model:** [ufal/byt5-large-geccc-mate](https://huggingface.co/ufal/byt5-large-geccc-mate)

    **Note:** This model is specifically trained for Czech language grammar correction.
    """)

# Start the web server only when executed as a script, not when imported
if __name__ == "__main__":
    demo.launch()