asdfasdfdsafdsa committed on
Commit
1faa491
·
verified ·
1 Parent(s): 319286c

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+
5
# Checkpoint that backs this demo
model_name = "ufal/byt5-large-geccc-mate"

# Pick the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer, then load the model and place it on the chosen device
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
13
+
14
def correct_text(input_text):
    """Correct grammatical errors in the input text using the ByT5 GEC model.

    Args:
        input_text: Text to correct, as delivered by the UI. ``None`` and
            whitespace-only values are treated as empty input.

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        there is nothing to correct.
    """
    # Guard against None as well as blank input so the handler never raises
    # AttributeError on a missing value.
    if not input_text or not input_text.strip():
        return ""

    # NOTE: input beyond 512 tokens is silently truncated. ByT5 works on
    # bytes, so this is roughly 512 bytes of UTF-8 text, not 512 words.
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate corrected text. No n-gram repetition penalty is applied: with
    # byte-level tokens, no_repeat_ngram_size=2 would forbid any two-byte
    # sequence from occurring twice, which makes it impossible to reproduce
    # ordinary sentences faithfully.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )

    # Decode the best beam and return it
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
38
+
39
# Create Gradio interface
with gr.Blocks(title="Czech Grammar Error Correction - ByT5") as demo:
    gr.Markdown("""
    # Czech Grammar Error Correction with ByT5

    This tool uses the **ByT5-large-geccc-mate** model to correct grammatical errors in Czech text.

    Simply enter your text below and click "Correct Text" to get the grammatically corrected version.
    """)

    with gr.Row():
        # Left column: user input and the button that triggers correction
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter Czech text with potential grammar errors...",
                lines=10,
                max_lines=20
            )
            correct_btn = gr.Button("Correct Text", variant="primary")

        # Right column: model output and the copy button
        with gr.Column():
            output_text = gr.Textbox(
                label="Corrected Text",
                lines=10,
                max_lines=20,
                interactive=True,
                placeholder="Corrected text will appear here...",
                # Stable DOM id for the copy script below; auto-generated
                # "component-N" ids depend on creation order and Gradio version.
                elem_id="corrected-text"
            )
            copy_btn = gr.Button("📋 Copy to Clipboard", variant="secondary")

    # Add examples
    gr.Examples(
        examples=[
            ["Včera jsem šel do obchodu a koupil jsem si rohlíky."],
            ["Chtěl bych se zeptat, jestli máte volno zítra."],
            ["Mám rád když svítí slunce a můžu jít ven."]
        ],
        inputs=input_text,
        label="Example sentences (click to try)"
    )

    # Set up event handlers: both the button and pressing Enter run the model
    correct_btn.click(fn=correct_text, inputs=input_text, outputs=output_text)
    input_text.submit(fn=correct_text, inputs=input_text, outputs=output_text)

    # JavaScript for copy functionality. It runs purely in the browser (no
    # Python fn), looks the output box up by its explicit elem_id, and only
    # reports success after the clipboard write has actually completed.
    copy_btn.click(
        None,
        None,
        None,
        js="""
        () => {
            const box = document.querySelector('#corrected-text textarea');
            if (!box) {
                return;
            }
            navigator.clipboard.writeText(box.value).then(
                () => alert('Text copied to clipboard!')
            );
        }
        """
    )

    gr.Markdown("""
    ---
    **Model:** [ufal/byt5-large-geccc-mate](https://huggingface.co/ufal/byt5-large-geccc-mate)

    **Note:** This model is specifically trained for Czech language grammar correction.
    """)

# Start the web server only when executed as a script, not when imported
if __name__ == "__main__":
    demo.launch()