Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 3 |
import os
|
|
@@ -37,7 +41,6 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
| 37 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 38 |
model.to(device)
|
| 39 |
|
| 40 |
-
# Translate function
|
| 41 |
@lru_cache(maxsize=512)
|
| 42 |
def cached_translate(text, src_lang, tgt_lang, max_length=128, temperature=0.7):
|
| 43 |
if not text.strip(): return ""
|
|
@@ -60,11 +63,10 @@ def translate_file(file, src_lang, tgt_lang, max_length, temperature):
|
|
| 60 |
try:
|
| 61 |
lines = file.decode("utf-8").splitlines()
|
| 62 |
translated = [cached_translate(line, src_lang, tgt_lang, max_length, temperature) for line in lines if line.strip()]
|
| 63 |
-
return "
|
| 64 |
except Exception as e:
|
| 65 |
return f"File translation error: {e}"
|
| 66 |
|
| 67 |
-
# Hugging Face Summarization API
|
| 68 |
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
|
| 69 |
HF_API_KEY = os.environ.get("HF_API_KEY")
|
| 70 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
|
@@ -79,7 +81,6 @@ def summarize_text(text, max_length):
|
|
| 79 |
result = response.json()
|
| 80 |
return result[0]["summary_text"] if isinstance(result, list) else "Error: " + str(result)
|
| 81 |
|
| 82 |
-
# Paraphraser with PEGASUS
|
| 83 |
paraphrase_tokenizer = AutoTokenizer.from_pretrained("tuner007/pegasus_paraphrase")
|
| 84 |
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("tuner007/pegasus_paraphrase")
|
| 85 |
paraphrase_model.to(device)
|
|
@@ -89,83 +90,49 @@ def paraphrase_text(input_text, num_return_sequences, num_beams):
|
|
| 89 |
translated = paraphrase_model.generate(**batch, max_length=60, num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
|
| 90 |
return paraphrase_tokenizer.batch_decode(translated, skip_special_tokens=True)
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
"""
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
with gr.Tab("🌐 Text Translator"):
|
| 103 |
-
with gr.Row():
|
| 104 |
-
src_lang = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="🌐 From", value="English")
|
| 105 |
-
swap = gr.Button("⇄")
|
| 106 |
-
tgt_lang = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="🎯 To", value="Korean")
|
| 107 |
-
|
| 108 |
-
with gr.Row():
|
| 109 |
-
input_text = gr.Textbox(lines=3, label="✍️ Input Text")
|
| 110 |
-
output_text = gr.Textbox(lines=3, label="📤 Translated Output", interactive=False)
|
| 111 |
-
|
| 112 |
-
with gr.Row():
|
| 113 |
-
translate = gr.Button("🚀 Translate", variant="primary")
|
| 114 |
-
clear = gr.Button("🧽 Clear")
|
| 115 |
-
|
| 116 |
-
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 117 |
-
max_length = gr.Slider(10, 512, value=128, step=1, label="Max Length")
|
| 118 |
-
temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
|
| 119 |
-
|
| 120 |
-
with gr.Accordion("📜 Translation History", open=False):
|
| 121 |
-
history_json = gr.JSON(label="Recent Translations")
|
| 122 |
-
with gr.Row():
|
| 123 |
-
refresh = gr.Button("🔄 Refresh")
|
| 124 |
-
clear_history = gr.Button("🧹 Clear History")
|
| 125 |
-
|
| 126 |
-
with gr.Tab("📁 File Translator"):
|
| 127 |
-
file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
|
| 128 |
-
file_src = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="📌 From", value="English")
|
| 129 |
-
file_tgt = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="📌 To", value="Korean")
|
| 130 |
-
file_translate = gr.Button("📄 Translate File", variant="primary")
|
| 131 |
-
file_result = gr.Textbox(label="📑 File Output", lines=10, interactive=False)
|
| 132 |
-
|
| 133 |
-
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 134 |
-
f_max_length = gr.Slider(10, 512, value=128, step=1, label="Max Length")
|
| 135 |
-
f_temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
|
| 136 |
-
|
| 137 |
-
with gr.Tab("📝 Text Summarizer"):
|
| 138 |
-
summary_input = gr.Textbox(lines=5, label="📚 Enter text to summarize")
|
| 139 |
-
summary_length = gr.Slider(32, 512, value=128, step=8, label="📏 Max Length")
|
| 140 |
-
summary_output = gr.Textbox(label="🧾 Summary", lines=5, interactive=False)
|
| 141 |
-
summary_btn = gr.Button("🧠 Summarize")
|
| 142 |
-
|
| 143 |
-
with gr.Tab("🔁 Paraphraser"):
|
| 144 |
-
para_input = gr.Textbox(lines=4, label="✏️ Enter text to paraphrase")
|
| 145 |
-
num_outputs = gr.Slider(1, 5, value=3, step=1, label="🔁 Number of Paraphrases")
|
| 146 |
-
beam_width = gr.Slider(1, 10, value=5, step=1, label="🧭 Beam Width")
|
| 147 |
-
para_output = gr.Textbox(label="📋 Paraphrased Sentences", lines=6)
|
| 148 |
-
para_btn = gr.Button("🎯 Paraphrase Now")
|
| 149 |
-
|
| 150 |
-
# Events
|
| 151 |
-
translate.click(cached_translate, [input_text, src_lang, tgt_lang, max_length, temperature], output_text)
|
| 152 |
-
clear.click(lambda: ("", ""), None, [input_text, output_text])
|
| 153 |
-
swap.click(lambda s, t: (t, s), [src_lang, tgt_lang], [src_lang, tgt_lang])
|
| 154 |
-
refresh.click(lambda: history.get(), None, history_json)
|
| 155 |
-
clear_history.click(lambda: history.clear() or [], None, history_json)
|
| 156 |
-
file_translate.click(lambda file, src, tgt, ml, t: translate_file(file.read(), src, tgt, ml, t),
|
| 157 |
-
[file_input, file_src, file_tgt, f_max_length, f_temp], file_result)
|
| 158 |
-
summary_btn.click(summarize_text, [summary_input, summary_length], summary_output)
|
| 159 |
-
para_btn.click(lambda text, num, beams: "\n\n".join(paraphrase_text(text, num, beams)),
|
| 160 |
-
[para_input, num_outputs, beam_width], para_output)
|
| 161 |
-
|
| 162 |
-
gr.Markdown(f"""
|
| 163 |
-
### 🔍 Info
|
| 164 |
-
- Translator: `{model_name}` on `{device}`
|
| 165 |
-
- Summarizer: `facebook/bart-large-cnn`
|
| 166 |
-
- Paraphraser: `tuner007/pegasus_paraphrase`
|
| 167 |
-
- API Token Status: {'✅ Loaded' if HF_API_KEY else '❌ Not Found'}
|
| 168 |
-
""")
|
| 169 |
|
| 170 |
-
|
| 171 |
-
demo.launch(share=True)
|
|
|
|
| 1 |
+
# Rewriting after execution state reset (previous file was lost)
|
| 2 |
+
# Saving the new version of app.py with Grammar Correction tab added
|
| 3 |
+
|
| 4 |
+
app_py_with_grammar = """
|
| 5 |
import gradio as gr
|
| 6 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 7 |
import os
|
|
|
|
| 41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 42 |
model.to(device)
|
| 43 |
|
|
|
|
| 44 |
@lru_cache(maxsize=512)
|
| 45 |
def cached_translate(text, src_lang, tgt_lang, max_length=128, temperature=0.7):
|
| 46 |
if not text.strip(): return ""
|
|
|
|
| 63 |
try:
|
| 64 |
lines = file.decode("utf-8").splitlines()
|
| 65 |
translated = [cached_translate(line, src_lang, tgt_lang, max_length, temperature) for line in lines if line.strip()]
|
| 66 |
+
return "\\n".join(translated)
|
| 67 |
except Exception as e:
|
| 68 |
return f"File translation error: {e}"
|
| 69 |
|
|
|
|
| 70 |
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
|
| 71 |
HF_API_KEY = os.environ.get("HF_API_KEY")
|
| 72 |
headers = {"Authorization": f"Bearer {HF_API_KEY}"}
|
|
|
|
| 81 |
result = response.json()
|
| 82 |
return result[0]["summary_text"] if isinstance(result, list) else "Error: " + str(result)
|
| 83 |
|
|
|
|
| 84 |
paraphrase_tokenizer = AutoTokenizer.from_pretrained("tuner007/pegasus_paraphrase")
|
| 85 |
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("tuner007/pegasus_paraphrase")
|
| 86 |
paraphrase_model.to(device)
|
|
|
|
| 90 |
translated = paraphrase_model.generate(**batch, max_length=60, num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
|
| 91 |
return paraphrase_tokenizer.batch_decode(translated, skip_special_tokens=True)
|
| 92 |
|
| 93 |
+
# Grammar Corrector
|
| 94 |
+
grammar_tokenizer = AutoTokenizer.from_pretrained("prithivida/grammar_error_correcter_v1")
|
| 95 |
+
grammar_model = AutoModelForSeq2SeqLM.from_pretrained("prithivida/grammar_error_correcter_v1").to(device)
|
| 96 |
+
|
| 97 |
+
def correct_grammar(input_text):
|
| 98 |
+
input_ids = grammar_tokenizer.encode(input_text, return_tensors="pt").to(device)
|
| 99 |
+
output_ids = grammar_model.generate(input_ids, max_length=128, num_beams=4, early_stopping=True)
|
| 100 |
+
return grammar_tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 101 |
+
|
| 102 |
+
# UI
|
| 103 |
+
with gr.Blocks() as demo:
|
| 104 |
+
with gr.Tab("Translator"):
|
| 105 |
+
src_lang = gr.Dropdown(choices=list(LANGUAGE_CODES), value="English", label="From")
|
| 106 |
+
tgt_lang = gr.Dropdown(choices=list(LANGUAGE_CODES), value="Korean", label="To")
|
| 107 |
+
text_input = gr.Textbox(lines=3, label="Input")
|
| 108 |
+
text_output = gr.Textbox(lines=3, label="Output")
|
| 109 |
+
translate_btn = gr.Button("Translate")
|
| 110 |
+
translate_btn.click(cached_translate, [text_input, src_lang, tgt_lang], text_output)
|
| 111 |
+
with gr.Tab("Summarizer"):
|
| 112 |
+
summary_input = gr.Textbox(lines=5, label="Input")
|
| 113 |
+
summary_len = gr.Slider(50, 300, 128, label="Max Length")
|
| 114 |
+
summary_output = gr.Textbox(lines=5, label="Summary")
|
| 115 |
+
summary_btn = gr.Button("Summarize")
|
| 116 |
+
summary_btn.click(summarize_text, [summary_input, summary_len], summary_output)
|
| 117 |
+
with gr.Tab("Paraphraser"):
|
| 118 |
+
para_input = gr.Textbox(lines=4, label="Text")
|
| 119 |
+
para_output = gr.Textbox(lines=5, label="Paraphrases")
|
| 120 |
+
num_variants = gr.Slider(1, 5, 3, label="Number")
|
| 121 |
+
beams = gr.Slider(1, 10, 5, label="Beams")
|
| 122 |
+
para_btn = gr.Button("Paraphrase")
|
| 123 |
+
para_btn.click(lambda x, y, z: "\\n\\n".join(paraphrase_text(x, y, z)), [para_input, num_variants, beams], para_output)
|
| 124 |
+
with gr.Tab("Grammar Corrector"):
|
| 125 |
+
grammar_input = gr.Textbox(lines=5, label="Text with Errors")
|
| 126 |
+
grammar_output = gr.Textbox(lines=5, label="Corrected")
|
| 127 |
+
grammar_btn = gr.Button("Correct")
|
| 128 |
+
grammar_btn.click(correct_grammar, grammar_input, grammar_output)
|
| 129 |
+
|
| 130 |
+
if __name__ == "__main__":
|
| 131 |
+
demo.launch()
|
| 132 |
"""
|
| 133 |
|
| 134 |
+
# Save to file for user to download and deploy to Hugging Face
|
| 135 |
+
with open("/mnt/data/app_grammar_corrector.py", "w") as f:
|
| 136 |
+
f.write(app_py_with_grammar)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
"/mnt/data/app_grammar_corrector.py"
|
|
|