# Darsala / Georgian-Translation demo — app.py (commit d8b7238, verified)
import gradio as gr
from transformers import EncoderDecoderModel
from encoder_decoder_tokenizer import EncoderDecoderTokenizer
import torch
import re
from huggingface_hub import snapshot_download
# Download the model repository to a local folder (cached by huggingface_hub).
path_to_downloaded = snapshot_download(
    repo_id="Darsala/Georgian-Translation",
    local_dir="./Georgian-Translation",
    local_dir_use_symlinks=False,
)

# Load the fine-tuned encoder-decoder model and its custom tokenizer
# from the downloaded folder.
model = EncoderDecoderModel.from_pretrained(path_to_downloaded)
tokenizer = EncoderDecoderTokenizer.from_pretrained(path_to_downloaded)

# Pick GPU when available. The model must live on the same device as the
# tokenized inputs — translate() moves inputs to `device`, so without this
# .to(device) the app crashes on any CUDA machine (inputs on GPU, model on CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only: disables dropout and similar training behavior
def translate(
    text: str,
    num_beams: int = 5,
    max_length: int = 256,
) -> str:
    """
    Translate a single string with the loaded EncoderDecoderModel.

    Args:
        text: Source text to translate.
        num_beams: Beam-search width passed to ``model.generate``.
        max_length: Maximum length of the generated sequence.

    Returns:
        The decoded translation with special tokens stripped. An empty
        (or whitespace-only) input returns "" without invoking the model.
    """
    # Normalize input: lowercase and collapse runs of whitespace —
    # presumably matching the preprocessing used at training time.
    text = re.sub(r'\s+', ' ', text.lower()).strip()
    if not text:
        # Nothing to translate; skip the (expensive) beam search entirely.
        return ""

    # Tokenize and move the tensors to the model's device.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="longest",
    ).to(device)

    # Beam-search generation; no gradients are needed at inference time,
    # so disable autograd to save memory and time.
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            num_beams=num_beams,
            max_length=max_length,
            early_stopping=True,
        )

    # Decode the first (and only) sequence in the batch.
    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(f"English: {text}")
    print(f"Translated: {output}")
    return output
# Build the Gradio UI: a single text box in, translated text out.
source_textbox = gr.components.Textbox(label="Text")

demo = gr.Interface(
    fn=translate,
    inputs=[source_textbox],
    outputs=["text"],
    examples=[["Hello, what's your name?"]],
    cache_examples=False,
    title="Translation Demo",
    description="This demo is a Georgian-Translation model",
)

# Start the web server for the demo.
demo.launch()