Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import ctranslate2
from transformers import MarianTokenizer


@st.cache_resource
def load_resources():
    """Load the tokenizer and CTranslate2 translator once per server process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, both the tokenizer and the translation model would be
    reloaded from disk on each button click. ``st.cache_resource`` keeps a
    single shared instance alive across reruns and sessions.

    Returns:
        tuple: (MarianTokenizer, ctranslate2.Translator)
    """
    # Use the original tokenizer path; the CTranslate2 model lives in the
    # local "ctranslate" directory (converted from the fine-tuned checkpoint).
    tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-tr-ar")
    translator = ctranslate2.Translator("ctranslate")
    return tokenizer, translator


def translate_text(text, tokenizer, translator, beam_size=5):
    """Translate ``text`` with the CTranslate2 model and return the string.

    CTranslate2 consumes/produces subword *token strings* (not ids), so we
    round-trip through the tokenizer's id<->token conversion on both sides.

    Args:
        text: Source-language input string.
        tokenizer: MarianTokenizer matching the converted model.
        translator: ctranslate2.Translator instance.
        beam_size: Beam width for decoding (default 5, as in the original).

    Returns:
        The decoded translation with special tokens stripped.
    """
    # Tokenize input: encode to ids, then map ids back to subword strings.
    token_ids = tokenizer.encode(text, return_tensors="pt")[0]
    tokens = tokenizer.convert_ids_to_tokens(token_ids)
    # Translate a single-sentence batch.
    results = translator.translate_batch([tokens], beam_size=beam_size)
    # Best hypothesis of the first (only) batch entry.
    output_tokens = results[0].hypotheses[0]
    # Detokenize output back to a plain string.
    return tokenizer.decode(
        tokenizer.convert_tokens_to_ids(output_tokens), skip_special_tokens=True
    )


# Load tokenizer and model (cached — see load_resources).
tokenizer, translator = load_resources()

st.title("Helsinki-NLP/opus-mt-finetune-tr-ota")

input_text = st.text_area("Enter text to translate:")

if st.button("Translate"):
    # Guard against an empty text area instead of running the model on nothing.
    if not input_text.strip():
        st.warning("Please enter some text to translate.")
    else:
        st.success(translate_text(input_text, tokenizer, translator))
|