space_23

Sleeping

App Files Files Community

Frenchizer committed on Jan 26, 2025

Commit

65ed74c

verified ·

1 Parent(s): 8d5320c

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -22

app.py CHANGED Viewed

@@ -1,37 +1,50 @@
 import numpy as np
 import onnxruntime as ort
-import torch
-from transformers import MarianMTModel, MarianTokenizer
 import gradio as gr
-# Load the MarianMT model and tokenizer from the local folder
-model_path = "./model.onnx"  # Path to the folder containing the model files
-tokenizer = MarianTokenizer.from_pretrained(model_name)
-decoder_model = MarianMTModel.from_pretrained(model_name).get_decoder()
-# Load the ONNX encoder
-encoder_session = ort.InferenceSession("./onnx_model/encoder.onnx")
-def translate_text(input_text):
-    # Tokenize input text
     tokenized_input = tokenizer(
-        input_text, return_tensors="pt", padding=True, truncation=True, max_length=512
     )
     input_ids = tokenized_input["input_ids"]
     attention_mask = tokenized_input["attention_mask"]
-    # Generate translation using the model
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_length=512,  # Maximum length of the output
-            num_beams=5,  # Use beam search for better translations
-            early_stopping=True,  # Stop generation when the model predicts the end-of-sequence token
-        )
     # Decode the output tokens
-    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return translated_text
 interface.launch()

 import numpy as np
 import onnxruntime as ort
+from transformers import MarianTokenizer
 import gradio as gr
+# Load the tokenizer from the local folder
+model_path = "./onnx_model"  # Path to the folder containing the model files
+tokenizer = MarianTokenizer.from_pretrained(model_path)
+# Load the ONNX model
+onnx_model_path = "./model.onnx"
+session = ort.InferenceSession(onnx_model_path)
+def translate_text(input_texts):
+    # Tokenize input texts (batch processing)
     tokenized_input = tokenizer(
+        input_texts, return_tensors="np", padding=True, truncation=True, max_length=512
     )
     input_ids = tokenized_input["input_ids"]
     attention_mask = tokenized_input["attention_mask"]
+    decoder_start_token_id = translation_tokenizer.cls_token_id or translation_tokenizer.pad_token_id
+    decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)
+    # Prepare inputs for ONNX model
+    ort_inputs = {
+        "input_ids": input_ids.astype(np.int64),
+        "attention_mask": attention_mask.astype(np.int64),
+        "decoder_input_ids": decoder_input_ids,
+    }
+    # Run inference using the ONNX model
+    ort_outputs = session.run(None, ort_inputs)
+    output_ids = ort_outputs[0]  # Get the output token IDs
     # Decode the output tokens
+    translated_texts = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    return translated_texts
+# Gradio interface
+interface = gr.Interface(
+    fn=translate_text,
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter text to translate..."),
+    outputs="text",
+    title="MarianMT Translation",
+    description="Translate text using MarianMT model with ONNX runtime.",
+)
+# Launch the interface
 interface.launch()