Frenchizer committed on
Commit
0760540
·
verified ·
1 Parent(s): 80505e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -43
app.py CHANGED
@@ -4,58 +4,65 @@ from transformers import MarianTokenizer
4
  import gradio as gr
5
 
6
# Tokenizer assets live alongside the ONNX export in ./onnx_model.
model_path = "./onnx_model"
tokenizer = MarianTokenizer.from_pretrained(model_path)

# Bring up an ONNX Runtime inference session for the exported model.
onnx_model_path = "./model.onnx"
session = ort.InferenceSession(onnx_model_path)
13
 
14
def translate_text(input_texts):
    """Translate text with the MarianMT ONNX model using greedy decoding.

    Args:
        input_texts: A string or list of strings to translate.

    Returns:
        A list of translated strings, one per input sequence.
    """
    # Tokenize input texts (batch processing).
    tokenized_input = tokenizer(
        input_texts, return_tensors="np", padding=True, truncation=True, max_length=512
    )
    input_ids = tokenized_input["input_ids"].astype(np.int64)
    attention_mask = tokenized_input["attention_mask"].astype(np.int64)

    # BUG FIX: the decoder start row must match the encoder batch size;
    # a fixed (1, 1) array breaks any batch with more than one sequence.
    # MarianMT uses pad_token_id as the decoder start token.
    batch_size = input_ids.shape[0]
    decoder_input_ids = np.full((batch_size, 1), tokenizer.pad_token_id, dtype=np.int64)

    # BUG FIX: the ONNX model outputs logits, not token IDs, and one forward
    # pass yields only a single decoding step. Decode autoregressively,
    # taking the argmax token each step (greedy search).
    for _ in range(512):
        ort_outputs = session.run(
            None,
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "decoder_input_ids": decoder_input_ids,
            },
        )
        # Last-position logits: shape (batch_size, vocab_size).
        next_tokens = np.argmax(ort_outputs[0][:, -1, :], axis=-1)
        decoder_input_ids = np.concatenate(
            [decoder_input_ids, next_tokens[:, None]], axis=-1
        )
        # Stop once every sequence has produced an EOS token.
        if all(tokenizer.eos_token_id in seq for seq in decoder_input_ids):
            break

    # Decode the accumulated tokens back to text.
    translated_texts = tokenizer.batch_decode(decoder_input_ids, skip_special_tokens=True)
    return translated_texts
 
 
50
 
51
# Wire the translator into a minimal text-in/text-out Gradio UI.
ui_config = dict(
    fn=translate_text,
    inputs="text",
    outputs="text",
    title="Frenchizer Translation Model",
    description="Translate text with MarianMT ONNX model and encoding by batches."
)
interface = gr.Interface(**ui_config)

# Serve the interface.
interface.launch()
 
4
  import gradio as gr
5
 
6
# The tokenizer files are stored in the local ./onnx_model folder.
model_path = "./onnx_model"
tokenizer = MarianTokenizer.from_pretrained(model_path)

# Start an ONNX Runtime session over the exported translation model.
onnx_model_path = "./model.onnx"
session = ort.InferenceSession(onnx_model_path)
14
def translate_text(input_texts, max_length=512):
    """Translate text with the MarianMT ONNX model using greedy decoding.

    Args:
        input_texts: A string or list of strings to translate.
        max_length: Maximum tokenized input length and maximum number of
            decoding steps.

    Returns:
        A list of translated strings, one per input sequence.
    """
    # Tokenize the input texts (padding makes the batch rectangular;
    # truncation bounds the encoder input length).
    inputs = tokenizer(
        input_texts, return_tensors="np", padding=True, truncation=True, max_length=max_length
    )
    input_ids = inputs["input_ids"].astype(np.int64)
    attention_mask = inputs["attention_mask"].astype(np.int64)

    # MarianMT uses pad_token_id as the decoder start token.
    batch_size = input_ids.shape[0]
    decoder_input_ids = np.full((batch_size, 1), tokenizer.pad_token_id, dtype=np.int64)
    # Track which sequences have already emitted EOS.
    finished = np.zeros(batch_size, dtype=bool)

    # Generate output tokens iteratively (greedy search).
    for _ in range(max_length):
        ort_outputs = session.run(
            None,
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "decoder_input_ids": decoder_input_ids,
            },
        )
        # Logits for the last decoder position: (batch_size, vocab_size).
        next_token_logits = ort_outputs[0][:, -1, :]
        next_tokens = np.argmax(next_token_logits, axis=-1)

        # BUG FIX: once a sequence has produced EOS, force pad for it.
        # Otherwise argmax keeps appending real vocabulary tokens after EOS,
        # and batch_decode(skip_special_tokens=True) would include that
        # garbage in the finished translation.
        next_tokens = np.where(finished, tokenizer.pad_token_id, next_tokens)
        finished |= next_tokens == tokenizer.eos_token_id

        decoder_input_ids = np.concatenate(
            [decoder_input_ids, next_tokens[:, None]], axis=-1
        )

        # Stop as soon as every sequence in the batch is finished.
        if finished.all():
            break

    # Decode the accumulated token IDs back to text.
    return tokenizer.batch_decode(decoder_input_ids, skip_special_tokens=True)
52
 
53
  # Gradio interface
54
def gradio_translate(input_texts):
    """Gradio-facing wrapper: forward the textbox contents to translate_text."""
    return translate_text(input_texts)
57
# Build the Gradio UI: one input textbox, one output textbox.
input_box = gr.Textbox(lines=2, placeholder="Enter text to translate...", label="Input Text")
output_box = gr.Textbox(label="Translated Text")
interface = gr.Interface(
    fn=gradio_translate,
    inputs=input_box,
    outputs=output_box,
    title="ONNX English to French Translation",
    description="Translate English text to French using a MarianMT ONNX model.",
)

# Launch the Gradio app.
interface.launch()