Spaces:

Zaherrr
/

Translation_model_demo

Sleeping

App Files Files Community

Zaherrr committed on Jul 23, 2024

Commit

a7b55ec

verified ·

1 Parent(s): f9cb70e

Upload 4 files

Browse files

Files changed (4) hide show

app.py +218 -0
english_tokenizer.pickle +3 -0
french_tokenizer.pickle +3 -0
model2_v2.h5 +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import gradio as gr
+import os
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import pickle
+import sys
+from tensorflow.keras import preprocessing
+# Register tensorflow.keras.preprocessing under the module name 'keras.src.preprocessing'.
+# NOTE(review): presumably needed so the tokenizer pickles loaded below can resolve classes
+# that were pickled under that module path -- confirm against the environment that created them.
+sys.modules['keras.src.preprocessing'] = preprocessing
+from tensorflow import keras
+# Likewise make a plain `keras` module lookup resolve to tensorflow.keras.
+sys.modules['keras'] = keras
+# ---------------------------------------------------------------------------------------------------------------------------------------
+# Loading the translation model and english and french tokenizers
+with open('english_tokenizer.pickle', 'rb') as handle:
+    english_tokenizer = pickle.load(handle)
+with open('french_tokenizer.pickle', 'rb') as handle:
+    french_tokenizer = pickle.load(handle)
+translation_model = tf.keras.models.load_model('model2_v2.h5')
+# ---------------------------------------------------------------------------------------------------------------------------------------
+# Translate sentence function
+MAX_LEN_EN = 15
+MAX_LEN_FR = 21
+VOCAB_SIZE_EN = len(english_tokenizer.word_index)
+VOCAB_SIZE_FR = len(french_tokenizer.word_index)
+# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
+# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
+# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
+# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')
+# function implemented earlier, modified it to be used with gradio.
+def translate_sentence(sentence, verbose=False):
+    # Preprocess the input sentence
+    sequence = english_tokenizer.texts_to_sequences([sentence])
+    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')
+    # Initialize the target sequence with the start token
+    start_token = VOCAB_SIZE_FR  #344
+    target_sequence = np.zeros((1, MAX_LEN_FR))
+    target_sequence[0, 0] = start_token
+    # Placeholder for the translation
+    translation = ''
+    # Step-by-step translation
+    for i in range(1, MAX_LEN_FR):
+        # Predict the next word
+        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)
+        # Get the most likely next word
+        sampled_token_index = np.argmax(output_tokens[0, i - 1, :])
+        if verbose:
+          print(f'sampled_token_index: {sampled_token_index}')
+        if sampled_token_index == 0:  # End token
+            break
+        sampled_word = french_tokenizer.index_word[sampled_token_index]
+        if verbose:
+          print(f'sampled_word: {sampled_word}')
+        # Append the word to the translation
+        translation += ' ' + sampled_word
+        # Update the target sequence
+        target_sequence[0, i] = sampled_token_index
+    return translation.strip()
+# Example usage:
+# english_sentence = "paris is relaxing during december but it is usually chilly in july"
+# print(english_sentence)
+# translated_sentence = translate_sentence(english_sentence)
+# print(translated_sentence)
+# ----------------------------------------------------------------------------------------------------------------------------------------
+# Gradio app
+# Function to update the history block with status
+def update_history_with_status(english, french, history, status):
+    history.append((english, french, status))
+    history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history])
+    return history_text, history
+def revert_last_action(history):
+    if history:
+        # Revert history
+        history.pop()
+        # Update history block text
+        history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history])
+        # Revert last row in the CSV file
+        if row_indices:
+            last_index = row_indices.pop()
+            # Remove the last row from the CSV file
+            csv_file = "flagged_translations/log.csv"
+            if os.path.exists(csv_file):
+                df = pd.read_csv(csv_file)
+                # print('read the csv file')
+                df = df.drop(last_index-1).reset_index(drop=True)
+                # print('removed the last index')
+                df.to_csv(csv_file, index=False)
+                # print('dumped the df to csv')
+    return history_text, history
+# CSV Logger for flagging
+flagging_callback = gr.CSVLogger()  # logs the flagged data into a csv file
+# Define the Gradio interface
+with gr.Blocks(theme='gstaff/sketch') as demo:
+    gr.Markdown("<center><h1>Translate English to French</h1></center>")
+    with gr.Row():
+        # Left column: English input, Translate button and two invisible helper textboxes.
+        with gr.Column():
+            english = gr.Textbox(label="English", placeholder="Input English text here")
+            Translate_button = gr.Button(value="Translate", variant="primary")
+            # Invisible, read-only textbox; the Accept handler logs its value in the "corrected French" position.
+            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
+            # Status textbox, hidden at start; the button handlers make it visible with "Flagged" / "Accepted".
+            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
+        # Right column: model output, user correction and the Accept / Flag / Revert buttons.
+        with gr.Column():
+            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
+            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
+            with gr.Column():
+                with gr.Row():
+                    accept_button = gr.Button(value="Accept", variant="primary")
+                    flag_button = gr.Button(value="Flag", variant="secondary")
+                    revert_button = gr.Button(value="Revert", variant="secondary")
+    # This needs to be called at some point prior to the first call to flagging.callback.flag()
+    # flagging_callback.setup([english, french, corrected_french, "IsFlagged"], "flagged_translations")
+    flagging_callback.setup([english, french, corrected_french, flagged_successful], "flagged_translations")
+    # Clickable example sentences that fill the English textbox.
+    examples = gr.Examples(examples=[
+        "paris is relaxing during december but it is usually chilly in july",
+        "She is driving the truck"],
+        inputs=english)
+    gr.Markdown("History:")
+    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
+    # List of (english, french, status) tuples, passed to and returned from the handlers.
+    history = gr.State([])
+    # Track the row indices in the CSVLogger
+    # NOTE(review): unlike `history`, this is one plain module-level list used by every handler call;
+    # presumably it is shared by all sessions of the app -- confirm whether per-session tracking is needed.
+    row_indices = []
+    def flag_action(english, french, corrected_french, flagged_successful, history):
+        data = [english, french, corrected_french, flagged_successful]
+        # Add the IsFlagged column with value True
+        flagged_value = flagged_successful if flagged_successful else "Flagged"
+        print(f"Flag Action - flagged_successful: {flagged_value}")
+        print(f"flagged_successful object: {flagged_successful}")
+        index = flagging_callback.flag(data)
+        row_indices.append(index)
+        return update_history_with_status(english, french, history, "Flagged")
+    def accept_action(english, french, hidden_text, flagged_successful, history):
+        data = [english, french, hidden_text, flagged_successful]
+        # Add the IsFlagged column with value False
+        # Extract value from flagged_successful
+        flagged_value = flagged_successful if flagged_successful else "Accepted"
+        print(f"Accept Action - flagged_successful: {flagged_value}")
+        print(f"flagged_successful object: {flagged_successful}")
+        index = flagging_callback.flag(data)
+        row_indices.append(index)
+        return update_history_with_status(english, french, history, "Accepted")
+    # Translate: runs on Enter in the English box or on the Translate button,
+    # then hides the status textbox again.
+    gr.on(
+        triggers=[english.submit, Translate_button.click],
+        fn=translate_sentence,
+        inputs=english,
+        outputs=[french],
+    ).then(
+        fn=lambda: gr.Textbox(visible=False),
+        inputs=None,
+        outputs=flagged_successful,
+    )
+    # Flag: first show "Flagged" in the status textbox, then log the row and refresh the history.
+    gr.on(
+        triggers=[flag_button.click],
+        fn=lambda: gr.Textbox(value="Flagged", visible=True),
+        outputs=flagged_successful,
+    ).then(
+        fn=flag_action,
+        inputs=[english, french, corrected_french, flagged_successful, history],
+        outputs=[history_block, history],
+    )
+    # Accept: first show "Accepted" in the status textbox, then log the row and refresh the history.
+    gr.on(
+        triggers=[accept_button.click],
+        fn=lambda: gr.Textbox(value="Accepted", visible=True),
+        outputs=flagged_successful,
+    ).then(
+        fn=accept_action,
+        inputs=[english, french, hidden_text, flagged_successful, history],
+        outputs=[history_block, history],
+    )
+    # Revert: undo the last history entry / CSV row, then show the status textbox with a "Reverted" placeholder.
+    gr.on(
+        triggers=[revert_button.click],
+        fn=revert_last_action,
+        inputs=[history],
+        outputs=[history_block, history],
+    ).then(
+        fn=lambda: gr.Textbox(placeholder="Reverted", visible=True),
+        outputs=flagged_successful,
+    )
+    demo.launch(share=True, auth=('username', 'Zaka_module7'),
+                auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>")#, debug=True)

english_tokenizer.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7e198626a7c26d022e8db734e247db2b1792ec483ffadd6f4e9977620624c1
+size 6044

french_tokenizer.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9773dd49cef95f1a2eb08c37ddaae08fdd0ba13a31b077deb7c5934ffbc2bae7
+size 11453

model2_v2.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3f9304c423113437cb1a22a5f73ae99db50557f1ac079befd949fade2dfe323
+size 23313472