Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
| import tensorflow as tf | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| import pickle | |
| import sys | |
| from tensorflow.keras import preprocessing | |
| sys.modules['keras.src.preprocessing'] = preprocessing | |
| from tensorflow import keras | |
| sys.modules['keras'] = keras | |
| from huggingface_hub import HfApi | |
# Authenticate with the Hugging Face Hub. The access token is read from the
# HUGGINGFACE_HUB_TOKEN environment variable (set it as a secret variable in
# the settings of this Space).
from huggingface_hub import login

_hub_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
if _hub_token:
    login(token=_hub_token)
else:
    # login(token=None) falls back to an interactive prompt, which cannot be
    # answered on a headless Space. Skip the call instead; any credentials
    # already cached on the machine are still picked up by the Hub client.
    print("HUGGINGFACE_HUB_TOKEN is not set; skipping Hugging Face Hub login.")
| # --------------------------------------------------------------------------------------------------------------------------------------- | |
# Load the English and French tokenizers and the trained translation model.
# All three files are opened by relative path, so they must be present in the
# working directory.
def _unpickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling executes code embedded in the file; only load trusted files.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


english_tokenizer = _unpickle('english_tokenizer.pickle')
french_tokenizer = _unpickle('french_tokenizer.pickle')
translation_model = tf.keras.models.load_model('model2_v2.h5')
# ---------------------------------------------------------------------------------------------------------------------------------------
# Constants used by translate_sentence
MAX_LEN_EN = 15  # length English inputs are padded to before being fed to the model
MAX_LEN_FR = 21  # length of the French target sequence that is decoded step by step
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)  # also used as the decoder start token
| # function implemented earlier, modified it to be used with gradio. | |
def translate_sentence(sentence, verbose=False):
    """Translate an English sentence to French, one word at a time.

    The sentence is tokenized with ``english_tokenizer`` and padded to
    ``MAX_LEN_EN``. The target sequence starts with the start token
    (``VOCAB_SIZE_FR``) and is extended with the most likely next token at
    every step until token index 0 is predicted or ``MAX_LEN_FR`` is reached.

    Args:
        sentence: English text to translate.
        verbose: When true, print the sampled token index and word at every
            step; the flag is also forwarded to ``translation_model.predict``.

    Returns:
        The predicted French words joined by single spaces. An empty string
        is returned for empty, blank, or ``None`` input.
    """
    # Nothing to translate: avoid running the model on an all-padding input.
    if not sentence or not sentence.strip():
        return ''

    # Preprocess the input sentence
    encoder_input = pad_sequences(
        english_tokenizer.texts_to_sequences([sentence]),
        maxlen=MAX_LEN_EN,
        padding='post',
    )

    # Initialize the target sequence with the start token
    decoder_input = np.zeros((1, MAX_LEN_FR))
    decoder_input[0, 0] = VOCAB_SIZE_FR

    # Words are collected in a list and joined once at the end
    words = []
    for step in range(1, MAX_LEN_FR):
        # Predict the next word from everything decoded so far
        output_tokens = translation_model.predict(
            [encoder_input, decoder_input], verbose=verbose
        )
        token_index = int(np.argmax(output_tokens[0, step - 1, :]))
        if verbose:
            print(f'sampled_token_index: {token_index}')
        if token_index == 0:  # End token
            break
        word = french_tokenizer.index_word.get(token_index)
        if word is None:
            # The predicted index has no vocabulary entry; stop decoding
            # rather than raising KeyError in the middle of a request.
            break
        if verbose:
            print(f'sampled_word: {word}')
        words.append(word)
        # Feed the chosen token back in for the next step
        decoder_input[0, step] = token_index
    return ' '.join(words)
| # Example usage: | |
| # english_sentence = "paris is relaxing during december but it is usually chilly in july" | |
| # print(english_sentence) | |
| # translated_sentence = translate_sentence(english_sentence) | |
| # print(translated_sentence) | |
| # ---------------------------------------------------------------------------------------------------------------------------------------- | |
| # Gradio app | |
| from datasets import load_dataset, Dataset | |
| # Function to load the dataset from Hugging Face | |
def load_hf_dataset():
    """Load "Zaherrr/translation_log" from the Hugging Face Hub and return its "train" split."""
    return load_dataset("Zaherrr/translation_log")["train"]
def update_history_with_status(english, french, history, status):
    """Record a translation pair with its status and render the history text.

    Appends ``(english, french, status)`` to ``history`` in place, then
    formats every entry as ``"<english> ----> <french> (<status>)"``, one
    entry per line.

    Returns:
        A ``(history_text, history)`` tuple; ``history`` is the same list
        object that was passed in.
    """
    history.append((english, french, status))
    rendered = []
    for source, target, state in history:
        rendered.append(f"{source} ----> {target} ({state})")
    return "\n".join(rendered), history
def revert_last_action(history):
    """Undo the most recent accept/flag action.

    Removes the last entry from ``history`` (in place) and, when an index was
    recorded in the module-level ``row_indices`` list, drops that row from the
    "Zaherrr/translation_log" dataset and pushes the result back to the Hub.

    Args:
        history: List of ``(english, french, status)`` tuples.

    Returns:
        A ``(history_text, history)`` tuple, where ``history_text`` has one
        ``"<english> ----> <french> (<status>)"`` line per remaining entry.
        With an empty history this is ``("", history)`` and nothing is
        touched.
    """
    if history:
        # Revert history
        history.pop()
        # Revert last row in the dataset
        if row_indices:
            last_index = row_indices.pop()
            df = load_hf_dataset().to_pandas()
            # The remote dataset may have changed since the index was
            # recorded; only drop (and re-upload) when the row still exists.
            if last_index in df.index:
                df = df.drop(last_index).reset_index(drop=True)
                Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
    # Built outside the branch so it is always defined, including when there
    # was nothing to revert.
    history_text = "\n".join(
        [f"{inp} ----> {out} ({status})" for inp, out, status in history]
    )
    return history_text, history
# Shared by flag_action and accept_action, which differ only in the correction
# they store and in the status label shown in the history.
def _append_log_row(row):
    """Append one feedback row to the Hub dataset and record its index.

    Loads "Zaherrr/translation_log", appends ``row`` (a dict of column values),
    pushes the whole dataset back to the Hub, and stores the new row's index
    in the module-level ``row_indices`` list so it can be reverted later.
    """
    df = load_hf_dataset().to_pandas()
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
    row_indices.append(len(df) - 1)


# Function to flag data
def flag_action(english, french, corrected_french, flagged_successful, history):
    """Log a flagged translation together with the user's correction.

    Args:
        english: Source sentence.
        french: Predicted translation.
        corrected_french: Correction supplied by the user.
        flagged_successful: Value stored in the row's "status" column.
        history: List of ``(english, french, status)`` tuples; extended in place.

    Returns:
        ``(history_text, history)`` with the new entry marked "Flagged".
    """
    _append_log_row({
        "english": english,
        "french": french,
        "corrected_french": corrected_french,
        "status": flagged_successful,
    })
    return update_history_with_status(english, french, history, "Flagged")


# Function to accept data
def accept_action(english, french, hidden_text, flagged_successful, history):
    """Log an accepted translation.

    Args:
        english: Source sentence.
        french: Predicted translation.
        hidden_text: Value stored in the row's "corrected_french" column.
        flagged_successful: Value stored in the row's "status" column.
        history: List of ``(english, french, status)`` tuples; extended in place.

    Returns:
        ``(history_text, history)`` with the new entry marked "Accepted".
    """
    _append_log_row({
        "english": english,
        "french": french,
        "corrected_french": hidden_text,
        "status": flagged_successful,
    })
    return update_history_with_status(english, french, history, "Accepted")
| # Define the Gradio interface | |
# Define the Gradio interface: input/output textboxes, accept/flag/revert
# buttons, a history panel, and the event wiring between them.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
            # NOTE(review): the nesting level of this column could not be
            # recovered from the source formatting; confirm the intended layout.
            with gr.Column():
                with gr.Row():
                    accept_button = gr.Button(value="Accept", variant="primary")
                    flag_button = gr.Button(value="Flag", variant="secondary")
                    revert_button = gr.Button(value="Revert", variant="secondary")
    examples = gr.Examples(
        examples=[
            "paris is relaxing during december but it is usually chilly in july",
            "She is driving the truck",
        ],
        inputs=english,
    )
    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])
    # Indices of the rows appended to the Hub dataset, used by the revert action.
    # NOTE(review): this is a plain module-level list, whereas `history` is a
    # gr.State; confirm whether sharing it between sessions is intended.
    row_indices = []

    # Translate on Enter or button click, then hide the status box again.
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )

    # Flag: show the status first, then log the row and refresh the history.
    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Accept: same flow as flagging, with the hidden textbox as the correction.
    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Revert: undo the last action, then report it in the status box. The
    # status is written to `value`: a placeholder stays hidden while the box
    # still holds the previous "Flagged"/"Accepted" text.
    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        fn=lambda: gr.Textbox(value="Reverted", visible=True),
        outputs=flagged_successful,
    )
# Start the app behind a login prompt. Credentials are read from the
# APP_AUTH_USERNAME / APP_AUTH_PASSWORD environment variables (configure them
# as secrets). The previous hard-coded values remain only as a fallback so
# existing deployments keep working.
# NOTE(review): set the secrets and remove the fallback; a password committed
# to source is readable by anyone who can view this file.
demo.launch(
    share=True,
    auth=(
        os.getenv("APP_AUTH_USERNAME", "username"),
        os.getenv("APP_AUTH_PASSWORD", "password123"),
    ),
    auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>",
)