Spaces:

Zaherrr
/

Translation_model_demo

Sleeping

App Files Files Community

Translation_model_demo / app.py

Zaherrr

Update app.py

cd90af5 verified over 1 year ago

raw

history blame contribute delete

8.71 kB

	import gradio as gr
	import os
	import pandas as pd
	import numpy as np
	import tensorflow as tf
	from tensorflow.keras.preprocessing.text import Tokenizer
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import pickle

	import sys
	from tensorflow.keras import preprocessing
	# Register tensorflow.keras.preprocessing under the module name
	# 'keras.src.preprocessing' so lookups of that dotted path resolve to it.
	# NOTE(review): presumably required so the pickled tokenizers loaded later in
	# this file can be unpickled -- confirm against the pickles' origin.
	sys.modules['keras.src.preprocessing'] = preprocessing
	from tensorflow import keras
	# Likewise make the top-level module name 'keras' resolve to tensorflow.keras.
	sys.modules['keras'] = keras

	from huggingface_hub import HfApi

	# The Hugging Face access token is read from the HUGGINGFACE_HUB_TOKEN
	# environment variable; set it as a secret variable in this Space's settings.
	from huggingface_hub import login

	_hub_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
	if _hub_token:
	    login(token=_hub_token)
	else:
	    # login(token=None) falls back to an interactive prompt, which cannot be
	    # answered on a headless host. Skip the login and report the problem so
	    # the rest of the app can still start.
	    print("HUGGINGFACE_HUB_TOKEN is not set; skipping Hugging Face Hub login.")

	# ---------------------------------------------------------------------------------------------------------------------------------------
	# Loading the translation model and english and french tokenizers

	def _load_pickle(path):
	    """Unpickle and return the object stored in the file at `path`."""
	    with open(path, 'rb') as stream:
	        return pickle.load(stream)


	# Tokenizers for the source (English) and target (French) languages.
	english_tokenizer = _load_pickle('english_tokenizer.pickle')
	french_tokenizer = _load_pickle('french_tokenizer.pickle')

	# Translation network, restored from model2_v2.h5 in the working directory.
	translation_model = tf.keras.models.load_model('model2_v2.h5')

	# ---------------------------------------------------------------------------------------------------------------------------------------
	# Translate sentence function
	# Length the tokenized English input is padded to (passed as `maxlen` to
	# pad_sequences in translate_sentence).
	MAX_LEN_EN = 15
	# Width of the French target buffer; also the upper bound of the decoding
	# loop in translate_sentence.
	MAX_LEN_FR = 21

	# Number of entries in each tokenizer's word_index mapping.
	# VOCAB_SIZE_FR is also used as the start-token id in translate_sentence.
	VOCAB_SIZE_EN = len(english_tokenizer.word_index)
	VOCAB_SIZE_FR = len(french_tokenizer.word_index)

	# Debug output for the values above (disabled):
	# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
	# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
	# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
	# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')

	# function implemented earlier, modified it to be used with gradio.
	def translate_sentence(sentence, verbose=False):
	    """Translate an English sentence to French, one token at a time.

	    The sentence is tokenized with `english_tokenizer` and padded to
	    `MAX_LEN_EN`. Decoding is greedy: at every step the model is run on the
	    source and the partial target, and the highest-scoring token for the
	    current position is kept. Decoding stops at token index 0 or after
	    `MAX_LEN_FR - 1` steps.

	    Args:
	        sentence: English text to translate.
	        verbose: When true, print each sampled token index and word; the
	            flag is also forwarded to `translation_model.predict`.

	    Returns:
	        The predicted French words joined by single spaces, or an empty
	        string when `sentence` is empty or only whitespace.
	    """
	    # Nothing to translate: do not run the model on an all-padding input.
	    if not sentence or not sentence.strip():
	        return ''

	    # Preprocess the input sentence
	    source_ids = english_tokenizer.texts_to_sequences([sentence])
	    padded_sequence = pad_sequences(source_ids, maxlen=MAX_LEN_EN, padding='post')

	    # The target buffer starts with the start token (id VOCAB_SIZE_FR) and is
	    # zero everywhere else.
	    target_sequence = np.zeros((1, MAX_LEN_FR))
	    target_sequence[0, 0] = VOCAB_SIZE_FR

	    words = []
	    for step in range(1, MAX_LEN_FR):
	        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)

	        # The scores at position step - 1 select the token written to
	        # position step of the target buffer.
	        sampled_token_index = np.argmax(output_tokens[0, step - 1, :])
	        if verbose:
	            print(f'sampled_token_index: {sampled_token_index}')
	        if sampled_token_index == 0:  # End token
	            break

	        sampled_word = french_tokenizer.index_word[sampled_token_index]
	        if verbose:
	            print(f'sampled_word: {sampled_word}')
	        words.append(sampled_word)

	        # Feed the chosen token back in for the next step.
	        target_sequence[0, step] = sampled_token_index

	    return ' '.join(words)

	# Example usage:
	# english_sentence = "paris is relaxing during december but it is usually chilly in july"
	# print(english_sentence)
	# translated_sentence = translate_sentence(english_sentence)
	# print(translated_sentence)



	# ----------------------------------------------------------------------------------------------------------------------------------------
	# Gradio app

	from datasets import load_dataset, Dataset

	# Function to load the dataset from Hugging Face
	def load_hf_dataset():
	    """Load the "Zaherrr/translation_log" dataset and return its "train" split."""
	    return load_dataset("Zaherrr/translation_log")["train"]

	def update_history_with_status(english, french, history, status):
	    """Record one translation pair in `history` and render the full log.

	    Args:
	        english: Source text of the pair.
	        french: Translated text of the pair.
	        history: List of (english, french, status) tuples; appended to in place.
	        status: Label stored with the new entry.

	    Returns:
	        A (history_text, history) tuple, where history_text has one
	        "english ----> french (status)" line per entry.
	    """
	    entry = (english, french, status)
	    history.append(entry)
	    rendered_lines = (f"{src} ----> {dst} ({label})" for src, dst, label in history)
	    return "\n".join(rendered_lines), history

	def revert_last_action(history):
	    """Undo the most recent accept/flag action.

	    Removes the last entry from `history`. If a logged row index is
	    available in `row_indices`, that row is dropped from the
	    "Zaherrr/translation_log" dataset and the result is pushed back to the Hub.

	    Args:
	        history: List of (english, french, status) tuples; modified in place.

	    Returns:
	        A (history_text, history) tuple. history_text has one
	        "english ----> french (status)" line per remaining entry and is
	        empty when nothing remains.
	    """
	    # Nothing to undo: still hand back a well-formed pair for the two outputs.
	    if not history:
	        return "", history

	    # Revert history
	    history.pop()

	    # Update history block text
	    history_text = "\n".join(f"{inp} ----> {out} ({status})" for inp, out, status in history)

	    # Revert last row in the dataset
	    if row_indices:
	        last_index = row_indices.pop()
	        df = load_hf_dataset().to_pandas()
	        # row_indices is one module-level list while the dataset is reloaded
	        # from the Hub on every call, so a recorded index may no longer exist.
	        # Dropping a missing label would raise, so skip it instead.
	        if last_index in df.index:
	            df = df.drop(last_index).reset_index(drop=True)
	            Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
	        else:
	            print(f"Row {last_index} is not in the logged dataset; nothing removed.")

	    return history_text, history

	# Shared by flag_action and accept_action
	def _append_log_row(english, french, corrected_french, status):
	    """Append one row to the "Zaherrr/translation_log" dataset and record its index."""
	    row = {"english": english, "french": french, "corrected_french": corrected_french, "status": status}
	    df = load_hf_dataset().to_pandas()
	    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
	    Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
	    # The new row is the last one; remember it so revert_last_action can drop it.
	    row_indices.append(len(df) - 1)


	# Function to flag data
	def flag_action(english, french, corrected_french, flagged_successful, history):
	    """Log a flagged translation and add it to the history.

	    Args:
	        english: Source text.
	        french: Predicted translation.
	        corrected_french: User-supplied correction stored with the row.
	        flagged_successful: Value stored in the row's "status" field.
	        history: List of (english, french, status) tuples; appended to in place.

	    Returns:
	        The (history_text, history) tuple from update_history_with_status.
	    """
	    _append_log_row(english, french, corrected_french, flagged_successful)
	    return update_history_with_status(english, french, history, "Flagged")


	# Function to accept data
	def accept_action(english, french, hidden_text, flagged_successful, history):
	    """Log an accepted translation and add it to the history.

	    Args:
	        english: Source text.
	        french: Predicted translation.
	        hidden_text: Value stored in the row's "corrected_french" field.
	        flagged_successful: Value stored in the row's "status" field.
	        history: List of (english, french, status) tuples; appended to in place.

	    Returns:
	        The (history_text, history) tuple from update_history_with_status.
	    """
	    _append_log_row(english, french, hidden_text, flagged_successful)
	    return update_history_with_status(english, french, history, "Accepted")

	# Define the Gradio interface
	# Build the Gradio UI and wire its events.
	# NOTE(review): the indentation of this section has been flattened, so the
	# nesting of the Row/Column containers below cannot be read off the code as
	# shown -- restore it from the original file before running.
	with gr.Blocks(theme='gstaff/sketch') as demo:
	gr.Markdown("<center><h1>Translate English to French</h1></center>")
	with gr.Row():
	with gr.Column():
	english = gr.Textbox(label="English", placeholder="Input English text here")
	Translate_button = gr.Button(value="Translate", variant="primary")
	# Invisible, non-interactive textbox; accept_action receives its value and
	# logs it as "corrected_french".
	hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
	# Status textbox; starts hidden and is made visible by the accept, flag and
	# revert handlers below.
	flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
	with gr.Column():
	french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
	corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
	with gr.Column():
	with gr.Row():
	accept_button = gr.Button(value="Accept", variant="primary")
	flag_button = gr.Button(value="Flag", variant="secondary")
	revert_button = gr.Button(value="Revert", variant="secondary")

	# Example sentences that fill the English textbox.
	examples = gr.Examples(examples=[
	"paris is relaxing during december but it is usually chilly in july",
	"She is driving the truck"],
	inputs=english)

	gr.Markdown("History:")
	history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
	# State holding the list of (english, french, status) tuples.
	history = gr.State([])

	# Indices of the rows appended to the logged dataset by flag_action and
	# accept_action; revert_last_action pops the most recent one.
	# This is a plain module-level list, not a gr.State.
	row_indices = []

	# Translate on submit of the English box or a click on Translate, then hide
	# the status textbox.
	gr.on(
	triggers=[english.submit, Translate_button.click],
	fn=translate_sentence,
	inputs=english,
	outputs=[french],
	).then(
	fn=lambda: gr.Textbox(visible=False),
	inputs=None,
	outputs=flagged_successful,
	)

	# Flag: show "Flagged" in the status textbox, then log the row and refresh
	# the history.
	gr.on(
	triggers=[flag_button.click],
	fn=lambda: gr.Textbox(value="Flagged", visible=True),
	outputs=flagged_successful,
	).then(
	fn=flag_action,
	inputs=[english, french, corrected_french, flagged_successful, history],
	outputs=[history_block, history],
	)

	# Accept: show "Accepted" in the status textbox, then log the row and refresh
	# the history.
	gr.on(
	triggers=[accept_button.click],
	fn=lambda: gr.Textbox(value="Accepted", visible=True),
	outputs=flagged_successful,

	).then(
	fn=accept_action,
	inputs=[english, french, hidden_text, flagged_successful, history],
	outputs=[history_block, history],
	)

	# Revert: undo the last action, then make the status textbox visible.
	# NOTE(review): this sets only the placeholder to "Reverted", not the value,
	# so a previously set "Flagged"/"Accepted" value presumably stays displayed
	# -- confirm whether value="Reverted" was intended.
	gr.on(
	triggers=[revert_button.click],
	fn=revert_last_action,
	inputs=[history],
	outputs=[history_block, history],
	).then(
	fn=lambda: gr.Textbox(placeholder="Reverted", visible=True),
	outputs=flagged_successful,
	)

	# Login credentials are read from the DEMO_USERNAME / DEMO_PASSWORD environment
	# variables (e.g. Space secrets). The fallbacks are the values that used to be
	# hard-coded here; they are visible in the source, so override them in any
	# real deployment.
	_auth_user = os.getenv("DEMO_USERNAME", "username")
	_auth_password = os.getenv("DEMO_PASSWORD", "password123")

	demo.launch(
	    share=True,
	    auth=(_auth_user, _auth_password),
	    auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>",
	)