Spaces:
Sleeping
Sleeping
from functools import lru_cache
import json
import pickle
import re

from flask import Flask, render_template, request, url_for
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Flask application; serves static assets from ./static.
app = Flask(__name__, static_folder='static')

# Pre-trained classification models, loaded once at startup.
# Used by classification() to score a sentence Positive/Negative.
cls_biLSTM = load_model("Classification/biLSTM_model.h5")
cls_LSTM = load_model("Classification/LSTM_model.h5")
cls_GRU = load_model("Classification/GRU_model.h5")

# Pre-trained next-word generation models, loaded once at startup.
# Used by generate_text() to extend a prompt word by word.
gen_biLSTM = load_model("Generation/bilstm_model.h5")
gen_LSTM = load_model("Generation/lstm_model.h5")
gen_GRU = load_model("Generation/gru_model.h5")
# Post-process Texts
def postprocess_text(text):
    """Normalize whitespace and capitalization of user-supplied text.

    Strips leading/trailing whitespace, collapses whitespace runs to a
    single space, inserts a space after a sentence punctuation mark
    (. ! ?) that is wedged between two word characters, and capitalizes
    the first character.

    Args:
        text: raw input string.

    Returns:
        The cleaned string; an empty or whitespace-only input yields "".
    """
    text = re.sub(r"\s+", " ", text.strip())
    text = re.sub(r"(\w)([.!?])(\w)", r"\1\2 \3", text)
    # Guard the capitalization step: on empty input, text[0] would
    # raise IndexError (bug in the original).
    if text:
        text = text[0].upper() + text[1:]
    # # Add a period at the end if missing
    # if not text.endswith("."):
    #     text += "."
    return text
def index():
    """Render the landing page (index.html).

    NOTE(review): no @app.route decorator is visible in this extract —
    presumably lost in formatting; confirm this is registered as a route.
    """
    return render_template('index.html')
def classifier():
    """Render the classifier input page (classifier.html).

    NOTE(review): no @app.route decorator is visible in this extract —
    presumably lost in formatting; confirm this is registered as a route.
    """
    return render_template('classifier.html')
@lru_cache(maxsize=1)
def _load_cls_artifacts():
    """Load and cache the classification config and fitted tokenizer.

    The original re-read data.json and re-unpickled the tokenizer on
    every request, and leaked the pickle file handle (open() without
    close). Caching is safe because the files do not change while the
    server runs.
    """
    with open('Classification/data.json', 'r') as file:
        data = json.load(file)
    with open('Classification/tokenizer.pkl', 'rb') as fh:
        tokenizer = pickle.load(fh)
    return data, tokenizer

def classification():
    """Classify a submitted sentence with three models and render results.

    POST: reads 'sentence' from the form, tokenizes and pads it using the
    cached tokenizer/config, runs the biLSTM, LSTM, and GRU classifiers,
    and renders classification.html with each score formatted to 9
    decimals plus a Positive/Negative label based on the configured
    threshold. Any other method renders the empty form.

    NOTE(review): no @app.route decorator is visible in this extract —
    presumably lost in formatting; confirm this is registered as a route.
    """
    if request.method == 'POST':
        sentence = request.form['sentence']
        data, tokenizer = _load_cls_artifacts()
        max_length = data['max_length']
        padding_type = data['padding_type']
        trunc_type = data['trunc_type']
        threshold = data['threshold']
        sequences = tokenizer.texts_to_sequences([sentence])
        padded = pad_sequences(sequences, maxlen=max_length,
                               padding=padding_type, truncating=trunc_type)
        # Each predict() returns a (1, 1) array of probabilities.
        biLSTM_pred = cls_biLSTM.predict(padded)
        LSTM_pred = cls_LSTM.predict(padded)
        GRU_pred = cls_GRU.predict(padded)
        biLSTM_label = "Positive" if biLSTM_pred > threshold else "Negative"
        LSTM_label = "Positive" if LSTM_pred > threshold else "Negative"
        GRU_label = "Positive" if GRU_pred > threshold else "Negative"
        # Format the raw probabilities for display.
        biLSTM_pred = "{:.9f}".format(biLSTM_pred[0][0])
        LSTM_pred = "{:.9f}".format(LSTM_pred[0][0])
        GRU_pred = "{:.9f}".format(GRU_pred[0][0])
        return render_template('classification.html', sentence=sentence,
                               biLSTM_pred=biLSTM_pred, biLSTM_label=biLSTM_label,
                               LSTM_pred=LSTM_pred, LSTM_label=LSTM_label,
                               GRU_pred=GRU_pred, GRU_label=GRU_label)
    return render_template('classification.html')
def generation():
    """Handle the text-generation form.

    On POST, cleans the submitted sentence, reads the requested number
    of words from the radio control, extends the sentence with each of
    the three generation models, and renders generation.html with the
    results (a trailing period is appended to each prediction for
    display). Any other method renders the blank form.
    """
    if request.method != 'POST':
        return render_template("generation.html")

    sentence = postprocess_text(request.form['sentence'])
    next_words = int(request.form['valueradio'])

    # Extend the prompt with each model in turn (same order as before:
    # lstm, gru, bilstm).
    predictions = {
        name: generate_text(sentence, next_words, name)
        for name in ("lstm", "gru", "bilstm")
    }

    return render_template(
        "generation.html",
        sentence=sentence,
        next_words=next_words,
        LSTM_Pred=predictions["lstm"] + ".",
        GRU_Pred=predictions["gru"] + ".",
        BILSTM_Pred=predictions["bilstm"] + ".",
        valueradio=next_words,
    )
@lru_cache(maxsize=1)
def _load_gen_tokenizer():
    """Fit (once) and cache the generation tokenizer and max sequence length.

    The original rebuilt the tokenizer, regenerated every n-gram prefix,
    and padded all of them into an array on every call — even though only
    the maximum sequence length was ever used from that work.
    """
    with open('Classification/Reviews.json', 'r') as f:
        data = json.load(f)
    reviews = [item['Reviews'] for item in data]
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(reviews)
    # The longest n-gram prefix equals the length of the longest review
    # that yields at least one prefix (>= 2 tokens) — matches the
    # original's max over all generated n-gram sequences.
    lengths = (len(tokenizer.texts_to_sequences([line])[0]) for line in reviews)
    max_sequence_len = max(n for n in lengths if n >= 2)
    return tokenizer, max_sequence_len

def generate_text(sentence, next_words, model_name):
    """Greedily extend *sentence* by *next_words* words with one model.

    Args:
        sentence: seed text to continue.
        next_words: number of words to append.
        model_name: "lstm", "gru", or "bilstm" (KeyError otherwise).

    Returns:
        The seed sentence followed by the generated words.
    """
    models = {
        "lstm": gen_LSTM,
        "gru": gen_GRU,
        "bilstm": gen_biLSTM
    }
    model = models[model_name]
    tokenizer, max_sequence_len = _load_gen_tokenizer()
    # index_word is the prebuilt reverse of word_index: O(1) lookup per
    # step instead of the original's linear scan over the vocabulary.
    index_word = tokenizer.index_word
    generated_text = sentence
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                                   padding='pre')
        # predict() returns one row of vocab probabilities; take the argmax
        # as a plain int (the original compared an int to a 1-element array).
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])
        # Index 0 is reserved for padding and maps to no word; keep the
        # original's single-space placeholder in that case.
        output_word = index_word.get(predicted, " ")
        generated_text += " " + output_word
    return generated_text
if __name__ == '__main__':
    # Development server on port 8000. debug=True enables the
    # interactive debugger/reloader and must not be used in production.
    app.run(debug=True, port=8000)