# Spaces: Innovex / text_gen_class (Sleeping)
# File size: 5,091 Bytes
# dfe58cd

import functools
import json
import pickle
import re

import numpy as np
from flask import Flask, render_template, request, url_for
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Flask application object; static assets are served from the "static" folder.
app = Flask(__name__, static_folder='static')

# Classifier models, loaded once at import time (paths are relative to the
# working directory) and used by the classification() route.
cls_biLSTM = load_model("Classification/biLSTM_model.h5")
cls_LSTM = load_model("Classification/LSTM_model.h5")
cls_GRU = load_model("Classification/GRU_model.h5")

# Text-generation models, loaded once at import time and selected by name
# inside generate_text().
gen_biLSTM = load_model("Generation/bilstm_model.h5")
gen_LSTM = load_model("Generation/lstm_model.h5")
gen_GRU = load_model("Generation/gru_model.h5")


# Post-process Texts
def postprocess_text(text):
    """Normalise whitespace and punctuation spacing, then capitalise the text.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string: surrounding whitespace stripped, runs of
        whitespace collapsed to a single space, a space inserted after any
        ``.``, ``!`` or ``?`` that sits directly between two word characters,
        and the first character upper-cased. Empty or whitespace-only input
        yields ``""``. No trailing period is appended.
    """
    text = re.sub(r"\s+", " ", text.strip())

    # Lookarounds leave the neighbouring word characters unconsumed, so
    # back-to-back cases such as "a.b.c" are all spaced ("a. b. c").
    text = re.sub(r"(?<=\w)([.!?])(?=\w)", r"\1 ", text)

    # Nothing to capitalise; indexing text[0] would raise IndexError.
    if not text:
        return text

    return text[0].upper() + text[1:]



@app.route('/')
def index():
    """Render ``index.html`` for the root URL."""
    page = render_template('index.html')
    return page


@app.route('/classifier')
def classifier():
    """Render ``classifier.html`` for the ``/classifier`` URL."""
    page = render_template('classifier.html')
    return page


def _score_and_label(model, padded, threshold):
    """Run one classifier on the padded input and describe its result.

    Args:
        model: A loaded classifier exposing ``predict``.
        padded: The padded token sequence batch holding a single sentence.
        threshold: Score above which the label is "Positive".

    Returns:
        A ``(score_text, label)`` tuple: the score formatted to nine decimal
        places and either "Positive" or "Negative".
    """
    # The prediction is indexed as [0][0], i.e. the single score for the one
    # submitted sentence; comparing that scalar avoids relying on the truth
    # value of a whole array.
    score = model.predict(padded)[0][0]
    label = "Positive" if score > threshold else "Negative"
    return "{:.9f}".format(score), label


@app.route('/classification', methods=['GET', 'POST'])
def classification():
    """Classify a submitted sentence with the three classifier models.

    GET renders ``classification.html`` without context. POST reads the
    ``sentence`` form field, tokenises and pads it using the settings stored
    in ``Classification/data.json`` and the pickled tokenizer, scores it with
    the biLSTM, LSTM and GRU classifiers, and renders the same template with
    each model's formatted score and "Positive"/"Negative" label.

    Returns:
        The rendered ``classification.html`` page.
    """
    if request.method != 'POST':
        return render_template('classification.html')

    sentence = request.form['sentence']

    with open('Classification/data.json', 'r') as file:
        data = json.load(file)

    # NOTE(security): unpickling executes arbitrary code, so this file must
    # only ever be a trusted artifact shipped with the application.
    # The context manager closes the handle that a bare open() would leak.
    with open('Classification/tokenizer.pkl', 'rb') as file:
        tokenizer = pickle.load(file)

    sequences = tokenizer.texts_to_sequences([sentence])
    padded = pad_sequences(
        sequences,
        maxlen=data['max_length'],
        padding=data['padding_type'],
        truncating=data['trunc_type'],
    )
    threshold = data['threshold']

    biLSTM_pred, biLSTM_label = _score_and_label(cls_biLSTM, padded, threshold)
    LSTM_pred, LSTM_label = _score_and_label(cls_LSTM, padded, threshold)
    GRU_pred, GRU_label = _score_and_label(cls_GRU, padded, threshold)

    return render_template('classification.html', sentence=sentence,
                           biLSTM_pred=biLSTM_pred, biLSTM_label=biLSTM_label,
                           LSTM_pred=LSTM_pred, LSTM_label=LSTM_label,
                           GRU_pred=GRU_pred, GRU_label=GRU_label)



@app.route("/generation", methods=['GET', 'POST'])
def generation():
    """Generate continuations of a submitted sentence with all three models.

    GET renders ``generation.html`` without context. POST cleans the
    ``sentence`` form field with ``postprocess_text``, reads the number of
    words to add from ``valueradio``, runs ``generate_text`` for the LSTM,
    GRU and BiLSTM models (in that order), and renders the template with each
    result followed by a period.
    """
    if request.method != 'POST':
        return render_template("generation.html")

    seed = postprocess_text(request.form['sentence'])
    word_count = int(request.form['valueradio'])

    # The list is built left to right, so the models run in the listed order.
    lstm_text, gru_text, bilstm_text = [
        generate_text(seed, word_count, model_key)
        for model_key in ("lstm", "gru", "bilstm")
    ]

    return render_template(
        "generation.html",
        sentence=seed,
        next_words=word_count,
        LSTM_Pred=lstm_text + ".",
        GRU_Pred=gru_text + ".",
        BILSTM_Pred=bilstm_text + ".",
        valueradio=word_count,
    )




@functools.lru_cache(maxsize=1)
def _load_generation_vocab():
    """Build the tokenizer and maximum sequence length from the review corpus.

    The result is cached so the corpus is read and the tokenizer fitted only
    once per process instead of on every generate_text() call.

    Returns:
        A ``(tokenizer, max_sequence_len)`` tuple, where ``max_sequence_len``
        is the length of the longest n-gram prefix sequence derivable from
        the reviews.

    Raises:
        ValueError: If no review tokenises to at least two tokens, so no
            n-gram sequence exists.
    """
    # NOTE(review): the generation corpus is read from the Classification/
    # directory, exactly as before - confirm this is the intended location.
    with open('Classification/Reviews.json', 'r') as f:
        data = json.load(f)

    reviews = [item['Reviews'] for item in data]

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(reviews)

    # N-gram prefixes have length 2..len(review), so the longest prefix of a
    # review is the review itself; reviews shorter than two tokens contribute
    # none. This gives the same maximum without materialising and padding
    # every prefix.
    lengths = [
        len(tokens)
        for tokens in tokenizer.texts_to_sequences(reviews)
        if len(tokens) >= 2
    ]
    if not lengths:
        raise ValueError("review corpus contains no sequence of two or more tokens")

    return tokenizer, max(lengths)


def generate_text(sentence, next_words, model_name):
    """Extend ``sentence`` by ``next_words`` words predicted by a model.

    Args:
        sentence: Seed text to continue.
        next_words: Number of words to append.
        model_name: One of ``"lstm"``, ``"gru"`` or ``"bilstm"``.

    Returns:
        The seed text followed by the predicted words, each preceded by a
        single space. A predicted index with no vocabulary entry contributes
        ``" "`` as its word.

    Raises:
        KeyError: If ``model_name`` is not a known model key.
        ValueError: If the review corpus yields no usable sequences.
    """
    models = {
        "lstm": gen_LSTM,
        "gru": gen_GRU,
        "bilstm": gen_biLSTM
    }

    model = models[model_name]
    tokenizer, max_sequence_len = _load_generation_vocab()

    generated_text = sentence

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        # The model input is one token shorter than the longest n-gram.
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])

        # index_word is the tokenizer's reverse vocabulary mapping, replacing
        # a linear scan over word_index for every generated word.
        output_word = tokenizer.index_word.get(predicted, " ")

        generated_text += " " + output_word

    return generated_text

if __name__ == '__main__':
    import os

    # Flask's debug mode enables the interactive Werkzeug debugger, which
    # must not be reachable on a deployed server. It is therefore opt-in:
    # set FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, port=8000)